"""MapReduce job that computes the average length of the words in its input."""

from mrjob.job import MRJob
import re

# A "word" is any maximal run of word characters (letters, digits, underscore).
WORD_RE = re.compile(r"[\w]+")


class MRAvgWordLen(MRJob):
    """Emit one record whose value is the mean length of all input words."""

    def mapper(self, _, line):
        """Yield ``(None, length)`` for every word found in *line*.

        Every length is emitted under the same key (``None``) so that a
        single reducer call sees all of them.
        """
        for word in WORD_RE.findall(line):
            yield None, len(word)

    # No combiner: averaging is not associative (a mean of partial means is
    # not the overall mean), so the reducer cannot be reused as a combiner.

    def reducer(self, _, lens):
        """Yield ``(None, mean)`` for the word lengths in *lens*.

        The lengths are consumed as a stream, keeping only a running total
        and count, so memory use does not grow with the number of words.
        Nothing is yielded when *lens* is empty.
        """
        total = 0
        count = 0
        for length in lens:
            total += length
            count += 1

        if count:
            # float() forces true division even where ``/`` on two ints
            # would truncate (Python 2).
            yield None, total / float(count)


if __name__ == '__main__':
    MRAvgWordLen.run()