import re

from mrjob.job import MRJob

WORD_RE = re.compile(r"[\w]+")


def _sum_pairs(pairs):
    """Add up an iterable of (length_sum, word_count) pairs in one pass.

    Returns a ``(total_length, total_count)`` tuple; ``(0, 0)`` when the
    iterable is empty. The pairs are consumed lazily, so nothing is buffered.
    """
    total = 0
    count = 0
    for length_sum, word_count in pairs:
        total += length_sum
        count += word_count
    return total, count


class MRAvgWordLen(MRJob):
    """MapReduce job that outputs the mean length of all words in the input.

    A mean cannot be combined directly (a mean of partial means is wrong when
    the partitions differ in size), so every stage passes around
    ``(length_sum, word_count)`` pairs instead. Summing such pairs is
    associative and commutative, which makes a combiner safe; the division
    happens exactly once, in the reducer.
    """

    def mapper(self, _, line):
        """Emit ``None, (len(word), 1)`` for every WORD_RE match in *line*.

        Every record uses the key ``None`` so that a single reducer call sees
        all of them. The value has the same shape as the combiner's output,
        because the combiner may run any number of times, including zero.
        """
        for word in WORD_RE.findall(line):
            yield None, (len(word), 1)

    def combiner(self, _, pairs):
        """Collapse the pairs seen so far into one partial (sum, count) pair."""
        yield None, _sum_pairs(pairs)

    def reducer(self, _, pairs):
        """Emit ``None, mean_word_length`` from the partial (sum, count) pairs.

        The pairs are streamed rather than copied into a list, so memory use
        stays constant no matter how many words the input contains.
        """
        total, count = _sum_pairs(pairs)
        if count:  # nothing to average (and nothing to divide by) otherwise
            # float() keeps this a true division on Python 2 as well.
            yield None, float(total) / count


if __name__ == '__main__':
    MRAvgWordLen.run()