from mrjob.job import MRJob
import re

# Matches each maximal run of one or more word characters (`\w`); the mapper
# uses it to pull the individual words out of an input line.
WORD_RE = re.compile(r"[\w]+")

class MRAvgWordLen(MRJob):
    """MapReduce job that computes the average length of all words in the input.

    Every word length is emitted under the single key ``None`` so that one
    reducer call sees all of them and can produce one overall average.
    """

    def mapper(self, _, line):
        """Emit the length of every word found in ``line``.

        Args:
            _: Input key; ignored.
            line: One line of input text.

        Yields:
            ``(None, length)`` for each match of ``WORD_RE`` in ``line``.
        """
        for word in WORD_RE.findall(line):
            yield None, len(word)

    # There is deliberately no combiner: taking an average of partial
    # averages is not associative, so the reducer cannot simply be reused as
    # a combiner. A combiner would be possible if it emitted (sum, count)
    # pairs and the reducer summed both components before dividing.

    def reducer(self, _, lens):
        """Average all word lengths received for the key.

        The values are consumed in a single streaming pass, keeping only a
        running total and count, so the full set of lengths is never held in
        memory at once.

        Args:
            _: Reduce key; ignored (always ``None`` as emitted by the mapper).
            lens: Iterable of integer word lengths.

        Yields:
            ``(None, average_length)``; nothing if ``lens`` is empty.
        """
        total = 0
        count = 0
        for length in lens:
            total += length
            count += 1
        # Guard against an empty iterable (e.g. when the method is called
        # directly) so we never divide by zero.
        if count:
            yield None, total / count

# Launch the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    MRAvgWordLen.run()