# Citation: # Gurmeet Singh Manku and Rajeev Motwani. 2002. Approximate frequency counts over data streams. In Proceedings of the 28th international conference on Very Large Data Bases(VLDB '02). # Feel free to contact me if you # have any issues obtaining this paper # or want to discuss it further class Entry(object): # track info about single element def __init__(self, e,f,delta): # constructor self.e = e # value of element self.f = f # “absolute frequency” of occurrence self.delta = delta # “absolute error” class LossyCounter(object): FACTOR = 0.1 def __init__(self, s): self.s = s self.error = s * self.FACTOR self.w = int(1/self.error) self.entries = {} self.N = 0 def count_element(self, elem): self.N += 1 b_current = int(math.ceil(float(self.N)/self.w)) if self.entries.has_key(elem): self.entries[elem].f += 1 else: self.entries[elem] = Entry(elem, 1, b_current - 1) if self.N % self.w == 0: for elem in self.entries.keys(): e = self.entries[elem] if e.f + e.delta <= b_current: del self.entries[elem] def get_counts(self): return [e for e in self.entries.values()
if e.f >= (self.s - self.error)*self.N]