#!/opt/python/bin/python
# a slightly optimized version of Santiago Gala's original Python
# implementation.  see:
# http://memojo.com/~sgala/blog/2007/09/29/Python-Erlang-Map-Reduce

FILE = "o1000k.ap"

def widefinder():
	"""Print the ten most frequently requested article paths found in FILE.

	FILE is memory-mapped read-only and scanned in one regex pass for
	requests of the form ``GET /ongoing/When/NNNx/YYYY/MM/DD/<name> ``
	(N, Y, M, D are digits; <name> is a run of characters other than
	space and dot).  The captured ``YYYY/MM/DD/<name>`` part is the key
	that gets counted.

	The (up to) ten keys with the highest counts are printed one per line,
	in ascending order of count, formatted as ``"%40s = %s" % (key, count)``.
	An empty FILE produces no output.
	"""
	import re,mmap,os
	from collections import defaultdict

	pat = re.compile(r"GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) ")

	count = defaultdict(int)

	fileobj = open(FILE, "rb")
	try:
		# Size the mapping from the descriptor that is already open rather
		# than doing a second lookup by path.
		size = os.fstat(fileobj.fileno()).st_size

		# mmap refuses to map a zero-length file; an empty log simply
		# contains no hits, so skip the scan.
		if size:
			filemap = mmap.mmap(
				fileobj.fileno(),
				size,
				access=mmap.ACCESS_READ
			)
			try:
				# findall() does the whole scan inside the regex engine and
				# returns just the captured group of every match.
				for hit in pat.findall(filemap):
					count[hit] += 1
			finally:
				filemap.close()
	finally:
		fileobj.close()

	# Ascending sort by count; the last ten entries are the top ten.
	for key in sorted(count, key=count.get)[-10:]:
		# Single-argument parenthesized form: prints exactly what the bare
		# print statement would.
		print("%40s = %s" % (key, count[key]))

import cProfile, pstats
if __name__ == "__main__":
	# Profile a single widefinder() run, saving the raw stats to cprof.out.
	# cProfile.run() executes its command string in the __main__ namespace,
	# so 'widefinder()' only resolves when this file is the script being
	# run -- hence the guard instead of profiling at import time.
	cProfile.run('widefinder()', 'cprof.out')

	# Load the saved stats, drop directory prefixes from file names, and
	# print the report sorted by internal time per function.
	p = pstats.Stats('cprof.out')
	p.strip_dirs()
	p.sort_stats('time').print_stats()
