#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Make a Zipf distribution the supposedly easy way.

It doesn't seem to actually work very well, though.

Although it does easily generate distributions spanning three orders
of magnitude, they aren’t very Zipfian.  For example:

    kragen@inexorable:~/devel/inexorable-misc$ ./zipf.py 200 10000000
    322809: 164
    273933: 109
    254075: 143
    235432: 146
    232399: 29
    225789: 39
    214326: 161
    177778: 70
    160230: 190
    158333: 93
    …
      968: 21
      521: 24
      157: 198
    kragen@inexorable:~/devel/inexorable-misc$ 

In a Zipf distribution, the top ten items would have covered the first
order of magnitude.  Instead, they cover a factor of 2.

"""

import random, sys

def zipf(x, y):
    """Build a rich-get-richer sample and return its item frequencies.

    Starts from a pool holding each of the items ``0 .. x-1`` once, then
    grows the pool to ``y`` entries by repeatedly appending a uniformly
    random entry already in the pool, so an item is picked with
    probability proportional to its current count.

    x -- number of distinct items to seed the pool with.
    y -- target pool size; if ``y <= x`` no extra entries are drawn and
         every item ends up with a count of 1.

    Returns an iterator of ``(count, item)`` tuples, highest count first
    (ties broken by higher item number first).

    Raises IndexError (from ``random.choice``) when the seed pool is
    empty (``x <= 0``) but ``y > x`` requires at least one draw.
    """
    # list() is required: on Python 3 range() is immutable and has no
    # append(); on Python 2 this is just a cheap copy.
    pool = list(range(x))
    for _ in range(x, y):
        pool.append(random.choice(pool))

    counts = {}
    for item in pool:
        counts[item] = counts.get(item, 0) + 1

    # Distinct names here avoid shadowing the x and y parameters.
    return reversed(sorted((count, item) for item, count in counts.items()))

if __name__ == '__main__':
    # Script entry: argv[1] is the number of distinct items, argv[2] the
    # total pool size; prints one "count: item" line per item.
    if len(sys.argv) < 3:
        sys.exit("usage: %s NUM_ITEMS TOTAL_SIZE" % sys.argv[0])
    # zipf() yields (count, item) pairs, so the count is the left column.
    for count, item in zipf(int(sys.argv[1]), int(sys.argv[2])):
        # Parenthesised single-argument print works on Python 2 and 3.
        print("%5d: %d" % (count, item))

