#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Character-level Markov-chain text generator.

A ``Model`` counts, for every run of up to ``context_len`` preceding
characters, how often each character follows it in the training text, and
can then emit random text that follows the same statistics.

Sample output:

    2:26:13:32 Thomemblecame shat parther com notheign frour dot toples ch
    bery he theness froildred to yough saiders st to gold pakkubleth them
    ing prat So by youlnem and the LORD of Jer witerval; I whis boured ing
    comman the of thy the king the parefouse, ske fore
"""

import bisect
from collections import Counter
import random
import sys


class Model:
    """Character n-gram model trained from an iterable of text lines.

    Attributes:
        context_len: maximum number of preceding characters used as context.
        freq: Counter of every character seen in the training text.
        cfreq: dict mapping a context (tuple of characters, at most
            ``context_len`` long) to a Counter of the characters that
            followed that context.
    """

    def __init__(self, context_len, inputfile):
        """Train the model.

        Args:
            context_len: number of preceding characters to condition on;
                0 gives a context-free (unigram) model.
            inputfile: any iterable of strings (e.g. an open text file);
                it is consumed once, character by character.
        """
        self.context_len = context_len
        self.freq = Counter()
        self.cfreq = {}  # context tuple -> Counter of following characters

        context = ()
        for line in inputfile:
            for char in line:
                self.freq[char] += 1
                self.cfreq.setdefault(context, Counter())[char] += 1
                context = self._advance(context, char)

    def _advance(self, context, char):
        """Return the context that follows ``context`` once ``char`` is seen."""
        if self.context_len <= 0:
            # A negative-zero slice ([-0:]) would keep the whole tuple, so the
            # zero-length window has to be handled explicitly.
            return ()
        return (context + (char,))[-self.context_len:]

    def generate(self, nchars):
        """Yield ``nchars`` random characters drawn from the trained model.

        Each character is sampled from the distribution observed after the
        current context. When the context was never seen in training (or has
        no usable successors), the global character distribution is used
        instead; that one is smoothed by adding 1 to every count so it can
        never be empty.
        """
        chars, cdf = build_cdf(self.freq, lambda char: 1)

        cdfs = {}
        for context, counts in self.cfreq.items():
            table = build_cdf(counts, lambda char: 0)
            # Skip tables with zero total weight (all successors outside the
            # 256-character alphabet): randrange(0) would raise ValueError.
            if table[1][-1] > 0:
                cdfs[context] = table

        context = ()
        for _ in range(nchars):
            cchars, ccdf = cdfs.get(context, (chars, cdf))
            p = random.randrange(ccdf[-1])  # 0 <= p < total weight
            # Pick the first entry whose cumulative count is strictly greater
            # than p, so each character is chosen with probability
            # proportional to its count and zero-count characters never are.
            c = cchars[bisect.bisect_right(ccdf, p)]
            yield c
            context = self._advance(context, c)


def build_cdf(freq, basefreq):
    """Build a cumulative frequency table over the characters chr(0)..chr(255).

    Args:
        freq: mapping from character to count; it is indexed with every one
            of the 256 characters, so it must tolerate missing keys (as a
            Counter does).
        basefreq: callable returning an extra count to add for a character.

    Returns:
        A ``(chars, cdf)`` pair of parallel 256-element lists, where
        ``cdf[i]`` is the sum of ``freq[c] + basefreq(c)`` over
        ``chars[:i + 1]``. Characters outside this range are ignored.
    """
    chars = [chr(code) for code in range(256)]
    cdf = []
    total = 0
    for char in chars:
        total += freq[char] + basefreq(char)
        cdf.append(total)
    return chars, cdf


def main():
    """Train on the file named on the command line and print 1000 characters."""
    if len(sys.argv) == 1:
        path = '/home/user/netbook-misc-devel/bible-pg10.txt'
    else:
        path = sys.argv[1]
    # The model reads the whole file during construction, so the handle can
    # be closed before generating.
    with open(path) as inputfile:
        model = Model(2, inputfile)
    print(''.join(model.generate(1000)))


if __name__ == '__main__':
    main()