#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import sys
import subprocess

def main():
    """Run the demo on the command-line words, or on a built-in sample.

    When exactly one entry is present in ``sys.argv`` (no extra arguments)
    the sample sentence is used; otherwise everything after the first entry
    is joined with single spaces and used as the text.
    """
    argv = sys.argv
    if len(argv) != 1:
        text = ' '.join(argv[1:])
    else:
        text = "Twas brillig, and the slighthy toves did gyre and gimble in the wabe"
    demo(text)

def demo(text):
    """Print eSpeak's phoneme output for *text*, then a respelling of it.

    Runs ``espeak -qxv en-us <text>`` (``-x`` requests phoneme mnemonics on
    stdout and ``-q`` suppresses audio, per the eSpeak command-line docs),
    echoes the raw output followed by a newline, and then writes the output
    rewritten through the ordered pattern table, followed by a newline.

    Only ``sys.stdout.write`` is used for output so the function behaves the
    same on Python 2 and Python 3.
    """
    # universal_newlines=True makes communicate() hand back text rather than
    # bytes on Python 3; the patterns below are all text strings.
    pipe = subprocess.Popen(['espeak', '-qxv', 'en-us', text],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    output = pipe.communicate()[0]
    sys.stdout.write(output + '\n')
    sys.stdout.write(_respell(output, _build_patterns()) + '\n')


def _build_patterns():
    """Return the ordered list of rewrite patterns used by ``demo``.

    Order matters: the scanner takes the FIRST pattern that matches at the
    current position, so longer or more specific prefixes must come before
    shorter ones that share the same leading characters.
    """
    hav2hef = PatternPost('hav ', unvoiced, 'hef ')
    # Cheap sanity check that the conditional pattern is wired up correctly.
    assert hav2hef.match('hav t@5').rendering == 'hef '

    return ([Pattern("'and", "4nd"), Pattern('and', 'end'),
             Pattern('kan', 'ken'),
             hav2hef,
             Pattern('hav', 'hev'),
             Pattern('fO@r ', 'fer '),
             Pattern('fO@ ', 'fer '),
             Pattern('t#', 'd'),
             Pattern('t2U', 'tu'),
             Pattern('\n ', '\n'),
             Pattern('nt#', 'nt'),
             Pattern('sd', 'st'),  # this is a bug in eSpeak I think
             Pattern('i:;IN', 'iji6'),  # e.g. "being"
             ]
            # Characters that are copied through unchanged.
            + [Pattern(x, x) for x in 'twzbrlgndsivmkfuhp \n']
            + [Pattern('0', '0'), Pattern('I2', '1'), Pattern('S', 'c'),
               Pattern('N', '6'),
               Pattern("E", '3'),
               Pattern('Dat', 'qet'),
               Pattern('o@r', '0r'), Pattern('o@', '0r'),
               Pattern('aI', 'ai'), Pattern("'aI", 'ai'), Pattern(',aI', 'ai'),
               Pattern('aU', 'au'), Pattern("'aU", 'au'),
               Pattern('a#', 'e'),
               Pattern("'aa", '4'),
               Pattern("'aN", '86'),  # in my idiolect anyway!
               Pattern("a", '4'),
               Pattern("A@r", "ar"),
               Pattern("A@", "ar"),
               Pattern("A", "a"),
               Pattern('IN', 'i6'),
               Pattern('I', '1'), Pattern('D', 'q'),
               Pattern('e@r', '3r'), Pattern('e@', '3r'),
               Pattern('@3r', 'er'), Pattern('@3', 'er'),
               Pattern('@2', 'e'), Pattern('@5', 'e'),
               Pattern('T', 'x'),
               Pattern('oU', 'o'), Pattern('3r', 'er'), Pattern('3', 'er'),
               Pattern('Z', 'j'),
               Pattern('@Ll', 'el'), Pattern('@L', 'el'),
               Pattern('eI', '8'),
               Pattern('O@r', '0r'), Pattern('O@', '0r'),
               Pattern('O', '0'),
               # NOTE(review): this 'o@' entry can never fire, because the
               # earlier Pattern('o@', '0r') matches the same prefix first.
               # Decide which rendering is intended and drop the other.
               Pattern('o@', '0'), Pattern('@', 'e'),
               # This next one should be generalized to (most) other
               # intervocalic occurrences of /t/.
               PatternPost("Vt", emphasized, "7d"),
               Pattern('V', '7'),
               Pattern('U@', '5r'),
               Pattern('U', '5'),
               Pattern('j', 'y'),
               ]
            # Marks that are dropped when no longer pattern consumed them.
            + [Pattern(x, '') for x in "'_-:,;!"])


def _respell(output, patterns):
    """Rewrite *output* left to right using the first matching pattern.

    At each position the patterns are tried in order; the first match
    contributes its rendering and consumes ``match.length`` characters.  A
    character that no pattern matches is emitted as ``<c>`` and skipped;
    directly adjacent ``<c>`` markers are separated by a single space.
    Returns the rewritten text without a trailing newline of its own.
    """
    pieces = []
    after_marker = False  # True while the last thing emitted was a <c> marker
    pos = 0
    while pos < len(output):
        tail = output[pos:]
        for pat in patterns:
            match = pat.match(tail)
            if match:
                pieces.append(match.rendering)
                pos += match.length
                after_marker = False
                break
        else:
            if after_marker:
                pieces.append(' ')
            pieces.append('<' + output[pos] + '>')
            after_marker = True
            pos += 1
    return ''.join(pieces)

def unvoiced(tail):
    """Return True if *tail* starts with one of the characters ``tsfphTSk``.

    These are the characters this script treats as unvoiced consonants.
    An empty *tail* (nothing follows) yields False; a bare substring test
    would report True there because ``''`` is contained in every string.
    """
    return bool(tail) and tail[0] in 'tsfphTSk'

def emphasized(tail):
    """Return True if *tail* starts with ``,`` or ``'``.

    This script treats those two marks as signalling that what follows is
    emphasized.  An empty *tail* yields False; a bare substring test would
    report True there because ``''`` is contained in every string.
    """
    return bool(tail) and tail[0] in ",'"

class Pattern:
    """Unconditional prefix rewrite: text starting with ``a`` renders as ``b``."""

    def __init__(self, a, b):
        self.a, self.b = a, b

    def match(self, text):
        """Return a ``Match`` if *text* begins with ``self.a``, else None.

        The match consumes ``len(self.a)`` characters and renders ``self.b``.
        """
        if not text.startswith(self.a):
            return None
        return Match(len(self.a), self.b)

class PatternPost:
    """Prefix rewrite that also requires a condition on the text that follows.

    ``pat`` is the prefix to match, ``post`` is a callable given the text
    after the prefix, and ``rendering`` is the replacement for the prefix.
    """

    def __init__(self, pat, post, rendering):
        self.pat, self.post, self.rendering = pat, post, rendering

    def match(self, text):
        """Return a ``Match`` for ``pat`` if it matches and ``post`` accepts.

        ``post`` is only called when *text* starts with ``pat``; it receives
        the remainder of *text* after the prefix.  Returns False otherwise.
        """
        consumed = len(self.pat)
        if text.startswith(self.pat) and self.post(text[consumed:]):
            return Match(consumed, self.rendering)
        return False

class Match:
    """Result of a successful pattern match.

    ``length`` is how many input characters were consumed and ``rendering``
    is the text to emit in their place.
    """

    def __init__(self, length, rendering):
        self.length, self.rendering = length, rendering

if __name__ == '__main__':
    # cgitb only changes how uncaught exceptions are reported, so a missing
    # module (it was removed from the standard library in Python 3.13) must
    # not prevent the script from running.
    try:
        import cgitb
    except ImportError:
        pass
    else:
        cgitb.enable(format='text')
    main()
