#!/usr/bin/python """Full backtracking regexp matcher in 11 LoC. Prompted by a remark of Dave Long. A regular expression is either: - a catenation of two regular expressions HT, which matches strings made up of two catenated parts that match H and T; - an alternation of two regular expressions A|B, which matches everything either A or B matches; - a Kleene-star of a regular expression R*, which matches the empty string and everything RR* matches; - or a literal string S, which matches only itself. This code directly implements that definition. """ from collections import namedtuple class Cat(namedtuple("Cat", ("h", "t"))): matches = lambda self, s: any(self.h.matches(h) and self.t.matches(t) for h, t in splits(s)) class Alt(namedtuple("Alt", ("a", "b"))): matches = lambda self, s: self.a.matches(s) or self.b.matches(s) class Star(namedtuple("Star", ("r"))): matches = lambda self, s: not s or Cat(self.r, self).matches(s) class Lit(namedtuple("Lit", ("s"))): matches = lambda self, s: s == self.s splits = lambda s: ((s[:i], s[i:]) for i in range(len(s)+1))