diff options
author | Nick Mathewson <nickm@torproject.org> | 2008-11-23 04:31:53 +0000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2008-11-23 04:31:53 +0000 |
commit | d19ceed522d80d0b3dba446933a5b9316dc48c0b (patch) | |
tree | cc6fbde2bc2432b3a3e591a96f803198668c1b63 /lib/sexp/access.py | |
parent | 7f3418fcd091da3fb5cdc11c4820b43bb90d2d20 (diff) |
Okay, so I'll admit that my vision of a future where all the world is an s-expression is probably no more than a figment of my imagination. Someday, though, somebody will want to parse spki in python, and they will sure be glad that svn preserves deleted files.
git-svn-id: file:///home/or/svnrepo/updater/trunk@17371 55e972cd-5a19-0410-ae62-a4d7a52db4cd
Diffstat (limited to 'lib/sexp/access.py')
-rw-r--r-- | lib/sexp/access.py | 544 |
1 files changed, 0 insertions, 544 deletions
diff --git a/lib/sexp/access.py b/lib/sexp/access.py deleted file mode 100644 index 74f5022..0000000 --- a/lib/sexp/access.py +++ /dev/null @@ -1,544 +0,0 @@ - -import re -import sys - -def s_tag(s): - """Returns the tag of an s-expression (that is, the string that is its - first element), or None of the expression has no tag. - - >>> s_tag("a string") is None - True - >>> s_tag(["a-tagged", "list"]) - 'a-tagged' - >>> s_tag([["untagged"], "list"]) is None - True - """ - if len(s) and not isinstance(s, str) and isinstance(s[0],str): - return s[0] - else: - return None - -def s_child(s, tag): - """Return the fist child of 's' whose tag is 'tag', or None if no such - child exists. - - >>> x = [ 'example', ['greeting', 'hello'], [ 'world', 'earth'] ] - >>> s_child(x, "greeting") - ['greeting', 'hello'] - >>> s_child(x, "world") - ['world', 'earth'] - >>> print s_child(x, "foo") - None - """ - - for child in s: - if s_tag(child) == tag: - return child - return None - -def s_attr(s, tag): - """Returns the second element of the child of 's' whose tag is 'tag'. - This is helpful for extracting a (key val) element. Returns None - if there is no such element. - """ - ch = s_child(s,tag) - if ch == None or len(ch) < 2: - return None - return ch[1] - -def s_children(s, tag): - """Returns a generator yielding all children of 's' whose tag is 'tag'. - - >>> x = [ ['quark', 'top'], ['cheese', 'stilton'], ['quark', 'bottom'], - ... ['cheese', 'cheddar'], "cheese" ] - >>> list(s_children(x, "Foo")) - [] - >>> list(s_children(x, "cheese")) - [['cheese', 'stilton'], ['cheese', 'cheddar']] - """ - return (ch for ch in s if s_tag(ch) == tag) - -def s_descendants(s, tags=()): - """Yield every descendant of 's' whose tag is in 'tags'. If 'tags' is - false, yield every descendant of s. Items are returned in depth-first - order. - - >>> x = [ 'foo', ['bar', ['foo', 'quuz'], ['foo', ['foo', 'zilch']] ], - ... ['foo', 'quum'], ['mulch', 'mulchy', 'foo', ['foo', 'baaz']]] - >>> list(s_descendants(x, ['mulch'])) - [['mulch', 'mulchy', 'foo', ['foo', 'baaz']]] - >>> for item in s_descendants(x, ['foo']): print item - ['foo', 'quuz'] - ['foo', ['foo', 'zilch']] - ['foo', 'zilch'] - ['foo', 'quum'] - ['foo', 'baaz'] - >>> x = ['a', 'b', 'c', ['d', ['e', 'f']], ['g']] - >>> list(s_descendants(x)) - [['d', ['e', 'f']], ['e', 'f'], ['g']] - """ - stack = [ ] - push = stack.append - pop = stack.pop - - idx = 0 - while 1: - while idx == len(s): - try: - s, idx = pop() - except IndexError: - return - if isinstance(s[idx], str): - idx += 1 - continue - if not tags or s_tag(s[idx]) in tags: - yield s[idx] - push((s, idx+1)) - s = s[idx] - idx = 0 - -def attrs_to_dict(sexpr): - """Return a dictionary mapping keys of the attributes in sexpr to - their values. Only the last element in the attribute list counts. - - >>> s = [ 'given-name', - ... ["Tigra", 'Rachel'], ["Bunny", "Elana"] ] - >>> attrs_to_dict(s) - {'Tigra': ['Rachel'], 'Bunny': ['Elana']} - """ - result = {} - for ch in sexpr: - tag = s_tag(ch) - if tag is not None: - result[tag]=ch[1:] - return result - -class SExpr(list): - """Wraps an s-expresion list to return its tagged children as attributes. - - >>> s = [ 'cat', ['cheezburger', 'can has'], ['laser', 'can not has'], - ... ['adjectives', ['furry', 'yes'], ['nuclear', 'no']]] - >>> s = SExpr(s) - >>> s[0] - 'cat' - >>> s_tag(s) - 'cat' - >>> s.cheezburger - ['cheezburger', 'can has'] - >>> s.cheezburger # Check caching. - ['cheezburger', 'can has'] - >>> s.adjectives.furry - ['furry', 'yes'] - >>> s.adjectives.nuclear - ['nuclear', 'no'] - >>> s.do_not_want - Traceback (most recent call last): - ... - AttributeError: do_not_want - """ - - def __init__(self, stuff=()): - list.__init__(self, stuff) - self._d = None - - def __getattr__(self, item): - if self._d is None: self._buildDict() - try: - idx = self._d[item] - except KeyError: - raise AttributeError(item) - return self[idx] - - def __getitem__(self, idx): - item = list.__getitem__(self, idx) - if type(item) in (list, tuple): #exact match only. - item = self[idx] = SExpr(item) - return item - - def _buildDict(self): - self._d = d = {} - for idx in xrange(len(self)): - item = list.__getitem__(self, idx) - t = s_tag(item) - if t is not None: - d[t] = idx - - -def _s_lookup_all(s, path, callback): - # XXXX: Watch out; ** gets pretty heavy pretty fast. - - if isinstance(path, str): - path = path.split(".") - - if len(path) == 0: - callback(s) - return - - for p_idx in xrange(len(path)): - p_item = path[p_idx] - - if p_item == '*': - for ch in s: - if not isinstance(ch, str): - _s_lookup_all(ch, path[p_idx+1:], callback) - return - elif p_item == '**': - for ch in s_descendants(s): - if not isinstance(ch, str): - _s_lookup_all(ch, path[p_idx+1:], callback) - return - elif p_item.startswith('**'): - for ch in s_descendants(s): - if s_tag(ch) == p_item[2:]: - _s_lookup_all(ch, path[p_idx+1:], callback) - else: - for ch in s_children(s, p_item): - _s_lookup_all(ch, path[p_idx+1:], callback) - return - - callback(s) - -def s_lookup_all(s, path): - """Path-based lookup. "*" matches any single element; "**" matches all - descendants. Not too efficient. - - >>> x = ['alice', - ... ['father', 'bob', ['mother', 'carol'], ['father', 'dave']], - ... ['mother', 'eve', ['mother', 'frances', ['dog', 'spot']], - ... ['father', 'gill']], - ... ['marmoset', 'tiffany'], - ... ['marmoset', 'gilbert'] ] - >>> s_lookup_all(x, "father") - [['father', 'bob', ['mother', 'carol'], ['father', 'dave']]] - >>> s_lookup_all(x, "father.mother") - [['mother', 'carol']] - >>> s_lookup_all(x, "*.mother") - [['mother', 'carol'], ['mother', 'frances', ['dog', 'spot']]] - >>> s_lookup_all(x, "**.dog") - [['dog', 'spot']] - >>> s_lookup_all(x, "**mother.dog") - [['dog', 'spot']] - >>> s_lookup_all(x, "mother.*.dog") - [['dog', 'spot']] - >>> s_lookup_all(x, "marmoset") - [['marmoset', 'tiffany'], ['marmoset', 'gilbert']] - """ - result = [] - _s_lookup_all(s, path, result.append) - return result - -def s_lookup(s, path): - r = s_lookup_all(s, path) - if len(r): - return r[0] - return None - -### Schema objects. You shouldn't instantiate these by hand; use -### parseSchema instead. - -class Schema: - """A schema represents a pattern to be applied to s-expressions. - Generate them with parseSchema. - """ - def matches(self, s): - """Return true iff s matches this schema.""" - raise NotImplemented() - def rep(self): - """Return the s-expression representing this schema.""" - raise NotImplemented() - -class _Singleton(Schema): - '''superclass for all schemas that represent a single string or list.''' - def isSingleton(self): - return True - - def clear(self): - '''used during parsing. resets this schema to an - I-have-matched-nothing state. ''' - self._got = False - def matchItem(self, item): - '''used during parsing. Returns true iff this schema can consume - item in its current state.''' - if not self._got and self.matches(item): - self._got = True - return True - else: - return False - def isSatisfied(self): - '''used during parsing. Returns true iff this schema would be - satisfied parsing no more items.''' - return self._got - -class _Span(Schema): - '''superclass for all schemas that represent a variable number of strings - or lists.''' - def isSingleton(self): - return False - def clear(self): - pass - def matchItem(self, item): - raise NotImplemented() - def isSatisfied(self): - raise NotImplemented() - -class _AnyItem(_Singleton): - 'schema representing any item' - def matches(self,item): - return True - def rep(self): - return "?" -class _AnyString(_Singleton): - 'schema representing any single string' - def matches(self,item): - return isinstance(item, str) - def rep(self): - return "." -class _ExactString(_Singleton): - 'schema that matches only a particular string' - def __init__(self, s): - self._s = s - def matches(self, item): - return item == self._s - def rep(self): - return "=%s"%self._s -class _ReString(_Singleton): - 'schema that matches all strings following a particular regex.' - def __init__(self, s, regex=None): - if regex is None: - regex = re.compile(s) - self._re = regex - self._s = s - def matches(self, item): - if not isinstance(item, str): - return False - m = self._re.match(item) - return m and m.end() == len(item) - def rep(self): - return "%s"%self._s -class _List(_Singleton): - 'schema that matches any list whose items match a sequence of schemas' - def __init__(self, subpatterns): - self._pats = subpatterns - def clear(self): - _Singleton.clear(self) - for p in self._pats: - p.clear() - def matches(self, item): - if isinstance(item, str): - return False - - i_idx = 0 - pat_idx = 0 - while i_idx < len(item): - try: - subpat = self._pats[pat_idx] - except: - return False # Too many items. - - if subpat.isSingleton(): - if not subpat.matches(item[i_idx]): - return False - i_idx += 1 - pat_idx += 1 - else: - subpat.clear() - while i_idx < len(item) and subpat.matchItem(item[i_idx]): - i_idx += 1 - if not subpat.isSatisfied(): - return False - pat_idx += 1 - - # Out of items, but we have more patterns. Make sure they all accept - # 0 items. - if pat_idx < len(self._pats): - for subpat in self._pats[pat_idx:]: - subpat.clear() - if not subpat.isSatisfied(): - return False - return True - - def rep(self): - return [ p.rep() for p in self._pats ] - -class _AnyItems(_Span): - '''schema matching any number of any items''' - def matchItem(self, item): - return True - def isSatisfied(self): - return True - def rep(self): - return "_" - -class _NMatches(_Span): - 'schema matching another schema a given number of times.' - def __init__(self, alternatives, lo, hi): - self.lo = lo - self.hi = hi - self.count = 0 - self.alternatives = alternatives - for a in alternatives: - if not a.isSingleton(): - raise SchemaFormatError("Nexted span inside span") - def clear(self): - self.count = 0 - for p in self.alternatives: - p.clear() - def matchItem(self, item): - if self.count == self.hi: - return False - for p in self.alternatives: - if p.matches(item): - self.count += 1 - return True - return False - def isSatisfied(self): - return self.lo <= self.count <= self.hi - - def rep(self): - name = { (1,1): ":oneof", - (0,1): ":maybe", - (0,sys.maxint): ":anyof", - (1,sys.maxint): ":someof" }.get((self.lo, self.hi)) - if name is None: - name = ":%d-%d"%(self.lo, self.hi) - result = [ name ] - result.extend(p.rep() for p in self.alternatives) - return result - -class _Unordered(_Span): - '''schema containing a number of subitems, all of which must match in - some order.''' - def __init__(self, alternatives): - self.alternatives = alternatives - def clear(self): - for p in self.alternatives: - p.clear() - def matchItem(self, item): - for p in self.alternatives: - if p.matchItem(item): - return True - return False - def isSatisfied(self): - for p in self.alternatives: - if not p.isSatisfied(): - return False - return True - def rep(self): - result = [ ":unordered" ] - result.extend(p.rep() for p in self.alternatives) - return result - -class SchemaFormatError(Exception): - pass - -_RE_PAT = re.compile(r'/((?:[\\.]|[^\\/]+)*)/([ilmstx]*)', re.I) - -def parseSchema(s, table=None): - """Return a schema object to represent a possible set of s-expressions. - The syntax is: - "=string" matches a string itself. - "*" matches any number of items and can occur only at the end of a list. - "_" matches a single item. - "." matches any string. - "/re/" matches a regex. - ".name" matches a named schema stored in the map 'table'. - - (i1 i2 i3) matches a list of i1 followed by i2 followed by i3. - - (:maybe i1) matches zero or one of i1. - (:oneof i1 i2 i3) matches any one of the items i1, i2, i3. - (:anyof i1 i2 i3) matches zero or more of the items i1, i2, i3. - (:someof i1 i2 i3) matches one or more of i1, i2, and i3. - - (:unordered i1 i2 i3) matches all of i1, i2, and i3, in any order. - - The matching algorithm is a stupid greedy algorithm. If you need to - check stuff it can't handle, write a new thing. - - >>> import sexp.parse - >>> P = sexp.parse.parse - >>> PS = lambda s: parseSchema(sexp.parse.parse(s)) - >>> S1 = PS("(=hello _ . /.*geuse/)") - >>> S1.matches(P("(hello (my little) 11:friend from Betelgeuse)")) - True - >>> S1.matches(P("(hello (my little) (friend from) Betelgeuse)")) - False - >>> S1.matches(P("(hello (my little) 11:friend from Betelgeuse Prime)")) - False - >>> S1.matches(P("(hello (my little) friendfrom BetelgeusePrime)")) - False - - >>> S2 = PS("(=greetings (:oneof =world =gentlebeings) *)") - >>> S2.matches(P("greetings")) - False - >>> S2.matches(P("(greetings gentlebeings)")) - True - >>> S2.matches(P("(greetings world please take us to (your leader))")) - True - """ - if isinstance(s, str): - if not len(s): - raise SchemaFormatError("Empty string encountered") - if s == '*': - return _AnyItems() - elif s == '_': - return _AnyItem() - elif s == '.': - return _AnyString() - elif s.startswith('='): - return _ExactString(s[1:]) - elif s.startswith('.'): - try: - return table[s[1:]] - except KeyError: - raise SchemaFormatError("Unknown reference %s"%s) - else: - m = _RE_PAT.match(s) - if m: - flags = 0 - for char in m.group(2): - flags |= { "i":re.I, "l":re.L, "m":re.M, "s":re.S, - "t":re.T, "x":re.X }[char.lower()] - try: - p = re.compile(m.group(1), flags) - except re.error, e: - raise SchemaFormatError("Couldn't compile %s"%s) - - return _ReString(s, p) - - raise SchemaFormatError("Confusing entry %s"%s) - elif len(s) and isinstance(s[0], str) and s[0].startswith(':'): - tag = s[0] - - m = re.match(r'\:(\d*)(\-\d*)?$', tag) - if m: - g = m.groups() - if g[0]: - lo = int(g[0], 10) - else: - lo = 0 - if g[1]: - if len(g[1]) > 1: - hi = int(g[1][1:], 10) - else: - hi = sys.maxint - else: - hi = lo - else: - try: - lo,hi = { ":maybe": (0,1), - ":oneof": (1,1), - ":anyof": (0,sys.maxint), - ":someof":(1,sys.maxint), - ":unordered": (None, None) }[tag] - except KeyError: - raise SchemaFormatError("Unknown tag %s"%tag) - - subitems = [ parseSchema(i, table) for i in s[1:] ] - if lo is not None: - return _NMatches(subitems, lo, hi) - else: - return _Unordered(subitems) - else: - return _List([ parseSchema(i, table) for i in s ]) - |