More tests and bugfixes for data access code in sexp lib

git-svn-id: file:///home/or/svnrepo/updater/trunk@16855 55e972cd-5a19-0410-ae62-a4d7a52db4cd
author: Nick Mathewson <nickm@torproject.org> 2008-09-11 19:57:27 +0000
committer: Nick Mathewson <nickm@torproject.org> 2008-09-11 19:57:27 +0000
commit: c27349f728b2c8e749b7fd6fd13fad9c05c0cc04 (patch)
tree: b1485f7725e2a4600f19beabbe082a30a583cf40 /lib
parent: 6dd05e0e0f3a95204fb1f93f433c4d51c3f4459b (diff)
1 files changed, 177 insertions, 35 deletions
diff --git a/lib/sexp/access.py b/lib/sexp/access.py
index d9b40dc..beb87c9 100644
--- a/lib/sexp/access.py
+++ b/lib/sexp/access.py
@@ -3,7 +3,9 @@ import re
 import sys
 
 def s_tag(s):
-    """
+    """Returns the tag of an s-expression (that is, the string that is its
+       first element), or None of the expression has no tag.
+
     >>> s_tag("a string") is None
     True
     >>> s_tag(["a-tagged", "list"])
@@ -17,15 +19,54 @@ def s_tag(s):
         return None
 
 def s_child(s, tag):
+    """Return the fist child of 's' whose tag is 'tag', or None if no such
+       child exists.
+
+    >>> x = [ 'example', ['greeting', 'hello'], [ 'world', 'earth'] ]
+    >>> s_child(x, "greeting")
+    ['greeting', 'hello']
+    >>> s_child(x, "world")
+    ['world', 'earth']
+    >>> print s_child(x, "foo")
+    None
+    """
+
     for child in s:
         if s_tag(child) == tag:
             return child
     return None
 
 def s_children(s, tag):
+    """Returns a generator yielding all children of 's' whose tag is 'tag'.
+
+    >>> x = [ ['quark', 'top'], ['cheese', 'stilton'], ['quark', 'bottom'],
+    ...       ['cheese', 'cheddar'], "cheese" ]
+    >>> list(s_children(x, "Foo"))
+    []
+    >>> list(s_children(x, "cheese"))
+    [['cheese', 'stilton'], ['cheese', 'cheddar']]
+    """
     return (ch for ch in s if s_tag(ch) == tag)
 
 def s_descendants(s, tags=()):
+    """Yield every descendant of 's' whose tag is in 'tags'.  If 'tags' is
+       false, yield every descendant of s.  Items are returned in depth-first
+       order.
+
+    >>> x = [ 'foo', ['bar', ['foo', 'quuz'], ['foo', ['foo', 'zilch']] ],
+    ...      ['foo', 'quum'], ['mulch', 'mulchy', 'foo', ['foo', 'baaz']]]
+    >>> list(s_descendants(x, ['mulch']))
+    [['mulch', 'mulchy', 'foo', ['foo', 'baaz']]]
+    >>> for item in s_descendants(x, ['foo']): print item
+    ['foo', 'quuz']
+    ['foo', ['foo', 'zilch']]
+    ['foo', 'zilch']
+    ['foo', 'quum']
+    ['foo', 'baaz']
+    >>> x = ['a', 'b', 'c', ['d', ['e', 'f']], ['g']]
+    >>> list(s_descendants(x))
+    [['d', ['e', 'f']], ['e', 'f'], ['g']]
+    """
     stack = [ ]
     push = stack.append
     pop = stack.pop
@@ -40,18 +81,48 @@ def s_descendants(s, tags=()):
         if isinstance(s[idx], str):
             idx += 1
             continue
-        if s_tag(s[idx]) in tags:
+        if not tags or s_tag(s[idx]) in tags:
             yield s[idx]
+        push((s, idx+1))
+        s = s[idx]
+        idx = 0
 
 
 class SExpr(list):
+    """Wraps an s-expresion list to return its tagged children as attributes.
+
+    >>> s = [ 'cat', ['cheezburger', 'can has'], ['laser', 'can not has'],
+    ...       ['adjectives', ['furry', 'yes'], ['nuclear', 'no']]]
+    >>> s = SExpr(s)
+    >>> s[0]
+    'cat'
+    >>> s_tag(s)
+    'cat'
+    >>> s.cheezburger
+    ['cheezburger', 'can has']
+    >>> s.cheezburger  # Check caching.
+    ['cheezburger', 'can has']
+    >>> s.adjectives.furry
+    ['furry', 'yes']
+    >>> s.adjectives.nuclear
+    ['nuclear', 'no']
+    >>> s.do_not_want
+    Traceback (most recent call last):
+    ...
+    AttributeError: do_not_want
+    """
+
     def __init__(self, stuff=()):
         list.__init__(self, stuff)
         self._d = None
 
     def __getattr__(self, item):
         if self._d is None: self._buildDict()
-        return self[self._d[item]]
+        try:
+            idx = self._d[item]
+        except KeyError:
+            raise AttributeError(item)
+        return self[idx]
 
     def __getitem__(self, idx):
         item = list.__getitem__(self, idx)
@@ -67,8 +138,8 @@ class SExpr(list):
             if t is not None:
                 d[t] = idx
 
-def _s_lookup_all(s, path, callback):
 
+def _s_lookup_all(s, path, callback):
     # XXXX: Watch out; ** gets pretty heavy pretty fast.
 
     if isinstance(path, str):
@@ -84,17 +155,17 @@ def _s_lookup_all(s, path, callback):
         if p_item == '*':
             for ch in s:
                 if not isinstance(ch, str):
-                    _s_lookup_all(s, path[p_idx+1:], callback)
+                    _s_lookup_all(ch, path[p_idx+1:], callback)
             return
         elif p_item == '**':
             for ch in s_descendants(s):
                 if not isinstance(ch, str):
-                    _s_lookup_all(s, path[p_idx+1:], callback)
+                    _s_lookup_all(ch, path[p_idx+1:], callback)
             return
         elif p_item.startswith('**'):
             for ch in s_descendants(s):
                 if s_tag(ch) == p_item[2:]:
-                    _s_lookup_all(s, path[p_idx+1:], callback)
+                    _s_lookup_all(ch, path[p_idx+1:], callback)
         else:
             s = s_child(s, p_item)
             if s is None:
@@ -103,6 +174,26 @@ def _s_lookup_all(s, path, callback):
     callback(s)
 
 def s_lookup_all(s, path):
+    """Path-based lookup.  "*" matches any single element; "**" matches all
+       descendants.  Not too efficient.
+
+    >>> x = ['alice',
+    ...           ['father', 'bob', ['mother', 'carol'], ['father', 'dave']],
+    ...           ['mother', 'eve', ['mother', 'frances', ['dog', 'spot']],
+    ...                             ['father', 'gill']]]
+    >>> s_lookup_all(x, "father")
+    [['father', 'bob', ['mother', 'carol'], ['father', 'dave']]]
+    >>> s_lookup_all(x, "father.mother")
+    [['mother', 'carol']]
+    >>> s_lookup_all(x, "*.mother")
+    [['mother', 'carol'], ['mother', 'frances', ['dog', 'spot']]]
+    >>> s_lookup_all(x, "**.dog")
+    [['dog', 'spot']]
+    >>> s_lookup_all(x, "**mother.dog")
+    [['dog', 'spot']]
+    >>> s_lookup_all(x, "mother.*.dog")
+    [['dog', 'spot']]
+    """
     result = []
     _s_lookup_all(s, path, result.append)
     return result
@@ -113,24 +204,45 @@ def s_lookup(s, path):
         return r[0]
     return None
 
-class _Singleton:
+### Schema objects. You shouldn't instantiate these by hand; use
+### parseSchema instead.
+
+class Schema:
+    """A schema represents a pattern to be applied to s-expressions.
+       Generate them with parseSchema.
+    """
+    def matches(self, s):
+        """Return true iff s matches this schema."""
+        raise NotImplemented()
+    def rep(self):
+        """Return the s-expression representing this schema."""
+        raise NotImplemented()
+
+class _Singleton(Schema):
+    '''superclass for all schemas that represent a single string or list.'''
     def isSingleton(self):
         return True
-    def matches(self, item):
-        raise NotImplemented()
 
     def clear(self):
+        '''used during parsing.  resets this schema to an
+           I-have-matched-nothing state. '''
         self._got = False
     def matchItem(self, item):
+        '''used during parsing.  Returns true iff this schema can consume
+           item in its current state.'''
         if not self._got and self.matches(item):
             self._got = True
             return True
         else:
             return False
     def isSatisfied(self):
+        '''used during parsing.  Returns true iff this schema would be
+           satisfied parsing no more items.'''
         return self._got
 
-class _Span:
+class _Span(Schema):
+    '''superclass for all schemas that represent a variable number of strings
+       or lists.'''
     def isSingleton(self):
         return False
     def clear(self):
@@ -141,16 +253,19 @@ class _Span:
         raise NotImplemented()
 
 class _AnyItem(_Singleton):
+    'schema representing any item'
     def matches(self,item):
         return True
     def rep(self):
         return "?"
 class _AnyString(_Singleton):
+    'schema representing any single string'
     def matches(self,item):
         return isinstance(item, str)
     def rep(self):
         return "."
 class _ExactString(_Singleton):
+    'schema that matches only a particular string'
     def __init__(self, s):
         self._s = s
     def matches(self, item):
@@ -158,6 +273,7 @@ class _ExactString(_Singleton):
     def rep(self):
         return "=%s"%self._s
 class _ReString(_Singleton):
+    'schema that matches all strings following a particular regex.'
     def __init__(self, s, regex=None):
         if regex is None:
             regex = re.compile(s)
@@ -171,6 +287,7 @@ class _ReString(_Singleton):
     def rep(self):
         return "%s"%self._s
 class _List(_Singleton):
+    'schema that matches any list whose items match a sequence of schemas'
     def __init__(self, subpatterns):
         self._pats = subpatterns
     def clear(self):
@@ -215,14 +332,16 @@ class _List(_Singleton):
         return [ p.rep() for p in self._pats ]
 
 class _AnyItems(_Span):
+    '''schema matching any number of any items'''
     def matchItem(self, item):
         return True
     def isSatisfied(self):
         return True
     def rep(self):
-        return "*"
+        return "_"
 
 class _NMatches(_Span):
+    'schema matching another schema a given number of times.'
     def __init__(self, alternatives, lo, hi):
         self.lo = lo
         self.hi = hi
@@ -258,6 +377,8 @@ class _NMatches(_Span):
         return result
 
 class _Unordered(_Span):
+    '''schema containing a number of subitems, all of which must match in
+       some order.'''
     def __init__(self, alternatives):
         self.alternatives = alternatives
     def clear(self):
@@ -283,14 +404,55 @@ class SchemaFormatError(Exception):
 
 _RE_PAT = re.compile(r'/((?:[\\.]|[^\\/]+)*)/([ilmstx]*)', re.I)
 
-def parseSchema(s, table):
-
+def parseSchema(s, table=None):
+    """Return a schema object to represent a possible set of s-expressions.
+       The syntax is:
+        "=string" matches a string itself.
+        "*" matches any number of items and can occur only at the end of a list.
+        "_" matches a single item.
+        "." matches any string.
+        "/re/" matches a regex.
+        ".name" matches a named schema stored in the map 'table'.
+
+        (i1 i2 i3)         matches a list of i1 followed by i2 followed by i3.
+
+        (:maybe  i1)       matches zero or one of i1.
+        (:oneof  i1 i2 i3) matches any one of the items i1, i2, i3.
+        (:anyof  i1 i2 i3) matches zero or more of the items i1, i2, i3.
+        (:someof i1 i2 i3) matches one or more of i1, i2, and i3.
+
+        (:unordered i1 i2 i3) matches all of i1, i2, and i3, in any order.
+
+        The matching algorithm is a stupid greedy algorithm.  If you need to
+        check stuff it can't handle, write a new thing.
+
+    >>> import sexp.parse
+    >>> P = sexp.parse.parse
+    >>> PS = lambda s: parseSchema(sexp.parse.parse(s))
+    >>> S1 = PS("(=hello _ . /.*geuse/)")
+    >>> S1.matches(P("(hello (my little) 11:friend from Betelgeuse)"))
+    True
+    >>> S1.matches(P("(hello (my little) (friend from) Betelgeuse)"))
+    False
+    >>> S1.matches(P("(hello (my little) 11:friend from Betelgeuse Prime)"))
+    False
+    >>> S1.matches(P("(hello (my little) friendfrom BetelgeusePrime)"))
+    False
+
+    >>> S2 = PS("(=greetings (:oneof =world =gentlebeings) *)")
+    >>> S2.matches(P("greetings"))
+    False
+    >>> S2.matches(P("(greetings gentlebeings)"))
+    True
+    >>> S2.matches(P("(greetings world please take us to (your leader))"))
+    True
+    """
     if isinstance(s, str):
         if not len(s):
             raise SchemaFormatError("Empty string encountered")
         if s == '*':
             return _AnyItems()
-        elif s == '?':
+        elif s == '_':
             return _AnyItem()
         elif s == '.':
             return _AnyString()
@@ -351,23 +513,3 @@ def parseSchema(s, table):
     else:
         return _List([ parseSchema(i, table) for i in s ])
 
-# Schema format:
-#   "=string" matches a string itself.
-#   "*" matches any number of items and can occur only at the end of a list.
-#   "?" matches a single item.
-#   "." matches any string.
-#   "/re/" matches a regex.
-#   ".name" matches a named schema
-#
-#   (i1 i2 i3)         matches a list of i1 followed by i2 followed by i3.
-#
-#   (:maybe  i1)       matches zero or one of i1.
-#   (:oneof  i1 i2 i3) matches any one of the items i1, i2, i3.
-#   (:anyof  i1 i2 i3) matches zero or more of the items i1, i2, i3.
-#   (:someof i1 i2 i3) matches one or more of i1, i2, and i3.
-#
-#   (:unordered i1 i2 i3) matches all of i1, i2, and i3, in any order.
-#
-# The matching algorithm is a stupid greedy algorithm.  If you need to
-# check stuff it can't handle, write a new thing.
-
author	Nick Mathewson <nickm@torproject.org>	2008-09-11 19:57:27 +0000
committer	Nick Mathewson <nickm@torproject.org>	2008-09-11 19:57:27 +0000
commit	c27349f728b2c8e749b7fd6fd13fad9c05c0cc04 (patch)
tree	b1485f7725e2a4600f19beabbe082a30a583cf40 /lib
parent	6dd05e0e0f3a95204fb1f93f433c4d51c3f4459b (diff)