Initial auto-updater commit: s-expression libray and format spec.

git-svn-id: file:///home/or/svnrepo/updater/trunk@16692 55e972cd-5a19-0410-ae62-a4d7a52db4cd
author: Nick Mathewson <nickm@torproject.org> 2008-08-30 16:33:47 +0000
committer: Nick Mathewson <nickm@torproject.org> 2008-08-30 16:33:47 +0000
commit: 7b434c8d229b867df4523a13ec97e30ec26b6b7b (patch)
tree: 99225347698179a361cab873f5f8bd4731493d12 /lib/sexp/access.py
1 files changed, 373 insertions, 0 deletions
diff --git a/lib/sexp/access.py b/lib/sexp/access.py
new file mode 100644
index 0000000..d9b40dc
--- /dev/null
+++ b/lib/sexp/access.py
@@ -0,0 +1,373 @@
+
+import re
+import sys
+
+def s_tag(s):
+    """
+    >>> s_tag("a string") is None
+    True
+    >>> s_tag(["a-tagged", "list"])
+    'a-tagged'
+    >>> s_tag([["untagged"], "list"]) is None
+    True
+    """
+    if len(s) and not isinstance(s, str) and isinstance(s[0],str):
+        return s[0]
+    else:
+        return None
+
+def s_child(s, tag):
+    for child in s:
+        if s_tag(child) == tag:
+            return child
+    return None
+
+def s_children(s, tag):
+    return (ch for ch in s if s_tag(ch) == tag)
+
+def s_descendants(s, tags=()):
+    stack = [ ]
+    push = stack.append
+    pop = stack.pop
+
+    idx = 0
+    while 1:
+        while idx == len(s):
+            try:
+                s, idx = pop()
+            except IndexError:
+                return
+        if isinstance(s[idx], str):
+            idx += 1
+            continue
+        if s_tag(s[idx]) in tags:
+            yield s[idx]
+
+
+class SExpr(list):
+    def __init__(self, stuff=()):
+        list.__init__(self, stuff)
+        self._d = None
+
+    def __getattr__(self, item):
+        if self._d is None: self._buildDict()
+        return self[self._d[item]]
+
+    def __getitem__(self, idx):
+        item = list.__getitem__(self, idx)
+        if type(item) in (list, tuple): #exact match only.
+            item = self[idx] = SExpr(item)
+        return item
+
+    def _buildDict(self):
+        self._d = d = {}
+        for idx in xrange(len(self)):
+            item = list.__getitem__(self, idx)
+            t = s_tag(item)
+            if t is not None:
+                d[t] = idx
+
+def _s_lookup_all(s, path, callback):
+
+    # XXXX: Watch out; ** gets pretty heavy pretty fast.
+
+    if isinstance(path, str):
+        path = path.split(".")
+
+    if len(path) == 0:
+        callback(s)
+        return
+
+    for p_idx in xrange(len(path)):
+        p_item = path[p_idx]
+
+        if p_item == '*':
+            for ch in s:
+                if not isinstance(ch, str):
+                    _s_lookup_all(s, path[p_idx+1:], callback)
+            return
+        elif p_item == '**':
+            for ch in s_descendants(s):
+                if not isinstance(ch, str):
+                    _s_lookup_all(s, path[p_idx+1:], callback)
+            return
+        elif p_item.startswith('**'):
+            for ch in s_descendants(s):
+                if s_tag(ch) == p_item[2:]:
+                    _s_lookup_all(s, path[p_idx+1:], callback)
+        else:
+            s = s_child(s, p_item)
+            if s is None:
+                return
+
+    callback(s)
+
+def s_lookup_all(s, path):
+    result = []
+    _s_lookup_all(s, path, result.append)
+    return result
+
+def s_lookup(s, path):
+    r = s_lookup_all(s, path)
+    if len(r):
+        return r[0]
+    return None
+
+class _Singleton:
+    def isSingleton(self):
+        return True
+    def matches(self, item):
+        raise NotImplemented()
+
+    def clear(self):
+        self._got = False
+    def matchItem(self, item):
+        if not self._got and self.matches(item):
+            self._got = True
+            return True
+        else:
+            return False
+    def isSatisfied(self):
+        return self._got
+
+class _Span:
+    def isSingleton(self):
+        return False
+    def clear(self):
+        pass
+    def matchItem(self, item):
+        raise NotImplemented()
+    def isSatisfied(self):
+        raise NotImplemented()
+
+class _AnyItem(_Singleton):
+    def matches(self,item):
+        return True
+    def rep(self):
+        return "?"
+class _AnyString(_Singleton):
+    def matches(self,item):
+        return isinstance(item, str)
+    def rep(self):
+        return "."
+class _ExactString(_Singleton):
+    def __init__(self, s):
+        self._s = s
+    def matches(self, item):
+        return item == self._s
+    def rep(self):
+        return "=%s"%self._s
+class _ReString(_Singleton):
+    def __init__(self, s, regex=None):
+        if regex is None:
+            regex = re.compile(s)
+        self._re = regex
+        self._s = s
+    def matches(self, item):
+        if not isinstance(item, str):
+            return False
+        m = self._re.match(item)
+        return m and m.end() == len(item)
+    def rep(self):
+        return "%s"%self._s
+class _List(_Singleton):
+    def __init__(self, subpatterns):
+        self._pats = subpatterns
+    def clear(self):
+        _Singleton.clear(self)
+        for p in self._pats:
+            p.clear()
+    def matches(self, item):
+        if isinstance(item, str):
+            return False
+
+        i_idx = 0
+        pat_idx = 0
+        while i_idx < len(item):
+            try:
+                subpat = self._pats[pat_idx]
+            except:
+                return False # Too many items.
+
+            if subpat.isSingleton():
+                if not subpat.matches(item[i_idx]):
+                    return False
+                i_idx += 1
+                pat_idx += 1
+            else:
+                subpat.clear()
+                while i_idx < len(item) and subpat.matchItem(item[i_idx]):
+                    i_idx += 1
+                if not subpat.isSatisfied():
+                    return False
+                pat_idx += 1
+
+        # Out of items, but we have more patterns.  Make sure they all accept
+        # 0 items.
+        if pat_idx < len(self._pats):
+            for subpat in self._pats[pat_idx:]:
+                subpat.clear()
+                if not subpat.isSatisfied():
+                    return False
+        return True
+
+    def rep(self):
+        return [ p.rep() for p in self._pats ]
+
+class _AnyItems(_Span):
+    def matchItem(self, item):
+        return True
+    def isSatisfied(self):
+        return True
+    def rep(self):
+        return "*"
+
+class _NMatches(_Span):
+    def __init__(self, alternatives, lo, hi):
+        self.lo = lo
+        self.hi = hi
+        self.count = 0
+        self.alternatives = alternatives
+        for a in alternatives:
+            if not a.isSingleton():
+                raise SchemaFormatError("Nexted span inside span")
+    def clear(self):
+        self.count = 0
+        for p in self.alternatives:
+            p.clear()
+    def matchItem(self, item):
+        if self.count == self.hi:
+            return False
+        for p in self.alternatives:
+            if p.matches(item):
+                self.count += 1
+                return True
+        return False
+    def isSatisfied(self):
+        return self.lo <= self.count <= self.hi
+
+    def rep(self):
+        name = { (1,1): ":oneof",
+                 (0,1): ":maybe",
+                 (0,sys.maxint): ":anyof",
+                 (1,sys.maxint): ":someof" }.get((self.lo, self.hi))
+        if name is None:
+            name = ":%d-%d"%(self.lo, self.hi)
+        result = [ name ]
+        result.extend(p.rep() for p in self.alternatives)
+        return result
+
+class _Unordered(_Span):
+    def __init__(self, alternatives):
+        self.alternatives = alternatives
+    def clear(self):
+        for p in self.alternatives:
+            p.clear()
+    def matchItem(self, item):
+        for p in self.alternatives:
+            if p.matchItem(item):
+                return True
+        return False
+    def isSatisfied(self):
+        for p in self.alternatives:
+            if not p.isSatisfied():
+                return False
+        return True
+    def rep(self):
+        result = [ ":unordered" ]
+        result.extend(p.rep() for p in self.alternatives)
+        return result
+
+class SchemaFormatError(Exception):
+    pass
+
+_RE_PAT = re.compile(r'/((?:[\\.]|[^\\/]+)*)/([ilmstx]*)', re.I)
+
+def parseSchema(s, table):
+
+    if isinstance(s, str):
+        if not len(s):
+            raise SchemaFormatError("Empty string encountered")
+        if s == '*':
+            return _AnyItems()
+        elif s == '?':
+            return _AnyItem()
+        elif s == '.':
+            return _AnyString()
+        elif s.startswith('='):
+            return _ExactString(s[1:])
+        elif s.startswith('.'):
+            try:
+                return table[s[1:]]
+            except KeyError:
+                raise SchemaFormatError("Unknown reference %s"%s)
+        else:
+            m = _RE_PAT.match(s)
+            if m:
+                flags = 0
+                for char in m.group(2):
+                    flags |= { "i":re.I, "l":re.L, "m":re.M, "s":re.S,
+                               "t":re.T, "x":re.X }[char.lower()]
+                try:
+                    p = re.compile(m.group(1), flags)
+                except re.error, e:
+                    raise SchemaFormatError("Couldn't compile %s"%s)
+
+                return _ReString(s, p)
+
+            raise SchemaFormatError("Confusing entry %s"%s)
+    elif len(s) and isinstance(s[0], str) and s[0].startswith(':'):
+        tag = s[0]
+
+        m = re.match(r'\:(\d*)(\-\d*)?$', tag)
+        if m:
+            g = m.groups()
+            if g[0]:
+                lo = int(g[0], 10)
+            else:
+                lo = 0
+            if g[1]:
+                if len(g[1]) > 1:
+                    hi = int(g[1][1:], 10)
+                else:
+                    hi = sys.maxint
+            else:
+                hi = lo
+        else:
+            try:
+                lo,hi = { ":maybe": (0,1),
+                          ":oneof": (1,1),
+                          ":anyof": (0,sys.maxint),
+                          ":someof":(1,sys.maxint),
+                          ":unordered": (None, None) }[tag]
+            except KeyError:
+                raise SchemaFormatError("Unknown tag %s"%tag)
+
+        subitems = [ parseSchema(i, table) for i in s[1:] ]
+        if lo is not None:
+            return _NMatches(subitems, lo, hi)
+        else:
+            return _Unordered(subitems)
+    else:
+        return _List([ parseSchema(i, table) for i in s ])
+
+# Schema format:
+#   "=string" matches a string itself.
+#   "*" matches any number of items and can occur only at the end of a list.
+#   "?" matches a single item.
+#   "." matches any string.
+#   "/re/" matches a regex.
+#   ".name" matches a named schema
+#
+#   (i1 i2 i3)         matches a list of i1 followed by i2 followed by i3.
+#
+#   (:maybe  i1)       matches zero or one of i1.
+#   (:oneof  i1 i2 i3) matches any one of the items i1, i2, i3.
+#   (:anyof  i1 i2 i3) matches zero or more of the items i1, i2, i3.
+#   (:someof i1 i2 i3) matches one or more of i1, i2, and i3.
+#
+#   (:unordered i1 i2 i3) matches all of i1, i2, and i3, in any order.
+#
+# The matching algorithm is a stupid greedy algorithm.  If you need to
+# check stuff it can't handle, write a new thing.
+
author	Nick Mathewson <nickm@torproject.org>	2008-08-30 16:33:47 +0000
committer	Nick Mathewson <nickm@torproject.org>	2008-08-30 16:33:47 +0000
commit	7b434c8d229b867df4523a13ec97e30ec26b6b7b (patch)
tree	99225347698179a361cab873f5f8bd4731493d12 /lib/sexp/access.py