diff options
author | Nick Mathewson <nickm@torproject.org> | 2008-11-23 04:31:53 +0000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2008-11-23 04:31:53 +0000 |
commit | d19ceed522d80d0b3dba446933a5b9316dc48c0b (patch) | |
tree | cc6fbde2bc2432b3a3e591a96f803198668c1b63 /lib/sexp/parse.py | |
parent | 7f3418fcd091da3fb5cdc11c4820b43bb90d2d20 (diff) |
Okay, so I'll admit that my vision of a future where all the world is an s-expression is probably no more than a figment of my imagination. Someday, though, somebody will want to parse spki in python, and they will sure be glad that svn preserves deleted files.
git-svn-id: file:///home/or/svnrepo/updater/trunk@17371 55e972cd-5a19-0410-ae62-a4d7a52db4cd
Diffstat (limited to 'lib/sexp/parse.py')
-rw-r--r-- | lib/sexp/parse.py | 210 |
1 files changed, 0 insertions, 210 deletions
import re
import base64
import binascii

# Partial implementation of Rivest's proposed S-Expressions standard
# as documented at
#   http://people.csail.mit.edu/rivest/Sexp.txt
#
# Not implemented:
#   [display hints]
#   {basic transport}

__all__ = ['FormatError', 'parse']


class FormatError(Exception):
    """Raised when parsing an encoded s-expression fails."""
    pass


# A bare token: first character may not be a digit; the rest may be
# letters, digits, or the listed punctuation.
_TOKEN_PAT = r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*"

# Matches a single lexeme at the front of an encoded s-expression:
#   group 0 -- a bare token;
#   group 1 -- an optionally length-prefixed string: the opener of a
#              verbatim (:), base64 (|), hex (#), or transport ({)
#              string, or a complete double-quoted string;
#   group 2 -- an open or close parenthesis.
_LEXEME_START_RE = re.compile(
    r""" \s* (?: (%s) |                        # Grp 0: a token.
                 ([0-9]*(?: [\:\|\#\{] |       # Grp 1: start of string...
                     \"(?:[^\\\"]+|\\.)*\")) | # or a complete qstring.
                 ([\(\)])                      # Grp 2: a paren.
             )""" % _TOKEN_PAT,
    re.X | re.M | re.S)


class _P:
    """Marker type distinguishing parenthesis lexemes from string data."""
    def __init__(self, val):
        self.val = val
    def __repr__(self):
        return "_P(%r)" % self.val

# Singleton lexemes; _lexItems yields these and parse compares by identity.
_OPEN_PAREN = _P("(")
_CLOSE_PAREN = _P(")")
del _P

# Used to strip embedded whitespace out of hex and base64 strings.
_SPACE_RE = re.compile(r'\s+')

# Matches one escape sequence inside a double-quoted string.
# BUGFIX: the original compiled this spaced-out pattern WITHOUT re.X, so
# the literal spaces/newlines meant no escape could ever match; it also
# allowed the digit 8 in octal escapes ([0-8]), which int(..., 8) rejects.
_UNQUOTE_CHAR_RE = re.compile(r'''
    \\ (?: [abtnvfr] | \r \n ? | \n \r ? | [xX] [A-Fa-f0-9]{2} | [0-7]{1,3} )
    ''', re.X)

# Map from escape letter to the character it denotes.
_UNQUOTE_CHAR_MAP = { 'a': '\a',
                      'b': '\b',
                      't': '\t',
                      'n': '\n',
                      'v': '\v',
                      'f': '\f',
                      'r': '\r' }


def _unquoteChar(ch, _U=_UNQUOTE_CHAR_MAP):
    """Return the character denoted by one escape sequence *ch*.

    *ch* is the complete escape, leading backslash included (e.g.
    r"\\n", r"\\x41", r"\\101").  An escaped line break is a line
    continuation and produces the empty string.

    Raises FormatError if an octal escape denotes a value >= 256.
    """
    ch = ch[1:]  # Drop the leading backslash.
    try:
        return _U[ch]
    except KeyError:
        pass
    if ch[0] in "\n\r":
        # Backslash-newline: swallowed entirely.
        return ""
    elif ch[0] in 'xX':
        return chr(int(ch[1:], 16))
    else:
        # Octal escape.  BUGFIX: the original computed int(ch[1:], 8),
        # silently dropping the first octal digit (r"\101" became
        # chr(1), not "A") and crashing on one-digit escapes.
        i = int(ch, 8)
        if i >= 256:
            raise FormatError("Octal character format out of range.")
        return chr(i)


def _readDelimited(s, delim):
    """Return (payload, rest): the text of *s* before the next *delim*
    with all whitespace removed, and the input after the delimiter.

    Raises FormatError if *delim* never occurs in *s*.
    """
    try:
        end = s.index(delim)
    except ValueError:
        raise FormatError("Unterminated %s string" % delim)
    return _SPACE_RE.sub("", s[:end]), s[end + 1:]


def _lexItems(s):
    """Generator that iterates over the lexical items in an encoded
    s-expression.  Yields str for tokens, verbatim strings, and quoted
    strings; bytes for hex and base64 strings (Python 3 binascii); or
    the special objects _OPEN_PAREN and _CLOSE_PAREN.

    >>> list(_lexItems('(4:a)b hello) (world 1:a 0: '))
    [_P('('), 'a)b ', 'hello', _P(')'), _P('('), 'world', 'a', '']

    >>> list(_lexItems('("1""234""hello world" 3"abc" 4" " )'))
    [_P('('), '1', '234', 'hello world', 'abc', ' ', _P(')')]
    """
    s = s.strip()
    while s:
        m = _LEXEME_START_RE.match(s)
        if not m:
            raise FormatError("No pattern match at %r" % s[:30])
        g = m.groups()
        if g[2]:
            # A parenthesis of some kind.
            yield _OPEN_PAREN if g[2] == "(" else _CLOSE_PAREN
            s = s[m.end():]
        elif g[0]:
            # We have a bare token.  Go with that.
            yield g[0]
            s = s[m.end():]
        else:
            assert g[1]
            lastChar = g[1][-1]
            if lastChar == '"':
                # A complete double-quoted string, maybe length-prefixed.
                qidx = g[1].index('"')
                quoted = g[1][qidx + 1:-1]  # All but the quotes.
                # BUGFIX: re.sub hands the replacement function a Match
                # object; the original passed _unquoteChar directly,
                # which slices its argument like a string.
                data = _UNQUOTE_CHAR_RE.sub(
                    lambda mo: _unquoteChar(mo.group(0)), quoted)
                if qidx != 0:
                    # The prefix gives the decoded length; verify it.
                    num = int(g[1][:qidx], 10)
                    if num != len(data):
                        raise FormatError("Bad length on quoted string")
                yield data
                s = s[m.end():]
                continue

            num = g[1][:-1]
            num = int(num, 10) if num else None

            if lastChar == ':':
                # Verbatim string: "<len>:<bytes>"; length is mandatory.
                if num is None:
                    raise FormatError("Missing length on verbatim string")
                s = s[m.end():]
                if len(s) < num:
                    raise FormatError("Truncated verbatim string")
                yield s[:num]
                s = s[num:]
            elif lastChar == '#':
                # Hex string "#...#" with optional length prefix.
                dataStr, s = _readDelimited(s[m.end():], '#')
                try:
                    data = binascii.a2b_hex(dataStr)
                except (TypeError, binascii.Error):
                    # BUGFIX: Python 3 raises binascii.Error, not
                    # TypeError, on malformed input.
                    raise FormatError("Bad hex string")
                if num is not None and len(data) != num:
                    raise FormatError("Bad number on hex string")
                yield data
            elif lastChar == '|':
                # Base64 string "|...|" with optional length prefix.
                dataStr, s = _readDelimited(s[m.end():], '|')
                # Re-pad: the encoding may omit trailing '='.
                mod = len(dataStr) % 4
                if mod:
                    dataStr += "=" * (4 - mod)
                try:
                    data = binascii.a2b_base64(dataStr)
                except (TypeError, binascii.Error):
                    raise FormatError("Bad base64 string")
                if num is not None and len(data) != num:
                    raise FormatError("Bad number on base64 string")
                yield data
            else:
                # "{...}" transport encoding: documented as not
                # implemented.  BUGFIX: reject it with a FormatError
                # instead of the original's `assert None`.
                raise FormatError("Transport encoding not supported")


def parse(s):
    """Parse the encoded s-expression *s* into nested lists of strings.

    Returns the single outermost item (normally a list).  Raises
    FormatError on syntactically invalid input, including unbalanced
    parentheses.  BUGFIX: the original silently accepted an unclosed
    "(" (the stack was never checked after the loop) and let a stray
    ")" escape as IndexError.

    >>> parse("()")
    []
    >>> parse("(1:X3:abc1:d)")
    ['X', 'abc', 'd']
    >>> parse("(1:X((3:abc))1:d)")
    ['X', [['abc']], 'd']
    """
    outermost = []
    stack = []
    push = stack.append
    pop = stack.pop
    add = outermost.append

    for item in _lexItems(s):
        if item is _OPEN_PAREN:
            sublist = []          # (renamed from `next`: don't shadow builtin)
            add(sublist)          # Attach to the current list...
            push(add)             # ...remember where we were...
            add = sublist.append  # ...and descend into the new list.
        elif item is _CLOSE_PAREN:
            if not stack:
                raise FormatError("Unbalanced close parenthesis")
            add = pop()
        else:
            # It's a string.
            add(item)

    if stack:
        raise FormatError("Missing close parenthesis")
    if len(outermost) != 1:
        raise FormatError("No enclosing parenthesis on list")
    return outermost[0]