diff options
author | Nick Mathewson <nickm@torproject.org> | 2008-11-23 04:31:53 +0000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2008-11-23 04:31:53 +0000 |
commit | d19ceed522d80d0b3dba446933a5b9316dc48c0b (patch) | |
tree | cc6fbde2bc2432b3a3e591a96f803198668c1b63 /lib/sexp/parse.py | |
parent | 7f3418fcd091da3fb5cdc11c4820b43bb90d2d20 (diff) |
Okay, so I'll admit that my vision of a future where all the world is an s-expression is probably no more than a figment of my imagination. Someday, though, somebody will want to parse spki in python, and they will sure be glad that svn preserves deleted files.
git-svn-id: file:///home/or/svnrepo/updater/trunk@17371 55e972cd-5a19-0410-ae62-a4d7a52db4cd
Diffstat (limited to 'lib/sexp/parse.py')
-rw-r--r-- | lib/sexp/parse.py | 210 |
1 files changed, 0 insertions, 210 deletions
import re
import base64
import binascii

# Partial implementation of Rivest's proposed S-Expressions standard
# as documented at
#   http://people.csail.mit.edu/rivest/Sexp.txt
#
# Not implemented:
#   [display hints]
#   {basic transport}

__all__ = ['FormatError', 'parse']


class FormatError(Exception):
    """Raised when parsing an encoded s-expression fails."""
    pass


# A bare token: first character may not be a digit; the rest may be
# letters, digits, or the listed punctuation.
_TOKEN_PAT = r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*"

# Matches a single lexeme at the front of an encoded s-expression:
#   group 0 -- a bare token;
#   group 1 -- an optionally length-prefixed string: the opener of a
#              verbatim (:), base64 (|), hex (#), or transport ({)
#              string, or a complete double-quoted string;
#   group 2 -- an open or close parenthesis.
_LEXEME_START_RE = re.compile(
    r""" \s* (?: (%s) |                        # Grp 0: a token.
                 ([0-9]*(?: [\:\|\#\{] |       # Grp 1: start of string...
                     \"(?:[^\\\"]+|\\.)*\")) | # or a complete qstring.
                 ([\(\)])                      # Grp 2: a paren.
             )""" % _TOKEN_PAT,
    re.X | re.M | re.S)


class _P:
    """Marker type distinguishing parenthesis lexemes from string data."""
    def __init__(self, val):
        self.val = val
    def __repr__(self):
        return "_P(%r)" % self.val

# Singleton lexemes; _lexItems yields these and parse compares by identity.
_OPEN_PAREN = _P("(")
_CLOSE_PAREN = _P(")")
del _P

# Used to strip embedded whitespace out of hex and base64 strings.
_SPACE_RE = re.compile(r'\s+')

# Matches one escape sequence inside a double-quoted string.
# BUGFIX: the original compiled this spaced-out pattern WITHOUT re.X, so
# the literal spaces/newlines meant no escape could ever match; it also
# allowed the digit 8 in octal escapes ([0-8]), which int(..., 8) rejects.
_UNQUOTE_CHAR_RE = re.compile(r'''
    \\ (?: [abtnvfr] | \r \n ? | \n \r ? | [xX] [A-Fa-f0-9]{2} | [0-7]{1,3} )
    ''', re.X)

# Map from escape letter to the character it denotes.
_UNQUOTE_CHAR_MAP = { 'a': '\a',
                      'b': '\b',
                      't': '\t',
                      'n': '\n',
                      'v': '\v',
                      'f': '\f',
                      'r': '\r' }


def _unquoteChar(ch, _U=_UNQUOTE_CHAR_MAP):
    """Return the character denoted by one escape sequence *ch*.

    *ch* is the complete escape, leading backslash included (e.g.
    r"\\n", r"\\x41", r"\\101").  An escaped line break is a line
    continuation and produces the empty string.

    Raises FormatError if an octal escape denotes a value >= 256.
    """
    ch = ch[1:]  # Drop the leading backslash.
    try:
        return _U[ch]
    except KeyError:
        pass
    if ch[0] in "\n\r":
        # Backslash-newline: swallowed entirely.
        return ""
    elif ch[0] in 'xX':
        return chr(int(ch[1:], 16))
    else:
        # Octal escape.  BUGFIX: the original computed int(ch[1:], 8),
        # silently dropping the first octal digit (r"\101" became
        # chr(1), not "A") and crashing on one-digit escapes.
        i = int(ch, 8)
        if i >= 256:
            raise FormatError("Octal character format out of range.")
        return chr(i)


def _readDelimited(s, delim):
    """Return (payload, rest): the text of *s* before the next *delim*
    with all whitespace removed, and the input after the delimiter.

    Raises FormatError if *delim* never occurs in *s*.
    """
    try:
        end = s.index(delim)
    except ValueError:
        raise FormatError("Unterminated %s string" % delim)
    return _SPACE_RE.sub("", s[:end]), s[end + 1:]


def _lexItems(s):
    """Generator that iterates over the lexical items in an encoded
    s-expression.  Yields str for tokens, verbatim strings, and quoted
    strings; bytes for hex and base64 strings (Python 3 binascii); or
    the special objects _OPEN_PAREN and _CLOSE_PAREN.

    >>> list(_lexItems('(4:a)b hello) (world 1:a 0: '))
    [_P('('), 'a)b ', 'hello', _P(')'), _P('('), 'world', 'a', '']

    >>> list(_lexItems('("1""234""hello world" 3"abc" 4" " )'))
    [_P('('), '1', '234', 'hello world', 'abc', ' ', _P(')')]
    """
    s = s.strip()
    while s:
        m = _LEXEME_START_RE.match(s)
        if not m:
            raise FormatError("No pattern match at %r" % s[:30])
        g = m.groups()
        if g[2]:
            # A parenthesis of some kind.
            yield _OPEN_PAREN if g[2] == "(" else _CLOSE_PAREN
            s = s[m.end():]
        elif g[0]:
            # We have a bare token.  Go with that.
            yield g[0]
            s = s[m.end():]
        else:
            assert g[1]
            lastChar = g[1][-1]
            if lastChar == '"':
                # A complete double-quoted string, maybe length-prefixed.
                qidx = g[1].index('"')
                quoted = g[1][qidx + 1:-1]  # All but the quotes.
                # BUGFIX: re.sub hands the replacement function a Match
                # object; the original passed _unquoteChar directly,
                # which slices its argument like a string.
                data = _UNQUOTE_CHAR_RE.sub(
                    lambda mo: _unquoteChar(mo.group(0)), quoted)
                if qidx != 0:
                    # The prefix gives the decoded length; verify it.
                    num = int(g[1][:qidx], 10)
                    if num != len(data):
                        raise FormatError("Bad length on quoted string")
                yield data
                s = s[m.end():]
                continue

            num = g[1][:-1]
            num = int(num, 10) if num else None

            if lastChar == ':':
                # Verbatim string: "<len>:<bytes>"; length is mandatory.
                if num is None:
                    raise FormatError("Missing length on verbatim string")
                s = s[m.end():]
                if len(s) < num:
                    raise FormatError("Truncated verbatim string")
                yield s[:num]
                s = s[num:]
            elif lastChar == '#':
                # Hex string "#...#" with optional length prefix.
                dataStr, s = _readDelimited(s[m.end():], '#')
                try:
                    data = binascii.a2b_hex(dataStr)
                except (TypeError, binascii.Error):
                    # BUGFIX: Python 3 raises binascii.Error, not
                    # TypeError, on malformed input.
                    raise FormatError("Bad hex string")
                if num is not None and len(data) != num:
                    raise FormatError("Bad number on hex string")
                yield data
            elif lastChar == '|':
                # Base64 string "|...|" with optional length prefix.
                dataStr, s = _readDelimited(s[m.end():], '|')
                # Re-pad: the encoding may omit trailing '='.
                mod = len(dataStr) % 4
                if mod:
                    dataStr += "=" * (4 - mod)
                try:
                    data = binascii.a2b_base64(dataStr)
                except (TypeError, binascii.Error):
                    raise FormatError("Bad base64 string")
                if num is not None and len(data) != num:
                    raise FormatError("Bad number on base64 string")
                yield data
            else:
                # "{...}" transport encoding: documented as not
                # implemented.  BUGFIX: reject it with a FormatError
                # instead of the original's `assert None`.
                raise FormatError("Transport encoding not supported")


def parse(s):
    """Parse the encoded s-expression *s* into nested lists of strings.

    Returns the single outermost item (normally a list).  Raises
    FormatError on syntactically invalid input, including unbalanced
    parentheses.  BUGFIX: the original silently accepted an unclosed
    "(" (the stack was never checked after the loop) and let a stray
    ")" escape as IndexError.

    >>> parse("()")
    []
    >>> parse("(1:X3:abc1:d)")
    ['X', 'abc', 'd']
    >>> parse("(1:X((3:abc))1:d)")
    ['X', [['abc']], 'd']
    """
    outermost = []
    stack = []
    push = stack.append
    pop = stack.pop
    add = outermost.append

    for item in _lexItems(s):
        if item is _OPEN_PAREN:
            sublist = []          # (renamed from `next`: don't shadow builtin)
            add(sublist)          # Attach to the current list...
            push(add)             # ...remember where we were...
            add = sublist.append  # ...and descend into the new list.
        elif item is _CLOSE_PAREN:
            if not stack:
                raise FormatError("Unbalanced close parenthesis")
            add = pop()
        else:
            # It's a string.
            add(item)

    if stack:
        raise FormatError("Missing close parenthesis")
    if len(outermost) != 1:
        raise FormatError("No enclosing parenthesis on list")
    return outermost[0]