path: root/lib/sexp/parse.py
author	Nick Mathewson <nickm@torproject.org>	2008-11-23 04:31:53 +0000
committer	Nick Mathewson <nickm@torproject.org>	2008-11-23 04:31:53 +0000
commit	d19ceed522d80d0b3dba446933a5b9316dc48c0b (patch)
tree	cc6fbde2bc2432b3a3e591a96f803198668c1b63 /lib/sexp/parse.py
parent	7f3418fcd091da3fb5cdc11c4820b43bb90d2d20 (diff)
Okay, so I'll admit that my vision of a future where all the world is an s-expression is probably no more than a figment of my imagination. Someday, though, somebody will want to parse spki in python, and they will sure be glad that svn preserves deleted files.
git-svn-id: file:///home/or/svnrepo/updater/trunk@17371 55e972cd-5a19-0410-ae62-a4d7a52db4cd
Diffstat (limited to 'lib/sexp/parse.py')
-rw-r--r--	lib/sexp/parse.py	210
1 file changed, 0 insertions(+), 210 deletions(-)
diff --git a/lib/sexp/parse.py b/lib/sexp/parse.py
deleted file mode 100644
index a33b79a..0000000
--- a/lib/sexp/parse.py
+++ /dev/null
@@ -1,210 +0,0 @@
-
-import re
-import binascii
-
-# Partial implementation of Rivest's proposed S-Expressions standard
-# as documented at
-# http://people.csail.mit.edu/rivest/Sexp.txt
-#
-# The lexer is slightly optimized: a single compiled regular expression
-# recognizes each lexeme.
-#
-# Not implemented:
-# [display hints]
-# {basic transport}
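-#
-# Roughly, the encodings handled below (illustrative examples, not taken
-# from the spec text):
-#
-#   token         hello-world
-#   verbatim      11:hello world
-#   hexadecimal   #68656C6C6F20776F726C64#   (optional decimal length prefix)
-#   base64        |aGVsbG8gd29ybGQ=|         (optional decimal length prefix)
-#   quoted        "hello\nworld"             (optional decimal length prefix)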
-
-__all__ = [ 'FormatError', 'parse' ]
-
-class FormatError(Exception):
- """Raised when parsing fails."""
- pass
-
-_TOKEN_PAT = r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*"
-# Regular expression to match a single lexeme from an encoded s-expression.
-# Group numbers below refer to the tuple returned by m.groups().
-_LEXEME_START_RE = re.compile(
-    r""" \s* (?: (%s) |                         # Group 0: a token.
-                 ([0-9]*(?: [\:\|\#\{] |        # Group 1: the start of a string,
-                     \"(?:[^\\\"]+|\\.)*\")) |  #   or a whole quoted string.
-                 ([\(\)])                       # Group 2: a parenthesis.
-             )"""
-    % _TOKEN_PAT, re.X|re.M|re.S)
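-# For example (group indices refer to the m.groups() tuple used below):
-#   _LEXEME_START_RE.match('abc ').groups()  == ('abc', None, None)
-#   _LEXEME_START_RE.match('3:abc').groups() == (None, '3:', None)
-#   _LEXEME_START_RE.match('(a b)').groups() == (None, None, '(')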
-
-class _P:
- """Helper class for parenthesis tokens."""
- def __init__(self, val):
- self.val = val
- def __repr__(self):
- return "_P(%r)"%self.val
-
-_OPEN_PAREN = _P("(")
-_CLOSE_PAREN = _P(")")
-del _P
-_SPACE_RE = re.compile(r'\s+')
-
-# Matches each escape sequence in a quoted string that we need to decode:
-# a named escape, a line continuation, a hex escape, or an octal escape.
-_UNQUOTE_CHAR_RE = re.compile(r'''
-    \\ (?: [abtnvfr] | \r \n ? | \n \r ? | [xX] [A-Fa-f0-9]{2} | [0-7]{1,3} )
-    ''', re.X)
-
-# Map from quoted representation to unquoted format.
-_UNQUOTE_CHAR_MAP = { 'a': '\a',
- 'b': '\b',
- 't': '\t',
- 'n': '\n',
- 'v': '\v',
- 'f': '\f',
- 'r': '\r' }
-def _unquoteChar(m, _U=_UNQUOTE_CHAR_MAP):
-    # re.sub passes a match object; the matched text begins with a backslash.
-    ch = m.group(0)[1:]
-    try:
-        return _U[ch]
-    except KeyError:
-        pass
-    if ch[0] in "\n\r":
-        # A backslash before a line break is a line continuation.
-        return ""
-    elif ch[0] in 'xX':
-        return chr(int(ch[1:], 16))
-    else:
-        i = int(ch, 8)
-        if i >= 256:
-            raise FormatError("Octal character format out of range.")
-        return chr(i)
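-
-# For example, with the definitions above, both of these hold:
-#   _UNQUOTE_CHAR_RE.sub(_unquoteChar, r'a\nb')     == 'a\nb'
-#   _UNQUOTE_CHAR_RE.sub(_unquoteChar, r'\x41\101') == 'AA'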
-
-def _lexItems(s):
- """Generator that iterates over the lexical items in an encoded
- s-expression. Yields a string for strings, or the special objects
- _OPEN_PAREN and _CLOSE_PAREN.
-
- >>> list(_lexItems('(4:a)b hello) (world 1:a 0: '))
- [_P('('), 'a)b ', 'hello', _P(')'), _P('('), 'world', 'a', '']
-
- >>> list(_lexItems('a b-c 1#20#2#2061##686877# |aGVsbG8gd29ybGQ|'))
- ['a', 'b-c', ' ', ' a', 'hhw', 'hello world']
-
- >>> list(_lexItems('#2 0# |aGVs\\nbG8 gd29yb GQ| '))
- [' ', 'hello world']
-
- >>> list(_lexItems('|YWJjZA==| x |YWJjZA| 3|Y W J j|'))
- ['abcd', 'x', 'abcd', 'abc']
-
-    >>> list(_lexItems('("1""234""hello world" 3"abc" 4"    " )'))
-    [_P('('), '1', '234', 'hello world', 'abc', '    ', _P(')')]
-
- """
- s = s.strip()
- while s:
- m = _LEXEME_START_RE.match(s)
- if not m:
- raise FormatError("No pattern match at %r"%s[:30])
- g = m.groups()
- if g[2]:
- if g[2] == "(":
- yield _OPEN_PAREN
- else:
- yield _CLOSE_PAREN
- s = s[m.end():]
- elif g[0]:
- # we have a token. Go with that.
- yield g[0]
- s = s[m.end():]
- else:
- assert g[1]
- lastChar = g[1][-1]
- if lastChar == '"':
- qidx = g[1].index('"')
- quoted = g[1][qidx+1:-1] # All but quotes.
- data = _UNQUOTE_CHAR_RE.sub(_unquoteChar, quoted)
- if qidx != 0:
- num = int(g[1][:qidx], 10)
- if num != len(data):
- raise FormatError("Bad length on quoted string")
- yield data
- s = s[m.end():]
- continue
-
- num = g[1][:-1]
- if len(num):
- num = int(num, 10)
- else:
- num = None
-
-            if lastChar == ':':
-                if num is None:
-                    raise FormatError("Missing length on verbatim string")
-                s = s[m.end():]
-                if len(s) < num:
-                    raise FormatError("Truncated verbatim string")
-                yield s[:num]
-                s = s[num:]
- elif lastChar == '#':
- s = s[m.end():]
- try:
- nextHash = s.index('#')
- except ValueError:
- raise FormatError("Unterminated # string")
- dataStr = _SPACE_RE.sub("", s[:nextHash])
- try:
- data = binascii.a2b_hex(dataStr)
- except TypeError:
- raise FormatError("Bad hex string")
- if num is not None and len(data) != num:
- raise FormatError("Bad number on hex string")
- yield data
- s = s[nextHash+1:]
- elif lastChar == '|':
- s = s[m.end():]
- try:
- nextBar = s.index('|')
- except ValueError:
- raise FormatError("Unterminated | string")
- dataStr = _SPACE_RE.sub("", s[:nextBar])
- # Re-pad.
- mod = len(dataStr) % 4
- if mod:
- dataStr += "=" * (4 - mod)
- try:
- data = binascii.a2b_base64(dataStr)
- except TypeError:
- raise FormatError("Bad base64 string")
- if num is not None and len(data) != num:
- raise FormatError("Bad number on base64 string")
- yield data
- s = s[nextBar+1:]
-            else:
-                # The only remaining case is '{' (basic transport), which
-                # this parser does not implement.
-                raise FormatError("Basic transport not supported")
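-
-# Note that declared lengths are checked strictly; for example, both
-# list(_lexItems('3:ab')) and list(_lexItems('2#616263#')) raise FormatError.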
-
-def parse(s):
- """
- >>> parse("()")
- []
- >>> parse("(1:X3:abc1:d)")
- ['X', 'abc', 'd']
- >>> parse("(1:X((3:abc))1:d)")
- ['X', [['abc']], 'd']
- >>> parse("(a b (d\\ne f) (g) #ff00ff# |aGVsbG8gd29ybGQ|)")
- ['a', 'b', ['d', 'e', 'f'], ['g'], '\\xff\\x00\\xff', 'hello world']
-
- """
- outermost = []
- stack = [ ]
- push = stack.append
- pop = stack.pop
- add = outermost.append
-
- for item in _lexItems(s):
-        if item is _OPEN_PAREN:
-            lst = []
-            add(lst)
-            push(add)
-            add = lst.append
-        elif item is _CLOSE_PAREN:
-            if not stack:
-                raise FormatError("Unbalanced close parenthesis")
-            add = pop()
-        else:
-            # It's a string.
-            add(item)
-
-    if stack:
-        raise FormatError("Unclosed parenthesis")
-    if len(outermost) != 1:
-        raise FormatError("No enclosing parenthesis on list")
-    return outermost[0]
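-
-# A worked example with hypothetical SPKI-flavored data:
-#   >>> parse('(certificate (issuer 5:alice) #616263#)')
-#   ['certificate', ['issuer', 'alice'], 'abc']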
-