summaryrefslogtreecommitdiff
path: root/lib/sexp/encode.py
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2008-11-23 04:31:53 +0000
committerNick Mathewson <nickm@torproject.org>2008-11-23 04:31:53 +0000
commitd19ceed522d80d0b3dba446933a5b9316dc48c0b (patch)
treecc6fbde2bc2432b3a3e591a96f803198668c1b63 /lib/sexp/encode.py
parent7f3418fcd091da3fb5cdc11c4820b43bb90d2d20 (diff)
Okay, so I'll admit that my vision of a future where all the world is an s-expression is probably no more than a figment of my imagination. Someday, though, somebody will want to parse spki in python, and they will sure be glad that svn preserves deleted files.
git-svn-id: file:///home/or/svnrepo/updater/trunk@17371 55e972cd-5a19-0410-ae62-a4d7a52db4cd
Diffstat (limited to 'lib/sexp/encode.py')
-rw-r--r--lib/sexp/encode.py223
1 files changed, 0 insertions, 223 deletions
diff --git a/lib/sexp/encode.py b/lib/sexp/encode.py
deleted file mode 100644
index 1df6406..0000000
--- a/lib/sexp/encode.py
+++ /dev/null
@@ -1,223 +0,0 @@
-
-
-import base64
-import binascii
-import re
-import hashlib
-
def _encodeHex(s):
    """
    Encode a byte string in the s-expression hexadecimal format: the
    lowercase hex digits of every byte, wrapped in '#' delimiters.

    s -- the byte string to encode (a plain str on Python 2).

    Returns a native str.  binascii.b2a_hex() hands back a bytes object
    on interpreters where bytes and str are distinct types; the digits
    are decoded there so that the "#...#" wrapper never embeds a
    b'...' repr.

    >>> _encodeHex(b"Hello world")
    '#48656c6c6f20776f726c64#'
    >>> _encodeHex(b"")
    '##'
    """
    digits = binascii.b2a_hex(s)
    if not isinstance(digits, str):
        # Hex digits are always plain ASCII, so this decode cannot fail.
        digits = digits.decode("ascii")
    return "#%s#" % digits
-
def _encodeBase64(s):
    r"""
    Encode a byte string in the s-expression base64 format: the base64
    text wrapped in '|' delimiters, with a newline embedded after every
    76 output characters.

    s -- the byte string to encode (a plain str on Python 2).

    Returns a native str.  The result has no leading or trailing
    whitespace between the delimiters.

    >>> _encodeBase64(b"")
    '||'
    >>> _encodeBase64(b"Hello world")
    '|SGVsbG8gd29ybGQ=|'
    >>> print(_encodeBase64(b"Hello world"))
    |SGVsbG8gd29ybGQ=|
    >>> _encodeBase64(b"Good night, sweet prince! A flock of angels "
    ...               b"sing thee to thy rest")
    '|R29vZCBuaWdodCwgc3dlZXQgcHJpbmNlISBBIGZsb2NrIG9mIGFuZ2VscyBzaW5nIHRoZWUgdG8g\ndGh5IHJlc3Q=|'

    """
    # base64.encodestring() no longer exists in current Python releases.
    # It was a thin loop over binascii.b2a_base64() in 57-byte chunks
    # (57 input bytes -> 76 output characters plus a newline), which is
    # reproduced here so the output stays byte-for-byte the same.
    chunk_size = 57
    lines = []
    for start in range(0, len(s), chunk_size):
        line = binascii.b2a_base64(s[start:start + chunk_size])
        if not isinstance(line, str):
            # bytes result: the base64 alphabet is pure ASCII.
            line = line.decode("ascii")
        lines.append(line)
    # Every chunk ends in "\n"; strip() drops only the final one.
    return "|%s|" % "".join(lines).strip()
-
# Map from a character value to its representation in a quoted-string.
#
# The named escapes below (including the ones for '"' and '\\') take
# priority.  Every other printable ASCII character maps to itself, and
# everything else maps to a two-digit \xNN escape.
_QUOTED_MAP = {
    '\b': "\\b",
    '\t': "\\t",
    '\v': "\\v",
    '\n': "\\n",
    '\f': "\\f",
    '\r': "\\r",
    '"': "\\\"",
    '\\': "\\\\",
}
for _code in range(256):
    _ch = chr(_code)
    if _ch in _QUOTED_MAP:
        # Never overwrite a named escape: '"' and '\\' are printable but
        # must still be escaped, or the quoted-string cannot be parsed.
        continue
    if 32 <= _code <= 126:
        _QUOTED_MAP[_ch] = _ch
    else:
        _QUOTED_MAP[_ch] = "\\x%02x" % _code
del _code, _ch


# Matches every character that cannot appear literally inside a
# quoted-string: '"', '\\', and anything outside printable ASCII.
_QUOTED_CHAR_RE = re.compile(r'["\\]|[^ -~]')
def _replaceQuotedChar(match, _Q=_QUOTED_MAP):
    """Helper for re.sub(): return the escaped form of the single
       character matched by _QUOTED_CHAR_RE.
    """
    return _Q[match.group(0)]

def _encodeQuoted(s, _Q=_QUOTED_MAP):
    r"""
    Encode a string in the s-expression quoted-string format: wrapped in
    double quotes, with '"', '\' and every character outside printable
    ASCII replaced by a backslash escape.

    s -- the string to encode; every character must have an entry in
         _QUOTED_MAP (character values 0 through 255).

    >>> _encodeQuoted("")
    '""'
    >>> _encodeQuoted("Hello world")
    '"Hello world"'
    >>> print(_encodeQuoted("Hello \xff\b"))
    "Hello \xff\b"
    >>> print(_encodeQuoted('say "hi" \\ bye'))
    "say \"hi\" \\ bye"
    """
    # This implementation is slower for the case where lots of stuff
    # needs quoting, but faster for the case where only some stuff
    # needs quoting.  If more than about 1/4 of the characters need
    # quoting, then joining map(_Q.__getitem__, s) is faster.  Yes,
    # this is a stupid overoptimization.
    return '"%s"' % (_QUOTED_CHAR_RE.sub(_replaceQuotedChar, s))
-
def _encodeRaw(s):
    r"""
    Return s in the verbatim ("raw") form that canonical encodings are
    built from: the decimal length of s, a colon, then s itself with
    nothing escaped.

    >>> _encodeRaw("")
    '0:'
    >>> _encodeRaw(" ")
    '1: '
    >>> _encodeRaw(" \n")
    '2: \n'
    """
    length = len(s)
    return "%d:%s" % (length, s)
-
# A token starts with a letter or one of - . / _ : * + = and continues
# with any mix of those characters and digits.
_TOKEN_PAT = r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*"

# NOTE: match() only anchors at the start of the string, so a successful
# match proves that a *prefix* is a token.  Callers that need the whole
# string to be a token must also compare the end of the match.
_TOKEN_RE = re.compile(_TOKEN_PAT)
def _writeToken(write,s):
    """Return s unchanged, in the token (unencoded) format.  Only works
       for strings that consist entirely of one token as described by
       _TOKEN_RE; anything else fails the assertion.

       write -- accepted but never called by this function.
                NOTE(review): the name suggests the token was meant to be
                passed to write(); confirm against callers before
                changing, since the return value may be relied on.
       s -- the candidate token.
    """
    m = _TOKEN_RE.match(s)
    # Require the match to cover all of s; a bare match() would accept
    # strings such as "abc def" whose first few characters form a token.
    assert m is not None and m.end() == len(s)
    return s
-
def _encodeCleanest(s, indent=0):
    """Encode s in whatever format seems most human-readable.

       s -- the string to encode.
       indent -- only used for the base64 form: continuation lines are
                 indented by indent+1 spaces.

       The choice is made as follows:
         * a string that is entirely a token is returned unchanged;
         * if more than 3 characters, and more than a quarter of all
           characters, would need an escape in a quoted-string, the
           result is base64 (longer than 16 characters) or hex;
         * anything else becomes a quoted-string.
    """
    m = _TOKEN_RE.match(s)
    # match() alone only proves that a prefix of s is a token; without
    # the end check "Hello world" would be emitted bare and parse back
    # as two tokens.
    if m is not None and m.end() == len(s):
        return s

    # Count the characters whose quoted-string form differs from the
    # character itself.
    n = sum(1 for ch in s if _QUOTED_MAP[ch] != ch)

    if n > 3 and n > len(s)//4:
        if len(s) > 16:
            # Put the indentation after each embedded newline so that it
            # indents the continuation line rather than trailing the
            # previous one.
            return _encodeBase64(s).replace("\n", "\n" + " "*(indent+1))
        return _encodeHex(s)
    return _encodeQuoted(s)
-
def _encodePrettyPrint(s, write, indent=0, niceWidth=80):
    """Pass a human-readable encoding of s to write().

       s -- a string, or a sequence.
       write -- callable that receives each piece of output.
       indent -- number of spaces used when building the opening line of
                 a non-empty sequence.
       niceWidth -- not used anywhere in the visible body.

       A string is written via _encodeCleanest() and an empty sequence is
       written as "()".

       NOTE(review): the non-empty sequence case looks unfinished: it
       builds a local list and then falls off the end of the function
       without writing or returning anything.
    """
    if isinstance(s, str):
        write(_encodeCleanest(s))
        return
    elif len(s) == 0:
        write("()")
        return

    if isinstance(s[0], str):
        # NOTE(review): _encodeCleanest() is handed the whole sequence s
        # here, not its first element s[0]; presumably s[0] was intended.
        parts = [ " "*indent, "(", _encodeCleanest(s), "\n" ]
    else:
        parts = [ "(" ]
    # NOTE(review): parts is never used after this point.
-
def _encodeCanonical(rep, append):
    """Encode the s-expression <rep> in canonical format, handing each
       piece of output to the callable "append" as soon as it is
       produced.

       A str is emitted in raw form; anything else is treated as a
       sequence and emitted as "(" + its encoded members + ")".  Nesting
       is tracked on an explicit stack, not through recursive calls.
    """
    if isinstance(rep, str):
        append(_encodeRaw(rep))
        return

    # Each saved frame is (enclosing sequence, position to resume at).
    frames = []
    current, pos = rep, 0
    append("(")

    while True:
        if pos == len(current):
            # Finished this sequence: close it and go back to its parent,
            # or stop when there is no parent left.
            append(")")
            if not frames:
                return
            current, pos = frames.pop()
        else:
            item = current[pos]
            if isinstance(item, str):
                append(_encodeRaw(item))
                pos += 1
            else:
                # Descend into the nested sequence, remembering where to
                # pick up in the current one afterwards.
                frames.append((current, pos + 1))
                current, pos = item, 0
                append("(")
-
def encode_canonical(rep):
    """Build and return, as a single string, the canonical encoding of
       the s-expression <rep>.

       >>> encode_canonical("abc")
       '3:abc'
       >>> encode_canonical(["a"])
       '(1:a)'
       >>> encode_canonical(["a", "bc"])
       '(1:a2:bc)'
       >>> encode_canonical([[["X", "ab c"]], "d"])
       '(((1:X4:ab c))1:d)'
    """
    pieces = []
    collect = pieces.append
    _encodeCanonical(rep, collect)
    return "".join(pieces)
-
def hash_canonical(rep, hashobj):
    """Feed the canonical encoding of the s-expression <rep> into the
       hashlib hash object <hashobj>, one piece at a time, through its
       update() method.  Nothing is returned; read the digest from
       hashobj afterwards.

       >>> import hashlib
       >>> s = hashlib.sha256()
       >>> s.update("(3:abc(6:hello 5:world)(1:9))")
       >>> s.hexdigest()
       '43f7726155f2700ff0d84240f3aaa9e5a1ee2e2c9e4702f7ac3ebcd45fd2f397'
       >>> s = hashlib.sha256()
       >>> hash_canonical(["abc", ["hello ", "world"], ["9"] ], s)
       >>> s.hexdigest()
       '43f7726155f2700ff0d84240f3aaa9e5a1ee2e2c9e4702f7ac3ebcd45fd2f397'
    """
    feed = hashobj.update
    _encodeCanonical(rep, feed)
-
def _encodePretty(rep, append, indent_step=2, niceWidth=80):
    """Emit an indented, multi-line rendering of the sequence <rep>,
       handing each piece of output to the callable "append".

       rep -- the outermost sequence; "(" is emitted for it right away.
       append -- callable receiving each output fragment.
       indent_step -- how much deeper each nested sequence is indented.
       niceWidth -- passed through to _encodePrettyPrint().

       String members go through _encodePrettyPrint(); every nested
       sequence is opened on a fresh line at the current indentation.
       Output ends with a newline once the outermost sequence is closed.
    """
    frames = []
    current, pos, depth = rep, 0, 0
    append("(")

    while True:
        if pos == len(current):
            # End of this sequence: close it and step back out one level.
            append(")")
            depth -= indent_step
            if not frames:
                append("\n")
                return
            current, pos = frames.pop()
            append(" ")
            continue

        item = current[pos]
        if isinstance(item, str):
            _encodePrettyPrint(item, append, depth, niceWidth)
            pos += 1
            # Separate this member from the next one, if there is one.
            if pos < len(current):
                append(" ")
        else:
            # Nested sequence: remember where to resume, then open it on
            # its own line one indentation level deeper.
            frames.append((current, pos + 1))
            current, pos = item, 0
            depth += indent_step
            append("\n%s(" % (" " * depth))
-
-