From d19ceed522d80d0b3dba446933a5b9316dc48c0b Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Sun, 23 Nov 2008 04:31:53 +0000 Subject: Okay, so I'll admit that my vision of a future where all the world is an s-expression is probably no more than a figment of my imagination. Someday, though, somebody will want to parse spki in python, and they will sure be glad that svn preserves deleted files. git-svn-id: file:///home/or/svnrepo/updater/trunk@17371 55e972cd-5a19-0410-ae62-a4d7a52db4cd --- lib/sexp/encode.py | 223 ----------------------------------------------------- 1 file changed, 223 deletions(-) delete mode 100644 lib/sexp/encode.py (limited to 'lib/sexp/encode.py') diff --git a/lib/sexp/encode.py b/lib/sexp/encode.py deleted file mode 100644 index 1df6406..0000000 --- a/lib/sexp/encode.py +++ /dev/null @@ -1,223 +0,0 @@ - - -import base64 -import binascii -import re -import hashlib - -def _encodeHex(s): - """ - Encode a string in hex format. - - >>> _encodeHex("Hello world") - '#48656c6c6f20776f726c64#' - >>> _encodeHex("") - '##' - """ - return "#%s#"%binascii.b2a_hex(s) - -def _encodeBase64(s): - """ - Encode a string in base64 format, with embedded newlines. - - >>> _encodeBase64("") - '||' - >>> _encodeBase64("Hello world") - '|SGVsbG8gd29ybGQ=|' - >>> print _encodeBase64("Hello world") - |SGVsbG8gd29ybGQ=| - >>> _encodeBase64("Good night, sweet prince! A flock of angels " - ... "sing thee to thy rest") - '|R29vZCBuaWdodCwgc3dlZXQgcHJpbmNlISBBIGZsb2NrIG9mIGFuZ2VscyBzaW5nIHRoZWUgdG8g\\ndGh5IHJlc3Q=|' - - """ - return "|%s|"%base64.encodestring(s).strip() - -# Map from a character value to its representation in a quoted-string. -_QUOTED_MAP = { '\b' : "\\b", - '\t' : "\\t", - '\v' : "\\v", - '\n' : "\\n", - '\f' : "\\f", - '\r' : "\\r", - '"' : "\"", - '\b' : "\\b", - '\\' : "\\", } -for x in xrange(256): - if 32 <= x <= 126: - _QUOTED_MAP[chr(x)] = chr(x) - elif not _QUOTED_MAP.has_key(chr(x)): - _QUOTED_MAP[chr(x)] = "\\x%02x"%x -del x - - -_QUOTED_CHAR_RE = re.compile(r'[^\ -\~]') -def _replaceQuotedChar(match, _Q=_QUOTED_MAP): - """Helper function for replacing .""" - return _Q[match.group(0)] - -def _encodeQuoted(s, _Q=_QUOTED_MAP): - """ - >>> _encodeQuoted("") - '""' - >>> _encodeQuoted("Hello world") - '"Hello world"' - >>> print _encodeQuoted("Hello \xff\b") - "Hello \\xff\\b" - """ - # This implementation is a slower for the case where lots of stuff - # needs quoting, but faster for the case where only some stuff - # needs quoting. If more than about 1/4 of the characters need - # quoting, then the commented-out version below is faster. Yes, - # this is a stupid overoptimization. - return '"%s"'%(_QUOTED_CHAR_RE.sub(_replaceQuotedChar, s)) - - #return '"%s"'%("".join(map(_QUOTED_MAP.__getitem__, s))) - -def _encodeRaw(s): - """ - Encode a string in the "raw" format used for canonical encodings. - - >>> _encodeRaw("") - '0:' - >>> _encodeRaw(" ") - '1: ' - >>> _encodeRaw(" \\n") - '2: \\n' - """ - return "%d:%s"%(len(s),s) - -_TOKEN_PAT = r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*" - -_TOKEN_RE = re.compile(_TOKEN_PAT) -def _writeToken(write,s): - """Write a string in the token (unencoded) format. Only works for strings - matching _TOKEN_RE. - """ - assert _TOKEN_RE.match(s) - return s - -def _encodeCleanest(s, indent=0): - """Encode s in whatever format seems most human-readable.""" - - if _TOKEN_RE.match(s): - return s - n = 0 - for ch in s: - if _QUOTED_MAP[ch] != ch: - n += 1 - if n > 3 and n > len(s)//4: - if len(s) > 16: - return _encodeBase64(s).replace("\n", " "*(indent+1)+"\n") - else: - return _encodeHex(s) - else: - return _encodeQuoted(s) - -def _encodePrettyPrint(s, write, indent=0, niceWidth=80): - if isinstance(s, str): - write(_encodeCleanest(s)) - return - elif len(s) == 0: - write("()") - return - - if isinstance(s[0], str): - parts = [ " "*indent, "(", _encodeCleanest(s), "\n" ] - else: - parts = [ "(" ] - -def _encodeCanonical(rep, append): - """Given an s-expression in , encode it in canonical format, - passing each part to the function "append" as it is done. - """ - if isinstance(rep, str): - append(_encodeRaw(rep)) - return - - append("(") - - stack = [ ] - push = stack.append - pop = stack.pop - idx = 0 - while 1: - while idx == len(rep): - append(")") - try: - rep,idx = pop() - except IndexError: - return - if isinstance(rep[idx], str): - append(_encodeRaw(rep[idx])) - idx += 1 - continue - push((rep,idx+1)) - rep = rep[idx] - idx = 0 - append("(") - -def encode_canonical(rep): - """Return the canonical encoding of the s-expression . - - >>> encode_canonical("abc") - '3:abc' - >>> encode_canonical(["a"]) - '(1:a)' - >>> encode_canonical(["a", "bc"]) - '(1:a2:bc)' - >>> encode_canonical([[["X", "ab c"]], "d"]) - '(((1:X4:ab c))1:d)' - """ - parts = [] - _encodeCanonical(rep, parts.append) - return "".join(parts) - -def hash_canonical(rep, hashobj): - """Given a hashlib hash object , adds the canonical - encoding of the s-expression to hashobj. - - >>> import hashlib - >>> s = hashlib.sha256() - >>> s.update("(3:abc(6:hello 5:world)(1:9))") - >>> s.hexdigest() - '43f7726155f2700ff0d84240f3aaa9e5a1ee2e2c9e4702f7ac3ebcd45fd2f397' - >>> s = hashlib.sha256() - >>> hash_canonical(["abc", ["hello ", "world"], ["9"] ], s) - >>> s.hexdigest() - '43f7726155f2700ff0d84240f3aaa9e5a1ee2e2c9e4702f7ac3ebcd45fd2f397' - """ - _encodeCanonical(rep, hashobj.update) - -def _encodePretty(rep, append, indent_step=2, niceWidth=80): - stack = [] - idx = 0 - indent = 0 - append("(") - pop = stack.pop - push = stack.append - - while 1: - while idx == len(rep): - append(")") - indent -= indent_step - try: - rep,idx = pop() - except IndexError: - append("\n") - return - else: - append(" ") - if isinstance(rep[idx], str): - _encodePrettyPrint(rep[idx], append, indent, niceWidth) - idx += 1 - if idx < len(rep): - append(" ") - continue - push((rep,idx+1)) - rep = rep[idx] - idx = 0 - indent += indent_step - append("\n%s("%(" "*indent)) - - -- cgit v1.2.3