From bbb6cb5fe6a61aa0975dcd9df11f9f648f6abba2 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Sun, 25 Jul 2010 15:13:04 -0400 Subject: More unit tests and documentation for the Thandy code --- lib/thandy/ServerCLI.py | 6 ++ lib/thandy/checkJson.py | 73 ++++++++++++++++++---- lib/thandy/download.py | 16 +++-- lib/thandy/encodeToXML.py | 7 +++ lib/thandy/formats.py | 153 +++++++++++++++++++++++++++++++++++++++++----- lib/thandy/keys.py | 136 +++++++++++++++++++++++++++++++++-------- lib/thandy/master_keys.py | 4 ++ lib/thandy/repository.py | 14 ++++- lib/thandy/tests.py | 2 + lib/thandy/util.py | 6 ++ 10 files changed, 359 insertions(+), 58 deletions(-) (limited to 'lib') diff --git a/lib/thandy/ServerCLI.py b/lib/thandy/ServerCLI.py index 9212846..4b6c1a6 100644 --- a/lib/thandy/ServerCLI.py +++ b/lib/thandy/ServerCLI.py @@ -15,6 +15,9 @@ def tstamp(): return time.strftime("%Y%m%d_%H%M%S", time.localtime()) def snarf(fname): + """Return a string containing the binary contents of the file named + "fname." + """ f = open(fname, 'rb') try: return f.read() @@ -22,6 +25,9 @@ def snarf(fname): f.close() def snarfObj(fname): + """Return a 2-tuple of (object, len), where the object stored in json + format in the file 'fname', and len is that file's length. + """ f = open(fname, 'r') try: length = os.fstat(f.fileno()).st_size diff --git a/lib/thandy/checkJson.py b/lib/thandy/checkJson.py index e63a1fc..ab33d00 100644 --- a/lib/thandy/checkJson.py +++ b/lib/thandy/checkJson.py @@ -1,13 +1,18 @@ # Copyright 2008 The Tor Project, Inc. See LICENSE for licensing information. +"""This file defines an object oriented pattern matching system used to + check decoded xJSON objects. +""" + import re import sys import thandy class Schema: - """A schema matches a set of possible Python objects, of types - that are encodable in JSON.""" + """A Schema matches a set of possible Python objects, of types + that are encodable in JSON. This is an abstract base type; + see implementations below.""" def matches(self, obj): """Return True if 'obj' matches this schema, False if it doesn't.""" try: @@ -73,7 +78,7 @@ class RE(Schema): class Str(Schema): """ - Matches a particular string. + Matches a particular string, and no other. >>> s = Str("Hi") >>> s.matches("Hi") @@ -113,7 +118,7 @@ class AnyStr(Schema): class OneOf(Schema): """ - Matches an object that matches any one of several schemas. + Matches an object that matches any one of several sub-schemas. >>> s = OneOf([ListOf(Int()), Str("Hello"), Str("bye")]) >>> s.matches(3) @@ -139,11 +144,19 @@ class OneOf(Schema): class AllOf(Schema): """Matches the intersection of a list of schemas. + + >>> s = AllOf([RE(r'.*end'), RE(r'begin.*')]) + >>> s.matches("an end") + False + >>> s.matches("begin well") + False + >>> s.matches("begin and end") + True """ def __init__(self, required): self._subschemas = required[:] - def checkMatche(self, obj): + def checkMatch(self, obj): for s in self._subschemas: s.checkMatch(obj) @@ -174,6 +187,10 @@ class ListOf(Schema): False """ def __init__(self, schema, minCount=0, maxCount=sys.maxint,listName="list"): + """Create a new ListOf schema to match anywhere from minCount to + maxCount objects conforming to 'schema'. When generating errors, + we will call this type 'listName'. + """ self._schema = schema self._minCount = minCount self._maxCount = maxCount @@ -240,6 +257,13 @@ class Struct(Schema): """ def __init__(self, subschemas, optschemas=[], allowMore=False, structName="list"): + """Create a new Struct schema to match lists that begin with + each item in 'subschemas' in order. If there are more elements + than items in subschemas, additional elements much match + the items in optschemas (if any). If there are more elements + than items in subschemas and optschemas put together, then + the object is only matched when allowMore is true. + """ self._subschemas = subschemas + optschemas self._min = len(subschemas) self._allowMore = allowMore @@ -276,6 +300,8 @@ class DictOf(Schema): False """ def __init__(self, keySchema, valSchema): + """Return a new DictSchema to match objects all of whose keys match + keySchema, and all of whose values match valSchema.""" self._keySchema = keySchema self._valSchema = valSchema def checkMatch(self, obj): @@ -306,8 +332,9 @@ class Opt: class Obj(Schema): """ - Matches a dict from specified keys to key-specific types. Unrecognized - keys are allowed. + Matches a dict from specified keys to key-specific types. All + keys are requied unless explicitly marked with Opt. + Unrecognized keys are always allowed. >>> s = Obj(a=AnyStr(), bc=Struct([Int(), Int()])) >>> s.matches({'a':"ZYYY", 'bc':[5,9]}) @@ -324,7 +351,6 @@ class Obj(Schema): self._objname = _objname self._required = d.items() - def checkMatch(self, obj): if not isinstance(obj, dict): raise thandy.FormatException("Wanted a %s; did not get a dict"% @@ -372,6 +398,7 @@ class TaggedObj(Schema): """ def __init__(self, tagName, tagIsOptional=False, ignoreUnrecognized=True, **tagvals): + #DOCDOC self._tagName = tagName self._tagOpt = tagIsOptional self._ignoreOthers = ignoreUnrecognized @@ -421,17 +448,26 @@ class Int(Schema): >>> Int(lo=10, hi=30).matches(5) False """ - def __init__(self, lo=-sys.maxint, hi=sys.maxint): + def __init__(self, lo=None, hi=None): + """Return a new Int schema to match items between lo and hi inclusive. + """ + if lo is not None and hi is not None: + assert lo <= hi self._lo = lo self._hi = hi + plo,phi=lo,hi + if plo is None: plo = "..." + if phi is None: phi = "..." + self._range = "[%s,%s]"%(plo,phi) def checkMatch(self, obj): if isinstance(obj, bool) or not isinstance(obj, (int, long)): # We need to check for bool as a special case, since bool # is for historical reasons a subtype of int. raise thandy.FormatException("Got %r instead of an integer"%obj) - elif not (self._lo <= obj <= self._hi): - raise thandy.FormatException("%r not in range [%r,%r]" - %(obj, self._lo, self._hi)) + elif (self._lo is not None and self._lo > obj) or ( + self._hi is not None and self._hi < obj): + raise thandy.FormatException("%r not in range %s" + %(obj, self._range)) class Bool(Schema): """ @@ -450,7 +486,20 @@ class Bool(Schema): raise thandy.FormatException("Got %r instead of a boolean"%obj) class Func(Schema): + """ + Matches an object based on the value of some boolen function + + >>> even = lambda x: (x%2)==0 + >>> s = Func(even, baseSchema=Int()) + >>> s.matches(99) + False + >>> s.matches(98) + True + >>> s.matches("ninety-eight") + False + """ def __init__(self, fn, baseSchema=None): + #DOCDOC self._fn = fn self._base = baseSchema def checkMatch(self, obj): diff --git a/lib/thandy/download.py b/lib/thandy/download.py index 8e774ec..52c6fe4 100644 --- a/lib/thandy/download.py +++ b/lib/thandy/download.py @@ -31,7 +31,7 @@ class DownloadManager: # Work queue of DownloadJobs that we intend to process once a thread # is free. self.downloadQueue = Queue.Queue() - # DOCDOC + # Work queue of functions that need to be run in the main thread. self.resultQueue = Queue.Queue() # List of worker threads. @@ -43,10 +43,10 @@ class DownloadManager: for t in self.threads: t.setDaemon(True) - # DOCDOC + # Used to remember the status of downloads to avoid too much retrying self.statusLog = DownloadStatusLog() - #DOCDOC + # Used to tell the main thread to raise an exception. self._raiseMe = None def start(self): @@ -85,13 +85,17 @@ class DownloadManager: def wait(self): """Pause until we have no active or pending jobs.""" while not self.finished(): + # Wait till somebody tells us to do something. self.done.acquire() self.done.wait() self.done.release() + # Did something go wrong? if self._raiseMe: raise self._raiseMe + # Suck functions out of resultQueue and run them until + # resultQueue is empty. try: while True: item = self.resultQueue.get(block=False) @@ -130,7 +134,7 @@ class DownloadManager: def _thread(self, idx): # Run in the background per thread. idx is the number of the thread. while True: - job = self.downloadQueue.get() # Grab job from queue. + job = self.downloadQueue.get() # Grab a job from queue. rp = job.getRelativePath() success = False try: @@ -215,7 +219,6 @@ _STATUS_LOG_SCHEMA = S.Obj( networkFailures=_FAIL_SCHEMA) del S - class DownloadStatusLog: """Tracks when we can retry downloading from various mirrors. @@ -708,7 +711,8 @@ def getConnection(url, useTor, have_length=None): if __name__ == '__main__': - # Trivial CLI to test out downloading. + # Trivial CLI to test out downloading: fetches files with urrlib2 + # as specified on the command line. Launches them all in parallel. import getopt options, args = getopt.getopt(sys.argv[1:], "", diff --git a/lib/thandy/encodeToXML.py b/lib/thandy/encodeToXML.py index c563d77..b2fe351 100644 --- a/lib/thandy/encodeToXML.py +++ b/lib/thandy/encodeToXML.py @@ -1,5 +1,12 @@ # Copyright 2008 The Tor Project, Inc. See LICENSE for licensing information. +"""This module converts JSon to XML. It is allegedly for people who have + libraries that can read XML, but nothing that can read JSon. + + Nick questions whether it's actually useful. Certainly, it's quite + unlikely that you'll be able to check any keys once you've converted it. +""" + import re import thandy diff --git a/lib/thandy/formats.py b/lib/thandy/formats.py index d6f646a..a59f696 100644 --- a/lib/thandy/formats.py +++ b/lib/thandy/formats.py @@ -15,9 +15,16 @@ import Crypto.Hash.SHA256 class KeyDB: """A KeyDB holds public keys, indexed by their key IDs.""" + ## Fields: + # _keys: a map from keyid to public key. def __init__(self): + """Create a new empty KeyDB.""" self._keys = {} def addKey(self, k): + """Insert a thandy.keys.PublicKey object, 'k', into this KeyDB. If + we already had this key, retain the old one, but add any roles in + the new key 'k'. + """ keyid = k.getKeyID() try: oldkey = self._keys[keyid] @@ -28,8 +35,12 @@ class KeyDB: pass self._keys[k.getKeyID()] = k def getKey(self, keyid): + """Return the key whose key ID is 'keyid'. If there is no such key, + raise KeyError.""" return self._keys[keyid] def getKeysByRole(self, role, path): + """Return a list of all keys that have the role 'role' set for files + in 'path'.""" results = [] for key in self._keys.itervalues(): for r,p in key.getRoles(): @@ -39,14 +50,17 @@ class KeyDB: return results def getKeysFuzzy(self, keyid): + """Return a list of all keys whose key IDs begin with 'keyid'.""" r = [] for k,v in self._keys.iteritems(): if k.startswith(keyid): r.append(v) return r def iterkeys(self): + """Return a new iterator of all the keys in this KeyDB.""" return self._keys.itervalues() +# Internal cache that maps role paths to regex objects that parse them. _rolePathCache = {} def rolePathMatches(rolePath, path): """Return true iff the relative path in the filesystem 'path' conforms @@ -109,7 +123,12 @@ class SignatureStatus: def checkSignatures(signed, keyDB, role=None, path=None): """Given an object conformant to SIGNED_SCHEMA and a set of public keys - in keyDB, verify the signed object in 'signed'.""" + in keyDB, verify the signed object is signed. If 'role' and 'path' + are provided, verify that the signing key has the correct role to + sign this document as stored in 'path'. + + Returns a SignatureStatus. + """ SIGNED_SCHEMA.checkMatch(signed) @@ -156,7 +175,10 @@ def checkSignatures(signed, keyDB, role=None, path=None): return SignatureStatus(goodSigs, badSigs, unknownSigs, tangentialSigs) -def canonical_str_encoder(s): +def _canonical_str_encoder(s): + """Helper for encodeCanonical: encodes a string as the byte sequence + expected for canonical JSON format. + """ s = '"%s"' % re.sub(r'(["\\])', r'\\\1', s) if isinstance(s, unicode): return s.encode("utf-8") @@ -168,7 +190,7 @@ def _encodeCanonical(obj, outf): # even let us replace the separators. if isinstance(obj, basestring): - outf(canonical_str_encoder(obj)) + outf(_canonical_str_encoder(obj)) elif obj is True: outf("true") elif obj is False: @@ -191,12 +213,12 @@ def _encodeCanonical(obj, outf): items = obj.items() items.sort() for k,v in items[:-1]: - outf(canonical_str_encoder(k)) + outf(_canonical_str_encoder(k)) outf(":") _encodeCanonical(v, outf) outf(",") k, v = items[-1] - outf(canonical_str_encoder(k)) + outf(_canonical_str_encoder(k)) outf(":") _encodeCanonical(v, outf) outf("}") @@ -206,11 +228,11 @@ def _encodeCanonical(obj, outf): def encodeCanonical(obj, outf=None): """Encode the object obj in canoncial JSon form, as specified at http://wiki.laptop.org/go/Canonical_JSON . It's a restricted - dialect of json in which keys are always lexically sorted, + dialect of JSON in which keys are always lexically sorted, there is no whitespace, floats aren't allowed, and only quote - and backslash get escaped. The result is encoded in UTF-8, - and the resulting bits are passed to outf (if provided), or joined - into a string and returned. + and backslash get escaped. The result is encoded in UTF-8, and + the resulting bytes are passed to outf (if provided) in several + calls, or joined into a string and returned. >>> encodeCanonical("") '""' @@ -222,6 +244,14 @@ def encodeCanonical(obj, outf=None): '{"A":[99]}' >>> encodeCanonical({"x" : 3, "y" : 2}) '{"x":3,"y":2}' + >>> total = 0 + >>> def increment(s): + ... global total + ... total += len(s) + ... + >>> encodeCanonical({"x" : 3, "y" : 2, 'z' : [99,3]}, outf=increment) + >>> total + 24 """ result = None @@ -236,10 +266,9 @@ def encodeCanonical(obj, outf=None): def getDigest(obj, digestObj=None): """Update 'digestObj' (typically a SHA256 object) with the digest of - the canonical json encoding of obj. If digestObj is none, - compute the SHA256 hash and return it. - - DOCDOC string equivalence. + obj, first encoding it in canonical form if it's a JSON object, + and taking its UTF-8 encoding if it's in unicode. If digestObj + is none, just compute and return the SHA256 hash. """ useTempDigestObj = (digestObj == None) if useTempDigestObj: @@ -291,6 +320,9 @@ def getFileDigest(f, digestObj=None): return digestObj.digest() def makeSignable(obj): + """Return a new JSON object of type 'signed' wrapping 'obj', and containing + no signatures. + """ return { 'signed' : obj, 'signatures' : [] } def sign(signed, key): @@ -349,13 +381,23 @@ def parseBase64(s): raise thandy.FormatException("Invalid base64 encoding") def parseHash(s): + """Parse a base64-encoded digest. + + (This is just like paseBase64, but it checks the size.) + """ h = parseBase64(s) if len(h) != Crypto.Hash.SHA256.digest_size: raise thandy.FormatException("Bad hash length") return h +# Abbreviate the thandy.checkJson module here, since we're going to be +# using all of its members a lot here. S = thandy.checkJson +######### +## These schemas describe, in OO constraint-checking form, all the Thandy +## data formats. + # A date, in YYYY-MM-DD HH:MM:SS format. TIME_SCHEMA = S.RE(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}') # A hash, base64-encoded @@ -370,6 +412,15 @@ RSAKEY_SCHEMA = S.Obj( _keytype=S.Str("rsa"), e=BASE64_SCHEMA, n=BASE64_SCHEMA) +# An RSA key with private-key informartion: subtype of RSAKEY_SCHEMA. +RSAKEY_PRIVATE_SCHEMA = S.Obj( + _keytype=S.Str("rsa"), + e=BASE64_SCHEMA, + n=BASE64_SCHEMA, + d=BASE64_SCHEMA, + p=BASE64_SCHEMA, + q=BASE64_SCHEMA, + u=BASE64_SCHEMA) # Any public key. PUBKEY_SCHEMA = S.Obj( _keytype=S.AnyStr()) @@ -393,6 +444,7 @@ SIGNED_SCHEMA = S.Obj( signed=S.Any(), signatures=S.ListOf(SIGNATURE_SCHEMA)) +# The name of a role ROLENAME_SCHEMA = S.AnyStr() # A role: indicates that a key is allowed to certify a kind of @@ -459,6 +511,7 @@ def checkWinRegistryKeyname(keyname): elif not key or not value: raise thandy.FormatException("Bad registry entry.") +# A string holding the name of a windows registry key REGISTRY_KEY_SCHEMA = S.Func(checkWinRegistryKeyname) CHECK_ITEM_SCHEMA = S.TaggedObj( @@ -516,6 +569,10 @@ PACKAGE_SCHEMA = S.Func(checkPackageFormatConsistency, PACKAGE_SCHEMA) ALL_ROLES = ('timestamp', 'mirrors', 'bundle', 'package', 'master') class Keylist(KeyDB): + """A list of keys, as extracted from a Thandy keys.txt JSon file. + + This class extends KeyDB, so you can acces keys more easily. + """ def __init__(self): KeyDB.__init__(self) @@ -542,6 +599,16 @@ class Keylist(KeyDB): self.addKey(key) class StampedInfo: + """This class holds a single entry in a timestamp file. Each + StampedInfo says when a file was last modified, and what its + hash was. It may also provide useful info about where to find it, + its version, its length, and so on. + """ + ## _ts -- the time when the file was last modified + ## _hash -- the hash of the most recent version of the file + ## _version -- version of the most recent file. May be None + ## _relpath -- where to find this file in the repository + ## _length -- the length of the file def __init__(self, ts, hash, version=None, relpath=None, length=None): self._ts = ts self._hash = hash @@ -565,6 +632,11 @@ class StampedInfo: return self._length class TimestampFile: + """This class holds all the fields parsed from a thandy timestamp file.""" + ## _time -- the time when this file was generated + ## _mirrorListInfo -- a StampedInfo for the keylist. + ## _keyListInfo -- a StampedInfo for the mirrorlist + ## _bundleInfo -- map from bundle name to StampedInfo def __init__(self, at, mirrorlistinfo, keylistinfo, bundleinfo): self._time = at self._mirrorListInfo = mirrorlistinfo @@ -575,6 +647,9 @@ class TimestampFile: def fromJSon(obj): # must be validated. at = parseTime(obj['at']) + # We slice these lists because we want to support old thandys + # that didn't include the length on these, and new ones that + # might include more fields m = StampedInfo.fromJSonFields(*obj['m'][:3]) k = StampedInfo.fromJSonFields(*obj['k'][:3]) b = {} @@ -606,6 +681,13 @@ class TimestampFile: return self._bundleInfo def readConfigFile(fname, needKeys=(), optKeys=(), preload={}): + """Read a configuration file from 'fname'. A configuration file is a + python script that runs in a temporary namespace prepopulated + with the contents of 'reload'. It is a thandy.FormatException + if the file finishes executation without setting every variable + listed in 'needKeys'. These settings, plus any variables whose names + are listed in 'optKeys', are returned in a new dict. + """ parsed = preload.copy() result = {} execfile(fname, parsed) @@ -625,6 +707,10 @@ def readConfigFile(fname, needKeys=(), optKeys=(), preload={}): return result def makePackageObj(config_fname, package_fname): + """Given a description of a thandy package in config_fname, and the + name of the one file (only one is supported now!) in package_fname, + return a new unsigned package object. + """ preload = {} shortDescs = {} longDescs = {} @@ -702,6 +788,12 @@ def makePackageObj(config_fname, package_fname): return result def makeBundleObj(config_fname, getPackage, getPackageLength): + """Given a description of a thandy bundle in config_fname, + return a new unsigned bundle object. getPackage must be a function + returning a package object for every package the bundle requires + when given the package's name as input. getPacakgeLength + must be a function returning the length of the package file. + """ packages = [] def ShortGloss(lang, val): packages[-1]['gloss'][lang] = val def LongGloss(lang, val): packages[-1]['longgloss'][lang] = val @@ -749,20 +841,39 @@ def makeBundleObj(config_fname, getPackage, getPackageLength): return result def versionIsNewer(v1, v2): + """Return true if version v1 is newer than v2. Both versions are + given as lists of version components. + >>> versionIsNewer([1,2,3], [1,2,3,4]) + False + >>> versionIsNewer([1,2,3,5], [1,2,3,4]) + True + >>> versionIsNewer([1,3,3,5], [1,2,3,5]) + True + """ return v1 > v2 def getBundleKey(bundlePath): """ + Return all parts of a bundle's "key" as used in a timestamp file, + given its full filename. + >>> getBundleKey("/bundleinfo/tor-browser/win32/some-file-name.txt") '/bundleinfo/tor-browser/win32/' """ - # No, we can't use "os.path.directory." That isn't os-independent. + # No, we can't use "os.path.directory" or "os.path.split". Those are + # OD-dependent, and all of our paths are in Unix format. idx = bundlePath.rindex("/") return bundlePath[:idx+1] def makeTimestampObj(mirrorlist_obj, mirrorlist_len, keylist_obj, keylist_len, bundle_objs): + """Return a new unsigned timestamp object for a given set of inputs, + where mirrorlist_obj and mirrorlist_len are a (signed, unencoded) + mirror list, and its length on disk; keylist_obj and keylist_len + are the same for the key list, and bundle_objs is a list of + (object, length) tuples for all the bundles. + """ result = { '_type' : 'Timestamp', 'at' : formatTime(time.time()) } result['m'] = [ mirrorlist_obj['ts'], @@ -784,6 +895,8 @@ def makeTimestampObj(mirrorlist_obj, mirrorlist_len, return result class MirrorInfo: + """A MirrorInfo holds the parsed value of a thandy mirror list's entry + for a single mirror.""" def __init__(self, name, urlbase, contents, weight): self._name = name self._urlbase = urlbase @@ -809,6 +922,9 @@ class MirrorInfo: 'weight' : self._weight } def makeMirrorListObj(mirror_fname): + """Return a new unsigned mirrorlist object for the mirrors described in + 'mirror_fname'. + """ mirrors = [] def Mirror(*a, **kw): mirrors.append(MirrorInfo(*a, **kw)) preload = {'Mirror' : Mirror} @@ -821,6 +937,9 @@ def makeMirrorListObj(mirror_fname): return result def makeKeylistObj(keylist_fname, includePrivate=False): + """Return a new unsigned keylist object for the keys described in + 'mirror_fname'. + """ keys = [] def Key(obj): keys.append(obj) preload = {'Key': Key} @@ -851,7 +970,11 @@ SCHEMAS_BY_TYPE = { } def checkSignedObj(obj, keydb=None): - # Returns signaturestatus, role, path on sucess. + """Given a signed object, check whether it is well-formed and correctly + signed with some key in keydb having the appropriate role. On + success, returns a SignatureStatus, the rule used to sign it, + and the object's path in the repository. + """ SIGNED_SCHEMA.checkMatch(obj) try: diff --git a/lib/thandy/keys.py b/lib/thandy/keys.py index 3385ddb..9eb98d0 100644 --- a/lib/thandy/keys.py +++ b/lib/thandy/keys.py @@ -1,6 +1,12 @@ # Copyright 2008 The Tor Project, Inc. See LICENSE for licensing information. -# These require PyCrypto. +"""thandy.keys -- + + This module defines functionality for public keys, and for a simple + encrypted keystore. +""" + +# These imports require PyCrypto. import Crypto.PublicKey.RSA import Crypto.Hash.SHA256 import Crypto.Cipher.AES @@ -18,28 +24,46 @@ import thandy.util json = thandy.util.importJSON() class PublicKey: - """Abstract base class for public keys.""" + """An abstract base class for public keys. A public key object + always implements some kind of public key, and may also contain + the corresponding private key data.""" + ## Fields: + # _roles: a list of (rolename, path) tuples that indicates which + # roles we consider this public key to have. def __init__(self): + """Constructor: Initialize a public key.""" self._roles = [] def format(self): + """Return this public key converted into a JSon object""" raise NotImplemented() def sign(self, obj=None, digest=None): """Sign either a JSon object provided in 'obj', or a digest provided in 'digest'. Return a list of (method name, base64-encoded - signature) tuple. + signature) tuples. Requires that this is a private key.""" raise NotImplemented() - def checkSignature(self, method, data, signature): + def checkSignature(self, method, signature, obj=None, digest=None): + """Check the base64-encoded signature in 'signature', which was + generating using the method with the name 'method', to see + if it is a correct signature made with this key for either + a JSon object provided in 'obj' or a digest provided in 'digest'. + + Returns True if the signature is value; False if it's invalid, and + UnknownMethod if we don't recognize 'method'. + """ # returns True, False, or raises UnknownMethod. raise NotImplemented() def getKeyID(self): + """Return a base-64-encoded key ID for this key. No two distinct + keys may share the same key ID. + """ raise NotImplemented() def getRoles(self): """Return a list of all roles supported by this key. A role is a doctype,pathPattern tuple. """ - return self._roles + return self._roles[:] def addRole(self, role, path): """Add a role to the list of roles supported by this key. A role is a permission to sign a given kind of document @@ -62,8 +86,14 @@ class PublicKey: return False if hex(1L).upper() == "0X1L": + # It looks like integers and longs aren't unified in this version + # of Python: converting a long to a hex string means there's a trailing + # 'L' we need to remove. def intToBinary(number): """Convert an int or long into a big-endian series of bytes. + + >>> intToBinary(92807287956601L) + 'Thandy' """ # This "convert-to-hex, then use binascii" approach may look silly, # but it's over 10x faster than the Crypto.Util.number approach. @@ -73,6 +103,8 @@ if hex(1L).upper() == "0X1L": h = "0"+h return binascii.a2b_hex(h) elif hex(1L).upper() == "0X1": + # It looks like integers and longs _are_ unified. Maybe this is Python 3? + # In any case, we don't need to remove the trailing L. def intToBinary(number): "Variant for future versions of pythons that don't append 'L'." h = hex(long(number)) @@ -87,18 +119,31 @@ else: def binaryToInt(binary): """Convert a big-endian series of bytes into a long. + + >>> binaryToInt('Hi') + 18537L """ return long(binascii.b2a_hex(binary), 16) def intToBase64(number): - """Convert an int or long to a big-endian base64-encoded value.""" + """Convert an int or long to a big-endian base64-encoded value. + + >>> intToBase64(0x4e16a777) + 'Thandw' + """ return thandy.formats.formatBase64(intToBinary(number)) def base64ToInt(number): - """Convert a big-endian base64-encoded value to a long.""" + """Convert a big-endian base64-encoded value to a long. + + >>> base64ToInt('Thandy') + 1310107511L + """ return binaryToInt(thandy.formats.parseBase64(number)) def _pkcs1_padding(m, size): + """Add PKCS padding to the message 'm', so that it's appropriate for + use with a public key of 'size' bytes.""" # I'd rather use OAEP+, but apparently PyCrypto barely supports # signature verification, and doesn't seem to support signature # verification with nondeterministic padding. "argh." @@ -108,13 +153,16 @@ def _pkcs1_padding(m, size): return r def _xor(a,b): - if a: - return not b - else: - return b + """Return true iff exactly one of and b are true. Used to check + some conditions.""" + return bool(a) ^ bool(b) class RSAKey(PublicKey): """ + An RSAKey is an implementation of the abstract class 'PublicKey' that + can sign documents and check signatures using RSA keys of arbitrary + size. + >>> k = RSAKey.generate(bits=512) >>> obj = k.format() >>> obj['_keytype'] @@ -136,27 +184,33 @@ class RSAKey(PublicKey): >>> k.checkSignature(method, sig, obj=s2) False """ + ## Fields: (beyond those inherited from PublicKey) + # key -- a PyCrypto RSA key, public or private. See Crypto.PublicKey.RSA. + # keyid -- the base64 key ID for this key, or 'None' if we haven't + # generated one yet. def __init__(self, key): + """Constructure: Initialize a new RSAKey from a PyCrypto RSA key.""" PublicKey.__init__(self) self.key = key self.keyid = None @staticmethod def generate(bits=2048): - """Generate a new RSA key, with modulus length 'bits'.""" + """Generate and return a new RSA key, with modulus length 'bits'.""" key = Crypto.PublicKey.RSA.generate(bits=bits, randfunc=os.urandom) return RSAKey(key) @staticmethod def fromJSon(obj): - """Construct an RSA key from the output of the format() method. + """Construct and return a RSAKey from the JSon object output of the + RSAKey.format() method. May raise thandy.FormatException. """ # obj must match RSAKEY_SCHEMA thandy.formats.RSAKEY_SCHEMA.checkMatch(obj) n = base64ToInt(obj['n']) e = base64ToInt(obj['e']) - if obj.has_key('d'): + if thandy.formats.RSAKEY_PRIVATE_SCHEMA.matches(obj): d = base64ToInt(obj['d']) p = base64ToInt(obj['p']) q = base64ToInt(obj['q']) @@ -173,11 +227,11 @@ class RSAKey(PublicKey): return result def isPrivateKey(self): - """Return true iff this key has private-key components""" + """Return True iff this key has private-key components""" return hasattr(self.key, 'd') def format(self, private=False, includeRoles=False): - """Return a new object to represent this key in json format. + """Return a new json object to represent this key in json format. If 'private', include private-key data. If 'includeRoles', include role information. """ @@ -205,6 +259,7 @@ class RSAKey(PublicKey): return self.keyid def sign(self, obj=None, digest=None): + # See PublicKey.sign for documentation assert _xor(obj == None, digest == None) method = "sha256-pkcs1" if digest == None: @@ -214,6 +269,7 @@ class RSAKey(PublicKey): return [ (method, sig) ] def checkSignature(self, method, sig, obj=None, digest=None): + # See PublicKey.checkSignature for documentation assert _xor(obj == None, digest == None) if method != "sha256-pkcs1": raise thandy.UnknownMethod(method) @@ -223,6 +279,7 @@ class RSAKey(PublicKey): m = _pkcs1_padding(digest, (self.key.size()+1) // 8) return bool(self.key.verify(m, (sig,))) +# Length of salt to pass to secretToKey SALTLEN=16 def secretToKey(salt, secret): @@ -233,14 +290,15 @@ def secretToKey(salt, secret): (The goal is to make offline password-guessing attacks harder by increasing the time required to convert a password to a key, and to - make precomputed password tables impossible to generate by ) + make precomputed password tables impossible to generate by using + a really huge salt.) """ assert len(salt) == SALTLEN+1 # The algorithm is basically, 'call the last byte of the salt the # "difficulty", and all other bytes of the salt S. Now make # an infinite stream of S|secret|S|secret|..., and hash the - # first N bytes of that, where N is determined by the difficulty. + # first N bytes of that, where N is determined by the difficulty.' # # Obviously, this wants a hash algorithm that's tricky to # parallelize. @@ -270,8 +328,10 @@ def secretToKey(salt, secret): return d.digest() def encryptSecret(secret, password, difficulty=0x80): - """Encrypt the secret 'secret' using the password 'password', - and return the encrypted result.""" + """Encrypt the secret 'secret' using the password 'password', and + return the encrypted result. The 'difficulty' parameter is a + one-byte value determining how hard to make the password-to-key + derivation""" # The encrypted format is: # "GKEY1" -- 5 octets, fixed, denotes data format. # SALT -- 17 bytes, used to hash password @@ -354,25 +414,44 @@ def decryptSecret(encrypted, password): return secret class KeyStore(thandy.formats.KeyDB): - """Helper to store private keys in an encrypted file.""" + """Helper class used to store private keys in a (usually) encrypted file. + + It implements thandy.formats.KeyDB, so you can add keys to it + and get keys from it in a useful indexed way. + """ + ## Fields: + # _loaded -- boolean: Have we loaded the keys from disk yet? + # _fname -- The filename that we use to store the keys. + # _passwd -- The password used to encrypt the keystore, or None if we + # haven't asked for it yet. + # _encrypted -- boolean: Should we treat this as an encrypted keystore? + # + # File format: + # A JSon object containing a single field, "keys", which in turn + # contains a list of json-encoded keys. This object is stored encrypted + # with encryptSecret. def __init__(self, fname, encrypted=True): thandy.formats.KeyDB.__init__(self) - self._loaded = None + self._loaded = False self._fname = fname self._passwd = None self._encrypted = encrypted def getpass(self, reprompt=False): + """If we have already asked for the passphrase, return it. + If 'reprompt' is true, ask for the password twice, to make sure + the user didn't mistype. Otherwise, only ask once. + """ if self._passwd != None: return self._passwd while 1: - sys.stderr.write("Password: ") + sys.stderr.write("Passphrase: ") pwd = getpass.getpass("") if not reprompt: return pwd - sys.stderr.write("Confirm: ") + sys.stderr.write(" Confirm: ") pwd2 = getpass.getpass("") if pwd == pwd2: return pwd @@ -380,6 +459,10 @@ class KeyStore(thandy.formats.KeyDB): print "Mismatch; try again." def load(self, password=None): + """Load the keyring into memory, decrypting as needed. + + May raise various exceptions on failure. + """ logging.info("Loading private keys from %r...", self._fname) if not os.path.exists(self._fname): logging.info("...no such file.") @@ -405,12 +488,17 @@ class KeyStore(thandy.formats.KeyDB): self._loaded = True def setPassword(self, passwd): + """Set the cached password to 'passwd'.""" self._passwd = passwd def clearPassword(self): + """Clear the cached password.""" self._passwd = None def save(self, password=None): + """Save the keyring to disk. Note that you must call this method, + or changes will not be persistent. + """ if not self._loaded and self._encrypted: self.load(password) diff --git a/lib/thandy/master_keys.py b/lib/thandy/master_keys.py index 4d014a7..4d0bdac 100644 --- a/lib/thandy/master_keys.py +++ b/lib/thandy/master_keys.py @@ -1,5 +1,9 @@ # Copyright 2008 The Tor Project, Inc. See LICENSE for licensing information. +""" This file is where a master keylist (and a default initial mirror list) + can currently ship with the Thandy code. +""" + MASTER_KEYS = [ { "_keytype": "rsa", diff --git a/lib/thandy/repository.py b/lib/thandy/repository.py index f5a4de2..bc4d73b 100644 --- a/lib/thandy/repository.py +++ b/lib/thandy/repository.py @@ -52,10 +52,13 @@ class RepositoryFile: self._mtime = None def clear(self): - """DOCDOC""" + """Clear all cached fields loaded from disk; the next time we need one, + we'll reload the file entirely. + """ self._main_obj = self._signed_obj = None self._sigStatus = None self._mtime = None + self._length = None def getRelativePath(self): """Return the filename for this item relative to the top of the @@ -118,6 +121,7 @@ class RepositoryFile: return signed_obj, main_obj def checkFile(self, fname, needhash=None): + """DOCDOC""" f = open(fname, 'r') try: s = f.read() @@ -167,7 +171,9 @@ class RepositoryFile: return self._sigStatus class PkgFile: + """DOCDOC""" def __init__(self, repository, relativePath, needHash): + """DOCDOC""" self._repository = repository self._relativePath = relativePath self._needHash = needHash @@ -175,22 +181,28 @@ class PkgFile: self._mtime = None def clear(self): + """DOCDOC""" self._mtime = None def load(self): + """DOCDOC""" pass def getRelativePath(self): + """DOCDOC""" return self._relativePath def getPath(self): + """DOCDOC""" fname = self._repository.getFilename(self._relativePath) return os.path.normpath(fname) def getExpectedHash(self): + """DOCDOC""" return self._needHash def checkFile(self, fname, needHash=None): + """DOCDOC""" if needHash: if thandy.formats.getFileDigest(fname) != needHash: raise thandy.FormatException("Digest for %s not as expected.") diff --git a/lib/thandy/tests.py b/lib/thandy/tests.py index a46258d..e55c07d 100644 --- a/lib/thandy/tests.py +++ b/lib/thandy/tests.py @@ -68,6 +68,7 @@ class CryptoTests(unittest.TestCase): ks = thandy.keys.KeyStore(fname) key1 = thandy.keys.RSAKey.generate(512) key2 = thandy.keys.RSAKey.generate(512) + key1.addRole('master', '**') ks.addKey(key1) ks.addKey(key2) ks.save(passwd) @@ -75,6 +76,7 @@ class CryptoTests(unittest.TestCase): ks2 = thandy.keys.KeyStore(fname) ks2.load(passwd) self.assertEquals(key1.key.n, ks2.getKey(key1.getKeyID()).key.n) + self.assertEquals(key1.getRoles(), [("master", "**")]) class UtilTests(unittest.TestCase): def setUp(self): diff --git a/lib/thandy/util.py b/lib/thandy/util.py index 90f251f..29c7b87 100644 --- a/lib/thandy/util.py +++ b/lib/thandy/util.py @@ -19,6 +19,11 @@ import thandy.master_keys _jsonModule = None def importJSON(): + """Load and return the json module. + + When everybody has Python 2.6 or later, we can just replace this with + 'import json; return json' + """ global _jsonModule if _jsonModule is not None: return _jsonModule @@ -200,6 +205,7 @@ def getRegistryValue(keyname): _controlLog = logging.getLogger("thandy-ctrl") def formatLogString(s): + """DOCDOC""" s = '"%s"' % re.sub(r'(["\\])', r'\\\1', s) s = s.replace("\n", "\\n") return s -- cgit v1.2.3