diff options
| author | Nick Mathewson <nickm@torproject.org> | 2010-07-25 15:13:04 -0400 | 
|---|---|---|
| committer | Nick Mathewson <nickm@torproject.org> | 2010-07-25 16:57:24 -0400 | 
| commit | bbb6cb5fe6a61aa0975dcd9df11f9f648f6abba2 (patch) | |
| tree | c97f164ecc7811ef3d6ba07d51a8c05a695d222d | |
| parent | 5f2d07ce78e6443592f84a803fe2adced49ada39 (diff) | |
More unit tests and documentation for the Thandy code
| -rw-r--r-- | lib/thandy/ServerCLI.py | 6 | ||||
| -rw-r--r-- | lib/thandy/checkJson.py | 73 | ||||
| -rw-r--r-- | lib/thandy/download.py | 16 | ||||
| -rw-r--r-- | lib/thandy/encodeToXML.py | 7 | ||||
| -rw-r--r-- | lib/thandy/formats.py | 153 | ||||
| -rw-r--r-- | lib/thandy/keys.py | 136 | ||||
| -rw-r--r-- | lib/thandy/master_keys.py | 4 | ||||
| -rw-r--r-- | lib/thandy/repository.py | 14 | ||||
| -rw-r--r-- | lib/thandy/tests.py | 2 | ||||
| -rw-r--r-- | lib/thandy/util.py | 6 | ||||
| -rw-r--r-- | specs/thandy-spec.txt | 1 | 
11 files changed, 359 insertions, 59 deletions
| diff --git a/lib/thandy/ServerCLI.py b/lib/thandy/ServerCLI.py index 9212846..4b6c1a6 100644 --- a/lib/thandy/ServerCLI.py +++ b/lib/thandy/ServerCLI.py @@ -15,6 +15,9 @@ def tstamp():      return time.strftime("%Y%m%d_%H%M%S", time.localtime())  def snarf(fname): +    """Return a string containing the binary contents of the file named +       "fname." +    """      f = open(fname, 'rb')      try:          return f.read() @@ -22,6 +25,9 @@ def snarf(fname):          f.close()  def snarfObj(fname): +    """Return a 2-tuple of (object, len), where the object stored in json +       format in the file 'fname', and len is that file's length. +    """      f = open(fname, 'r')      try:          length = os.fstat(f.fileno()).st_size diff --git a/lib/thandy/checkJson.py b/lib/thandy/checkJson.py index e63a1fc..ab33d00 100644 --- a/lib/thandy/checkJson.py +++ b/lib/thandy/checkJson.py @@ -1,13 +1,18 @@  # Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information. +"""This file defines an object oriented pattern matching system used to +   check decoded xJSON objects. +""" +  import re  import sys  import thandy  class Schema: -    """A schema matches a set of possible Python objects, of types -       that are encodable in JSON.""" +    """A Schema matches a set of possible Python objects, of types +       that are encodable in JSON.  This is an abstract base type; +       see implementations below."""      def matches(self, obj):          """Return True if 'obj' matches this schema, False if it doesn't."""          try: @@ -73,7 +78,7 @@ class RE(Schema):  class Str(Schema):      """ -       Matches a particular string. +       Matches a particular string, and no other.         >>> s = Str("Hi")         >>> s.matches("Hi") @@ -113,7 +118,7 @@ class AnyStr(Schema):  class OneOf(Schema):      """ -       Matches an object that matches any one of several schemas. +       Matches an object that matches any one of several sub-schemas.         >>> s = OneOf([ListOf(Int()), Str("Hello"), Str("bye")])         >>> s.matches(3) @@ -139,11 +144,19 @@ class OneOf(Schema):  class AllOf(Schema):      """Matches the intersection of a list of schemas. + +       >>> s = AllOf([RE(r'.*end'), RE(r'begin.*')]) +       >>> s.matches("an end") +       False +       >>> s.matches("begin well") +       False +       >>> s.matches("begin and end") +       True      """      def __init__(self, required):          self._subschemas = required[:] -    def checkMatche(self, obj): +    def checkMatch(self, obj):          for s in self._subschemas:              s.checkMatch(obj) @@ -174,6 +187,10 @@ class ListOf(Schema):         False      """      def __init__(self, schema, minCount=0, maxCount=sys.maxint,listName="list"): +        """Create a new ListOf schema to match anywhere from minCount to +           maxCount objects conforming to 'schema'.  When generating errors, +           we will call this type 'listName'. +        """          self._schema = schema          self._minCount = minCount          self._maxCount = maxCount @@ -240,6 +257,13 @@ class Struct(Schema):      """      def __init__(self, subschemas, optschemas=[], allowMore=False,                   structName="list"): +        """Create a new Struct schema to match lists that begin with +           each item in 'subschemas' in order.  If there are more elements +           than items in subschemas, additional elements much match +           the items in optschemas (if any).  If there are more elements +           than items in subschemas and optschemas put together, then +           the object is only matched when allowMore is true. +        """          self._subschemas = subschemas + optschemas          self._min = len(subschemas)          self._allowMore = allowMore @@ -276,6 +300,8 @@ class DictOf(Schema):         False      """      def __init__(self, keySchema, valSchema): +        """Return a new DictSchema to match objects all of whose keys match +           keySchema, and all of whose values match valSchema."""          self._keySchema = keySchema          self._valSchema = valSchema      def checkMatch(self, obj): @@ -306,8 +332,9 @@ class Opt:  class Obj(Schema):      """ -       Matches a dict from specified keys to key-specific types.  Unrecognized -       keys are allowed. +       Matches a dict from specified keys to key-specific types.  All +       keys are requied unless explicitly marked with Opt. +       Unrecognized keys are always allowed.         >>> s = Obj(a=AnyStr(), bc=Struct([Int(), Int()]))         >>> s.matches({'a':"ZYYY", 'bc':[5,9]}) @@ -324,7 +351,6 @@ class Obj(Schema):          self._objname = _objname          self._required = d.items() -      def checkMatch(self, obj):          if not isinstance(obj, dict):              raise thandy.FormatException("Wanted a %s; did not get a dict"% @@ -372,6 +398,7 @@ class TaggedObj(Schema):      """      def __init__(self, tagName, tagIsOptional=False, ignoreUnrecognized=True,                   **tagvals): +        #DOCDOC          self._tagName = tagName          self._tagOpt = tagIsOptional          self._ignoreOthers = ignoreUnrecognized @@ -421,17 +448,26 @@ class Int(Schema):         >>> Int(lo=10, hi=30).matches(5)         False      """ -    def __init__(self, lo=-sys.maxint, hi=sys.maxint): +    def __init__(self, lo=None, hi=None): +        """Return a new Int schema to match items between lo and hi inclusive. +        """ +        if lo is not None and hi is not None: +            assert lo <= hi          self._lo = lo          self._hi = hi +        plo,phi=lo,hi +        if plo is None: plo = "..." +        if phi is None: phi = "..." +        self._range = "[%s,%s]"%(plo,phi)      def checkMatch(self, obj):          if isinstance(obj, bool) or not isinstance(obj, (int, long)):              # We need to check for bool as a special case, since bool              # is for historical reasons a subtype of int.              raise thandy.FormatException("Got %r instead of an integer"%obj) -        elif not (self._lo <= obj <= self._hi): -            raise thandy.FormatException("%r not in range [%r,%r]" -                                         %(obj, self._lo, self._hi)) +        elif (self._lo is not None and self._lo > obj) or ( +            self._hi is not None and self._hi < obj): +            raise thandy.FormatException("%r not in range %s" +                                         %(obj, self._range))  class Bool(Schema):      """ @@ -450,7 +486,20 @@ class Bool(Schema):              raise thandy.FormatException("Got %r instead of a boolean"%obj)  class Func(Schema): +    """ +       Matches an object based on the value of some boolen function + +       >>> even = lambda x: (x%2)==0 +       >>> s = Func(even, baseSchema=Int()) +       >>> s.matches(99) +       False +       >>> s.matches(98) +       True +       >>> s.matches("ninety-eight") +       False +    """      def __init__(self, fn, baseSchema=None): +        #DOCDOC          self._fn = fn          self._base = baseSchema      def checkMatch(self, obj): diff --git a/lib/thandy/download.py b/lib/thandy/download.py index 8e774ec..52c6fe4 100644 --- a/lib/thandy/download.py +++ b/lib/thandy/download.py @@ -31,7 +31,7 @@ class DownloadManager:          # Work queue of DownloadJobs that we intend to process once a thread          # is free.          self.downloadQueue = Queue.Queue() -        # DOCDOC +        # Work queue of functions that need to be run in the main thread.          self.resultQueue = Queue.Queue()          # List of worker threads. @@ -43,10 +43,10 @@ class DownloadManager:          for t in self.threads:              t.setDaemon(True) -        # DOCDOC +        # Used to remember the status of downloads to avoid too much retrying          self.statusLog = DownloadStatusLog() -        #DOCDOC +        # Used to tell the main thread to raise an exception.          self._raiseMe = None      def start(self): @@ -85,13 +85,17 @@ class DownloadManager:      def wait(self):          """Pause until we have no active or pending jobs."""          while not self.finished(): +            # Wait till somebody tells us to do something.              self.done.acquire()              self.done.wait()              self.done.release() +            # Did something go wrong?              if self._raiseMe:                  raise self._raiseMe +            # Suck functions out of resultQueue and run them until +            # resultQueue is empty.              try:                  while True:                      item = self.resultQueue.get(block=False) @@ -130,7 +134,7 @@ class DownloadManager:      def _thread(self, idx):          # Run in the background per thread.  idx is the number of the thread.          while True: -            job = self.downloadQueue.get() # Grab job from queue. +            job = self.downloadQueue.get() # Grab a job from queue.              rp = job.getRelativePath()              success = False              try: @@ -215,7 +219,6 @@ _STATUS_LOG_SCHEMA = S.Obj(      networkFailures=_FAIL_SCHEMA)  del S -  class DownloadStatusLog:      """Tracks when we can retry downloading from various mirrors. @@ -708,7 +711,8 @@ def getConnection(url, useTor, have_length=None):  if __name__ == '__main__': -    # Trivial CLI to test out downloading. +    # Trivial CLI to test out downloading: fetches files with urrlib2 +    # as specified on the command line.  Launches them all in parallel.      import getopt      options, args = getopt.getopt(sys.argv[1:], "", diff --git a/lib/thandy/encodeToXML.py b/lib/thandy/encodeToXML.py index c563d77..b2fe351 100644 --- a/lib/thandy/encodeToXML.py +++ b/lib/thandy/encodeToXML.py @@ -1,5 +1,12 @@  # Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information. +"""This module converts JSon to XML.  It is allegedly for people who have +   libraries that can read XML, but nothing that can read JSon. + +   Nick questions whether it's actually useful.  Certainly, it's quite +   unlikely that you'll be able to check any keys once you've converted it. +""" +  import re  import thandy diff --git a/lib/thandy/formats.py b/lib/thandy/formats.py index d6f646a..a59f696 100644 --- a/lib/thandy/formats.py +++ b/lib/thandy/formats.py @@ -15,9 +15,16 @@ import Crypto.Hash.SHA256  class KeyDB:      """A KeyDB holds public keys, indexed by their key IDs.""" +    ## Fields: +    #   _keys: a map from keyid to public key.      def __init__(self): +        """Create a new empty KeyDB."""          self._keys = {}      def addKey(self, k): +        """Insert a thandy.keys.PublicKey object, 'k', into this KeyDB.  If +           we already had this key, retain the old one, but add any roles in +           the new key 'k'. +        """          keyid = k.getKeyID()          try:              oldkey = self._keys[keyid] @@ -28,8 +35,12 @@ class KeyDB:              pass          self._keys[k.getKeyID()] = k      def getKey(self, keyid): +        """Return the key whose key ID is 'keyid'.  If there is no such key, +           raise KeyError."""          return self._keys[keyid]      def getKeysByRole(self, role, path): +        """Return a list of all keys that have the role 'role' set for files +           in 'path'."""          results = []          for key in self._keys.itervalues():              for r,p in key.getRoles(): @@ -39,14 +50,17 @@ class KeyDB:          return results      def getKeysFuzzy(self, keyid): +        """Return a list of all keys whose key IDs begin with 'keyid'."""          r = []          for k,v in self._keys.iteritems():              if k.startswith(keyid):                  r.append(v)          return r      def iterkeys(self): +        """Return a new iterator of all the keys in this KeyDB."""          return self._keys.itervalues() +# Internal cache that maps role paths to regex objects that parse them.  _rolePathCache = {}  def rolePathMatches(rolePath, path):      """Return true iff the relative path in the filesystem 'path' conforms @@ -109,7 +123,12 @@ class SignatureStatus:  def checkSignatures(signed, keyDB, role=None, path=None):      """Given an object conformant to SIGNED_SCHEMA and a set of public keys -       in keyDB, verify the signed object in 'signed'.""" +       in keyDB, verify the signed object is signed.  If 'role' and 'path' +       are provided, verify that the signing key has the correct role to +       sign this document as stored in 'path'. + +       Returns a SignatureStatus. +    """      SIGNED_SCHEMA.checkMatch(signed) @@ -156,7 +175,10 @@ def checkSignatures(signed, keyDB, role=None, path=None):      return SignatureStatus(goodSigs, badSigs, unknownSigs, tangentialSigs) -def canonical_str_encoder(s): +def _canonical_str_encoder(s): +    """Helper for encodeCanonical: encodes a string as the byte sequence +       expected for canonical JSON format. +    """      s = '"%s"' % re.sub(r'(["\\])', r'\\\1', s)      if isinstance(s, unicode):          return s.encode("utf-8") @@ -168,7 +190,7 @@ def _encodeCanonical(obj, outf):      # even let us replace the separators.      if isinstance(obj, basestring): -        outf(canonical_str_encoder(obj)) +        outf(_canonical_str_encoder(obj))      elif obj is True:          outf("true")      elif obj is False: @@ -191,12 +213,12 @@ def _encodeCanonical(obj, outf):              items = obj.items()              items.sort()              for k,v in items[:-1]: -                outf(canonical_str_encoder(k)) +                outf(_canonical_str_encoder(k))                  outf(":")                  _encodeCanonical(v, outf)                  outf(",")              k, v = items[-1] -            outf(canonical_str_encoder(k)) +            outf(_canonical_str_encoder(k))              outf(":")              _encodeCanonical(v, outf)          outf("}") @@ -206,11 +228,11 @@ def _encodeCanonical(obj, outf):  def encodeCanonical(obj, outf=None):      """Encode the object obj in canoncial JSon form, as specified at         http://wiki.laptop.org/go/Canonical_JSON .  It's a restricted -       dialect of json in which keys are always lexically sorted, +       dialect of JSON in which keys are always lexically sorted,         there is no whitespace, floats aren't allowed, and only quote -       and backslash get escaped.  The result is encoded in UTF-8, -       and the resulting bits are passed to outf (if provided), or joined -       into a string and returned. +       and backslash get escaped.  The result is encoded in UTF-8, and +       the resulting bytes are passed to outf (if provided) in several +       calls, or joined into a string and returned.         >>> encodeCanonical("")         '""' @@ -222,6 +244,14 @@ def encodeCanonical(obj, outf=None):         '{"A":[99]}'         >>> encodeCanonical({"x" : 3, "y" : 2})         '{"x":3,"y":2}' +       >>> total = 0 +       >>> def increment(s): +       ...   global total +       ...   total += len(s) +       ... +       >>> encodeCanonical({"x" : 3, "y" : 2, 'z' : [99,3]}, outf=increment) +       >>> total +       24      """      result = None @@ -236,10 +266,9 @@ def encodeCanonical(obj, outf=None):  def getDigest(obj, digestObj=None):      """Update 'digestObj' (typically a SHA256 object) with the digest of -       the canonical json encoding of obj.  If digestObj is none, -       compute the SHA256 hash and return it. - -       DOCDOC string equivalence. +       obj, first encoding it in canonical form if it's a JSON object, +       and taking its UTF-8 encoding if it's in unicode.  If digestObj +       is none, just compute and return the SHA256 hash.      """      useTempDigestObj = (digestObj == None)      if useTempDigestObj: @@ -291,6 +320,9 @@ def getFileDigest(f, digestObj=None):          return digestObj.digest()  def makeSignable(obj): +    """Return a new JSON object of type 'signed' wrapping 'obj', and containing +       no signatures. +    """      return { 'signed' : obj, 'signatures' : [] }  def sign(signed, key): @@ -349,13 +381,23 @@ def parseBase64(s):          raise thandy.FormatException("Invalid base64 encoding")  def parseHash(s): +    """Parse a base64-encoded digest. + +       (This is just like paseBase64, but it checks the size.) +    """      h = parseBase64(s)      if len(h) != Crypto.Hash.SHA256.digest_size:          raise thandy.FormatException("Bad hash length")      return h +# Abbreviate the thandy.checkJson module here, since we're going to be +# using all of its members a lot here.  S = thandy.checkJson +######### +## These schemas describe, in OO constraint-checking form, all the Thandy +## data formats. +  # A date, in YYYY-MM-DD HH:MM:SS format.  TIME_SCHEMA = S.RE(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')  # A hash, base64-encoded @@ -370,6 +412,15 @@ RSAKEY_SCHEMA = S.Obj(      _keytype=S.Str("rsa"),      e=BASE64_SCHEMA,      n=BASE64_SCHEMA) +# An RSA key with private-key informartion: subtype of RSAKEY_SCHEMA. +RSAKEY_PRIVATE_SCHEMA = S.Obj( +    _keytype=S.Str("rsa"), +    e=BASE64_SCHEMA, +    n=BASE64_SCHEMA, +    d=BASE64_SCHEMA, +    p=BASE64_SCHEMA, +    q=BASE64_SCHEMA, +    u=BASE64_SCHEMA)  # Any public key.  PUBKEY_SCHEMA = S.Obj(      _keytype=S.AnyStr()) @@ -393,6 +444,7 @@ SIGNED_SCHEMA = S.Obj(      signed=S.Any(),      signatures=S.ListOf(SIGNATURE_SCHEMA)) +# The name of a role  ROLENAME_SCHEMA = S.AnyStr()  # A role: indicates that a key is allowed to certify a kind of @@ -459,6 +511,7 @@ def checkWinRegistryKeyname(keyname):      elif not key or not value:          raise thandy.FormatException("Bad registry entry.") +# A string holding the name of a windows registry key  REGISTRY_KEY_SCHEMA = S.Func(checkWinRegistryKeyname)  CHECK_ITEM_SCHEMA = S.TaggedObj( @@ -516,6 +569,10 @@ PACKAGE_SCHEMA = S.Func(checkPackageFormatConsistency, PACKAGE_SCHEMA)  ALL_ROLES = ('timestamp', 'mirrors', 'bundle', 'package', 'master')  class Keylist(KeyDB): +    """A list of keys, as extracted from a Thandy keys.txt JSon file. + +       This class extends KeyDB, so you can acces keys more easily. +    """      def __init__(self):          KeyDB.__init__(self) @@ -542,6 +599,16 @@ class Keylist(KeyDB):              self.addKey(key)  class StampedInfo: +    """This class holds a single entry in a timestamp file.  Each +       StampedInfo says when a file was last modified, and what its +       hash was.  It may also provide useful info about where to find it, +       its version, its length, and so on. +    """ +    ## _ts -- the time when the file was last modified +    ## _hash -- the hash of the most recent version of the file +    ## _version -- version of the most recent file. May be None +    ## _relpath -- where to find this file in the repository +    ## _length -- the length of the file      def __init__(self, ts, hash, version=None, relpath=None, length=None):          self._ts = ts          self._hash = hash @@ -565,6 +632,11 @@ class StampedInfo:          return self._length  class TimestampFile: +    """This class holds all the fields parsed from a thandy timestamp file.""" +    ## _time -- the time when this file was generated +    ## _mirrorListInfo -- a StampedInfo for the keylist. +    ## _keyListInfo -- a StampedInfo for the mirrorlist +    ## _bundleInfo -- map from bundle name to StampedInfo      def __init__(self, at, mirrorlistinfo, keylistinfo, bundleinfo):          self._time = at          self._mirrorListInfo = mirrorlistinfo @@ -575,6 +647,9 @@ class TimestampFile:      def fromJSon(obj):          # must be validated.          at = parseTime(obj['at']) +        # We slice these lists because we want to support old thandys +        # that didn't include the length on these, and new ones that +        # might include more fields          m = StampedInfo.fromJSonFields(*obj['m'][:3])          k = StampedInfo.fromJSonFields(*obj['k'][:3])          b = {} @@ -606,6 +681,13 @@ class TimestampFile:          return self._bundleInfo  def readConfigFile(fname, needKeys=(), optKeys=(), preload={}): +    """Read a configuration file from 'fname'.  A configuration file is a +       python script that runs in a temporary namespace prepopulated +       with the contents of 'reload'.  It is a thandy.FormatException +       if the file finishes executation without setting every variable +       listed in 'needKeys'.  These settings, plus any variables whose names +       are listed in 'optKeys', are returned in a new dict. +    """      parsed = preload.copy()      result = {}      execfile(fname, parsed) @@ -625,6 +707,10 @@ def readConfigFile(fname, needKeys=(), optKeys=(), preload={}):      return result  def makePackageObj(config_fname, package_fname): +    """Given a description of a thandy package in config_fname, and the +       name of the one file (only one is supported now!) in package_fname, +       return a new unsigned package object. +    """      preload = {}      shortDescs = {}      longDescs = {} @@ -702,6 +788,12 @@ def makePackageObj(config_fname, package_fname):      return result  def makeBundleObj(config_fname, getPackage, getPackageLength): +    """Given a description of a thandy  bundle in config_fname, +       return a new unsigned bundle object.  getPackage must be a function +       returning a package object for every package the bundle requires +       when given the package's name as input.  getPacakgeLength +       must be a function returning the length of the package file. +    """      packages = []      def ShortGloss(lang, val): packages[-1]['gloss'][lang] = val      def LongGloss(lang, val): packages[-1]['longgloss'][lang] = val @@ -749,20 +841,39 @@ def makeBundleObj(config_fname, getPackage, getPackageLength):      return result  def versionIsNewer(v1, v2): +    """Return true if version v1 is newer than v2.  Both versions are +       given as lists of version components. +       >>> versionIsNewer([1,2,3], [1,2,3,4]) +       False +       >>> versionIsNewer([1,2,3,5], [1,2,3,4]) +       True +       >>> versionIsNewer([1,3,3,5], [1,2,3,5]) +       True +    """      return v1 > v2  def getBundleKey(bundlePath):      """ +       Return all parts of a bundle's "key" as used in a timestamp file, +       given its full filename. +         >>> getBundleKey("/bundleinfo/tor-browser/win32/some-file-name.txt")         '/bundleinfo/tor-browser/win32/'      """ -    # No, we can't use "os.path.directory."  That isn't os-independent. +    # No, we can't use "os.path.directory" or "os.path.split".  Those are +    # OD-dependent, and all of our paths are in Unix format.      idx = bundlePath.rindex("/")      return bundlePath[:idx+1]  def makeTimestampObj(mirrorlist_obj, mirrorlist_len,                       keylist_obj, keylist_len,                       bundle_objs): +    """Return a new unsigned timestamp object for a given set of inputs, +       where mirrorlist_obj and mirrorlist_len are a (signed, unencoded) +       mirror list, and its length on disk; keylist_obj and keylist_len +       are the same for the key list, and bundle_objs is a list of +       (object, length) tuples for all the bundles. +    """      result = { '_type' : 'Timestamp',                 'at' : formatTime(time.time()) }      result['m'] = [ mirrorlist_obj['ts'], @@ -784,6 +895,8 @@ def makeTimestampObj(mirrorlist_obj, mirrorlist_len,      return result  class MirrorInfo: +    """A MirrorInfo holds the parsed value of a thandy mirror list's entry +       for a single mirror."""      def __init__(self, name, urlbase, contents, weight):          self._name = name          self._urlbase = urlbase @@ -809,6 +922,9 @@ class MirrorInfo:                   'weight' : self._weight }  def makeMirrorListObj(mirror_fname): +    """Return a new unsigned mirrorlist object for the mirrors described in +       'mirror_fname'. +    """      mirrors = []      def Mirror(*a, **kw): mirrors.append(MirrorInfo(*a, **kw))      preload = {'Mirror' : Mirror} @@ -821,6 +937,9 @@ def makeMirrorListObj(mirror_fname):      return result  def makeKeylistObj(keylist_fname, includePrivate=False): +    """Return a new unsigned keylist object for the keys described in +       'mirror_fname'. +    """      keys = []      def Key(obj): keys.append(obj)      preload = {'Key': Key} @@ -851,7 +970,11 @@ SCHEMAS_BY_TYPE = {      }  def checkSignedObj(obj, keydb=None): -    # Returns signaturestatus, role, path on sucess. +    """Given a signed object, check whether it is well-formed and correctly +       signed with some key in keydb having the appropriate role.  On +       success, returns a SignatureStatus, the rule used to sign it, +       and the object's path in the repository. +    """      SIGNED_SCHEMA.checkMatch(obj)      try: diff --git a/lib/thandy/keys.py b/lib/thandy/keys.py index 3385ddb..9eb98d0 100644 --- a/lib/thandy/keys.py +++ b/lib/thandy/keys.py @@ -1,6 +1,12 @@  # Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information. -# These require PyCrypto. +"""thandy.keys -- + +   This module defines functionality for public keys, and for a simple +   encrypted keystore. +""" + +# These imports require PyCrypto.  import Crypto.PublicKey.RSA  import Crypto.Hash.SHA256  import Crypto.Cipher.AES @@ -18,28 +24,46 @@ import thandy.util  json = thandy.util.importJSON()  class PublicKey: -    """Abstract base class for public keys.""" +    """An abstract base class for public keys.  A public key object +       always implements some kind of public key, and may also contain +       the corresponding private key data.""" +    ## Fields: +    # _roles: a list of (rolename, path) tuples that indicates which +    #     roles we consider this public key to have.      def __init__(self): +        """Constructor: Initialize a public key."""          self._roles = []      def format(self): +        """Return this public key converted into a JSon object"""          raise NotImplemented()      def sign(self, obj=None, digest=None):          """Sign either a JSon object provided in 'obj', or a digest provided             in 'digest'.  Return a list of (method name, base64-encoded -           signature) tuple. +           signature) tuples.             Requires that this is a private key."""          raise NotImplemented() -    def checkSignature(self, method, data, signature): +    def checkSignature(self, method, signature, obj=None, digest=None): +        """Check the base64-encoded signature in 'signature', which was +           generating using the method with the name 'method', to see +           if it is a correct signature made with this key for either +           a JSon object provided in 'obj' or a digest provided in 'digest'. + +           Returns True if the signature is value; False if it's invalid, and +           UnknownMethod if we don't recognize 'method'. +        """          # returns True, False, or raises UnknownMethod.          raise NotImplemented()      def getKeyID(self): +        """Return a base-64-encoded key ID for this key.  No two distinct +           keys may share the same key ID. +        """          raise NotImplemented()      def getRoles(self):          """Return a list of all roles supported by this key.  A role is             a doctype,pathPattern tuple.          """ -        return self._roles +        return self._roles[:]      def addRole(self, role, path):          """Add a role to the list of roles supported by this key.             A role is a permission to sign a given kind of document @@ -62,8 +86,14 @@ class PublicKey:          return False  if hex(1L).upper() == "0X1L": +    # It looks like integers and longs aren't unified in this version +    # of Python: converting a long to a hex string means there's a trailing +    # 'L' we need to remove.      def intToBinary(number):          """Convert an int or long into a big-endian series of bytes. + +           >>> intToBinary(92807287956601L) +           'Thandy'          """          # This "convert-to-hex, then use binascii" approach may look silly,          # but it's over 10x faster than the Crypto.Util.number approach. @@ -73,6 +103,8 @@ if hex(1L).upper() == "0X1L":              h = "0"+h          return binascii.a2b_hex(h)  elif hex(1L).upper() == "0X1": +    # It looks like integers and longs _are_ unified.  Maybe this is Python 3? +    # In any case, we don't need to remove the trailing L.      def intToBinary(number):          "Variant for future versions of pythons that don't append 'L'."          h = hex(long(number)) @@ -87,18 +119,31 @@ else:  def binaryToInt(binary):     """Convert a big-endian series of bytes into a long. + +      >>> binaryToInt('Hi') +      18537L     """     return long(binascii.b2a_hex(binary), 16)  def intToBase64(number): -    """Convert an int or long to a big-endian base64-encoded value.""" +    """Convert an int or long to a big-endian base64-encoded value. + +        >>> intToBase64(0x4e16a777) +        'Thandw' +    """      return thandy.formats.formatBase64(intToBinary(number))  def base64ToInt(number): -    """Convert a big-endian base64-encoded value to a long.""" +    """Convert a big-endian base64-encoded value to a long. + +        >>> base64ToInt('Thandy') +        1310107511L +    """      return binaryToInt(thandy.formats.parseBase64(number))  def _pkcs1_padding(m, size): +    """Add PKCS padding to the message 'm', so that it's appropriate for +       use with a public key of 'size' bytes."""      # I'd rather use OAEP+, but apparently PyCrypto barely supports      # signature verification, and doesn't seem to support signature      # verification with nondeterministic padding.  "argh." @@ -108,13 +153,16 @@ def _pkcs1_padding(m, size):      return r  def _xor(a,b): -    if a: -        return not b -    else: -        return b +    """Return true iff exactly one of and b are true.  Used to check +       some conditions.""" +    return bool(a) ^ bool(b)  class RSAKey(PublicKey):      """ +    An RSAKey is an implementation of the abstract class 'PublicKey' that +    can sign documents and check signatures using RSA keys of arbitrary +    size. +      >>> k = RSAKey.generate(bits=512)      >>> obj = k.format()      >>> obj['_keytype'] @@ -136,27 +184,33 @@ class RSAKey(PublicKey):      >>> k.checkSignature(method, sig, obj=s2)      False      """ +    ## Fields: (beyond those inherited from PublicKey) +    # key -- a PyCrypto RSA key, public or private.  See Crypto.PublicKey.RSA. +    # keyid -- the base64 key ID for this key, or 'None' if we haven't +    #   generated one yet.      def __init__(self, key): +        """Constructure: Initialize a new RSAKey from a PyCrypto RSA key."""          PublicKey.__init__(self)          self.key = key          self.keyid = None      @staticmethod      def generate(bits=2048): -        """Generate a new RSA key, with modulus length 'bits'.""" +        """Generate and return a new RSA key, with modulus length 'bits'."""          key = Crypto.PublicKey.RSA.generate(bits=bits, randfunc=os.urandom)          return RSAKey(key)      @staticmethod      def fromJSon(obj): -        """Construct an RSA key from the output of the format() method. +        """Construct and return a RSAKey from the JSon object output of the +           RSAKey.format() method.  May raise thandy.FormatException.          """          # obj must match RSAKEY_SCHEMA          thandy.formats.RSAKEY_SCHEMA.checkMatch(obj)          n = base64ToInt(obj['n'])          e = base64ToInt(obj['e']) -        if obj.has_key('d'): +        if thandy.formats.RSAKEY_PRIVATE_SCHEMA.matches(obj):              d = base64ToInt(obj['d'])              p = base64ToInt(obj['p'])              q = base64ToInt(obj['q']) @@ -173,11 +227,11 @@ class RSAKey(PublicKey):          return result      def isPrivateKey(self): -        """Return true iff this key has private-key components""" +        """Return True iff this key has private-key components"""          return hasattr(self.key, 'd')      def format(self, private=False, includeRoles=False): -        """Return a new object to represent this key in json format. +        """Return a new json object to represent this key in json format.             If 'private', include private-key data.  If 'includeRoles',             include role information.          """ @@ -205,6 +259,7 @@ class RSAKey(PublicKey):          return self.keyid      def sign(self, obj=None, digest=None): +        # See PublicKey.sign for documentation          assert _xor(obj == None, digest == None)          method = "sha256-pkcs1"          if digest == None: @@ -214,6 +269,7 @@ class RSAKey(PublicKey):          return [ (method, sig) ]      def checkSignature(self, method, sig, obj=None, digest=None): +        # See PublicKey.checkSignature for documentation          assert _xor(obj == None, digest == None)          if method != "sha256-pkcs1":              raise thandy.UnknownMethod(method) @@ -223,6 +279,7 @@ class RSAKey(PublicKey):          m = _pkcs1_padding(digest, (self.key.size()+1) // 8)          return bool(self.key.verify(m, (sig,))) +# Length of salt to pass to secretToKey  SALTLEN=16  def secretToKey(salt, secret): @@ -233,14 +290,15 @@ def secretToKey(salt, secret):         (The goal is to make offline password-guessing attacks harder by         increasing the time required to convert a password to a key, and to -       make precomputed password tables impossible to generate by ) +       make precomputed password tables impossible to generate by using +       a really huge salt.)      """      assert len(salt) == SALTLEN+1      # The algorithm is basically, 'call the last byte of the salt the      # "difficulty", and all other bytes of the salt S.  Now make      # an infinite stream of S|secret|S|secret|..., and hash the -    # first N bytes of that, where N is determined by the difficulty. +    # first N bytes of that, where N is determined by the difficulty.'      #      # Obviously, this wants a hash algorithm that's tricky to      # parallelize. @@ -270,8 +328,10 @@ def secretToKey(salt, secret):      return d.digest()  def encryptSecret(secret, password, difficulty=0x80): -    """Encrypt the secret 'secret' using the password 'password', -       and return the encrypted result.""" +    """Encrypt the secret 'secret' using the password 'password', and +       return the encrypted result.  The 'difficulty' parameter is a +       one-byte value determining how hard to make the password-to-key +       derivation"""      # The encrypted format is:      #    "GKEY1"  -- 5 octets, fixed, denotes data format.      #    SALT     -- 17 bytes, used to hash password @@ -354,25 +414,44 @@ def decryptSecret(encrypted, password):      return secret  class KeyStore(thandy.formats.KeyDB): -    """Helper to store private keys in an encrypted file.""" +    """Helper class used to store private keys in a (usually) encrypted file. + +       It implements thandy.formats.KeyDB, so you can add keys to it +       and get keys from it in a useful indexed way. +    """ +    ## Fields: +    # _loaded -- boolean: Have we loaded the keys from disk yet? +    # _fname -- The filename that we use to store the keys. +    # _passwd -- The password used to encrypt the keystore, or None if we +    #    haven't asked for it yet. +    # _encrypted -- boolean: Should we treat this as an encrypted keystore? +    # +    # File format: +    #   A JSon object containing a single field, "keys", which in turn +    #   contains a list of json-encoded keys.  This object is stored encrypted +    #   with encryptSecret.      def __init__(self, fname, encrypted=True):          thandy.formats.KeyDB.__init__(self) -        self._loaded = None +        self._loaded = False          self._fname = fname          self._passwd = None          self._encrypted = encrypted      def getpass(self, reprompt=False): +        """If we have already asked for the passphrase, return it. +           If 'reprompt' is true, ask for the password twice,  to make sure +           the user didn't mistype.  Otherwise, only ask once. +        """          if self._passwd != None:              return self._passwd          while 1: -            sys.stderr.write("Password: ") +            sys.stderr.write("Passphrase: ")              pwd = getpass.getpass("")              if not reprompt:                  return pwd -            sys.stderr.write("Confirm: ") +            sys.stderr.write("   Confirm: ")              pwd2 = getpass.getpass("")              if pwd == pwd2:                  return pwd @@ -380,6 +459,10 @@ class KeyStore(thandy.formats.KeyDB):                  print "Mismatch; try again."      def load(self, password=None): +        """Load the keyring into memory, decrypting as needed. + +           May raise various exceptions on failure. +        """          logging.info("Loading private keys from %r...", self._fname)          if not os.path.exists(self._fname):              logging.info("...no such file.") @@ -405,12 +488,17 @@ class KeyStore(thandy.formats.KeyDB):          self._loaded = True      def setPassword(self, passwd): +        """Set the cached password to 'passwd'."""          self._passwd = passwd      def clearPassword(self): +        """Clear the cached password."""          self._passwd = None      def save(self, password=None): +        """Save the keyring to disk.  Note that you must call this method, +           or changes will not be persistent. +        """          if not self._loaded and self._encrypted:              self.load(password) diff --git a/lib/thandy/master_keys.py b/lib/thandy/master_keys.py index 4d014a7..4d0bdac 100644 --- a/lib/thandy/master_keys.py +++ b/lib/thandy/master_keys.py @@ -1,5 +1,9 @@  # Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information. +""" This file is where a master keylist (and a default initial mirror list) +    can currently ship with the Thandy code. +""" +  MASTER_KEYS = [      {          "_keytype": "rsa", diff --git a/lib/thandy/repository.py b/lib/thandy/repository.py index f5a4de2..bc4d73b 100644 --- a/lib/thandy/repository.py +++ b/lib/thandy/repository.py @@ -52,10 +52,13 @@ class RepositoryFile:          self._mtime = None      def clear(self): -        """DOCDOC""" +        """Clear all cached fields loaded from disk; the next time we need one, +           we'll reload the file entirely. +        """          self._main_obj = self._signed_obj = None          self._sigStatus = None          self._mtime = None +        self._length = None      def getRelativePath(self):          """Return the filename for this item relative to the top of the @@ -118,6 +121,7 @@ class RepositoryFile:          return signed_obj, main_obj      def checkFile(self, fname, needhash=None): +        """DOCDOC"""          f = open(fname, 'r')          try:              s = f.read() @@ -167,7 +171,9 @@ class RepositoryFile:          return self._sigStatus  class PkgFile: +    """DOCDOC"""      def __init__(self, repository, relativePath, needHash): +        """DOCDOC"""          self._repository = repository          self._relativePath = relativePath          self._needHash = needHash @@ -175,22 +181,28 @@ class PkgFile:          self._mtime = None      def clear(self): +        """DOCDOC"""          self._mtime = None      def load(self): +        """DOCDOC"""          pass      def getRelativePath(self): +        """DOCDOC"""          return self._relativePath      def getPath(self): +        """DOCDOC"""          fname = self._repository.getFilename(self._relativePath)          return os.path.normpath(fname)      def getExpectedHash(self): +        """DOCDOC"""          return self._needHash      def checkFile(self, fname, needHash=None): +        """DOCDOC"""          if needHash:              if thandy.formats.getFileDigest(fname) != needHash:                  raise thandy.FormatException("Digest for %s not as expected.") diff --git a/lib/thandy/tests.py b/lib/thandy/tests.py index a46258d..e55c07d 100644 --- a/lib/thandy/tests.py +++ b/lib/thandy/tests.py @@ -68,6 +68,7 @@ class CryptoTests(unittest.TestCase):          ks = thandy.keys.KeyStore(fname)          key1 = thandy.keys.RSAKey.generate(512)          key2 = thandy.keys.RSAKey.generate(512) +        key1.addRole('master', '**')          ks.addKey(key1)          ks.addKey(key2)          ks.save(passwd) @@ -75,6 +76,7 @@ class CryptoTests(unittest.TestCase):          ks2 = thandy.keys.KeyStore(fname)          ks2.load(passwd)          self.assertEquals(key1.key.n, ks2.getKey(key1.getKeyID()).key.n) +        self.assertEquals(key1.getRoles(), [("master", "**")])  class UtilTests(unittest.TestCase):      def setUp(self): diff --git a/lib/thandy/util.py b/lib/thandy/util.py index 90f251f..29c7b87 100644 --- a/lib/thandy/util.py +++ b/lib/thandy/util.py @@ -19,6 +19,11 @@ import thandy.master_keys  _jsonModule = None  def importJSON(): +    """Load and return the json module. + +       When everybody has Python 2.6 or later, we can just replace this with +       'import json; return json' +    """      global _jsonModule      if _jsonModule is not None:          return _jsonModule @@ -200,6 +205,7 @@ def getRegistryValue(keyname):  _controlLog = logging.getLogger("thandy-ctrl")  def formatLogString(s): +    """DOCDOC"""      s = '"%s"' % re.sub(r'(["\\])', r'\\\1', s)      s = s.replace("\n", "\\n")      return s diff --git a/specs/thandy-spec.txt b/specs/thandy-spec.txt index eae908b..0db9df1 100644 --- a/specs/thandy-spec.txt +++ b/specs/thandy-spec.txt @@ -216,7 +216,6 @@      /meta/keys.txt           Signed by the root keys; indicates keys and roles. -         [???? I'm using the txt extension here.  Is that smart?]      /meta/mirrors.txt | 
