From 5a6c54aeb95fcfdc70bef20e4a24a0bceed9ba45 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 15 Dec 2008 21:18:19 +0000 Subject: Implement lengths in thandy objects, mostly: Accept them, and when they're present, don't fetch more bytes than specified, since that would be dangerous. Include lengths in every generated object type except for the timestamp, since that would break exising code. git-svn-id: file:///home/or/svnrepo/updater/trunk@17629 55e972cd-5a19-0410-ae62-a4d7a52db4cd --- TODO | 13 ++++++++++++- lib/thandy/SignerCLI.py | 5 ++++- lib/thandy/download.py | 18 ++++++++++++++++-- lib/thandy/formats.py | 43 +++++++++++++++++++++++++++++-------------- lib/thandy/repository.py | 18 +++++++++++++++++- specs/thandy-spec.txt | 21 ++++++++++++++++----- 6 files changed, 94 insertions(+), 24 deletions(-) diff --git a/TODO b/TODO index ce1f126..10e8627 100644 --- a/TODO +++ b/TODO @@ -28,7 +28,18 @@ o Decouple install from check: they are not necessarily related. any cached yet. - Security stuff that we should do that needs format changes. - 2 Whenever we list a hash in a metafile, also list a file length. + . Whenever we list a hash in a metafile, also list a file length. + o Implement parsing; use length, when present, as a maximum + believable value to make sure we don't download too much + o Include lengths in generated packages and bundles + . Specify use of length field. + - Once everybody has been wanted to update their clients, include + lengths in timestamp files. + - Make lengths mandatory + - Maybe make lengths enforced for purposes other than a maximum + during fetch. + - Maybe stop early if Content-Length is greater than the expected + length. - Think more about issues 4, 7(A,B,C) diff --git a/lib/thandy/SignerCLI.py b/lib/thandy/SignerCLI.py index 7427474..b3e4a8b 100644 --- a/lib/thandy/SignerCLI.py +++ b/lib/thandy/SignerCLI.py @@ -92,17 +92,20 @@ def makebundle(args): configFile = args[0] packages = {} + packageLen = {} for pkgFile in args[1:]: print "Loading", pkgFile f = open(pkgFile, 'r') p = json.load(f) f.close() + packageLen = os.stat(pkgFile).st_size _, r, _ = thandy.formats.checkSignedObj(p) if r != 'package': print pkgFile, "was not a package" packages[p['signed']['name']] = p['signed'] - bundleObj = thandy.formats.makeBundleObj(configFile, packages.__getitem__) + bundleObj = thandy.formats.makeBundleObj(configFile, packages.__getitem__, + packageLen.__getitem__) signable = thandy.formats.makeSignable(bundleObj) ks = getKeyStore() diff --git a/lib/thandy/download.py b/lib/thandy/download.py index 0c0d383..1d969e5 100644 --- a/lib/thandy/download.py +++ b/lib/thandy/download.py @@ -345,8 +345,8 @@ class DownloadStatusLog: class DownloadJob: """Abstract base class. Represents a thing to be downloaded, and the knowledge of how to download it.""" - def __init__(self, targetPath, tmpPath, wantHash=None, repoFile=None, - useTor=False): + def __init__(self, targetPath, tmpPath, wantHash=None, + repoFile=None, useTor=False, wantLength=None): """Create a new DownloadJob. When it is finally downloaded, store it in targetPath. Store partial results in tmpPath; if there is already a file in tmpPath, assume that it is an @@ -357,6 +357,7 @@ class DownloadJob: self._destPath = targetPath self._tmpPath = tmpPath self._wantHash = wantHash + self._wantLength = wantLength self._repoFile = repoFile self._useTor = useTor @@ -470,6 +471,12 @@ class DownloadJob: have_length = os.stat(self._tmpPath).st_size logging.info("Have stalled file for %s with %s bytes", url, have_length) + if self._wantLength != None: + if self._wantLength >= have_length: + logging.warn("Stalled file is too long; removing it") + self._removeTmpFile() + haveStalled = False + have_length = None else: have_length = None @@ -506,6 +513,13 @@ class DownloadJob: total += len(c) logging.debug("Got %s/%s bytes from %s", total, expectLength, url) + if self._wantLength != None and total > self._wantLength: + logging.warn("Read too many bytes from %s; got %s, but " + "wanted %s", url, total, self._wantLength) + break + + if self._wantLength != None and total != self._wantLength: + logging.warn("Length wrong on file %s", url) finally: if f_in is not None: diff --git a/lib/thandy/formats.py b/lib/thandy/formats.py index 9f136e5..0dc293c 100644 --- a/lib/thandy/formats.py +++ b/lib/thandy/formats.py @@ -375,6 +375,7 @@ SIG_METHOD_SCHEMA = S.AnyStr() RELPATH_SCHEMA = PATH_PATTERN_SCHEMA = S.AnyStr() URL_SCHEMA = S.AnyStr() VERSION_SCHEMA = S.ListOf(S.Any()) #XXXX WRONG +LENGTH_SCHEMA = S.Int(lo=0) # A single signature of an object. Indicates the signature, the id of the # signing key, and the signing method. @@ -392,7 +393,7 @@ ROLENAME_SCHEMA = S.AnyStr() # A role: indicates that a key is allowed to certify a kind of # document at a certain place in the repo. -ROLE_SCHEMA = S.Struct([ROLENAME_SCHEMA, PATH_PATTERN_SCHEMA]) +ROLE_SCHEMA = S.Struct([ROLENAME_SCHEMA, PATH_PATTERN_SCHEMA], allowMore=True) # A Keylist: indicates a list of live keys and their roles. KEYLIST_SCHEMA = S.Obj( @@ -415,12 +416,12 @@ MIRRORLIST_SCHEMA = S.Obj( TIMESTAMP_SCHEMA = S.Obj( _type = S.Str("Timestamp"), at = TIME_SCHEMA, - m = S.Struct([TIME_SCHEMA, HASH_SCHEMA]), - k = S.Struct([TIME_SCHEMA, HASH_SCHEMA]), + m = S.Struct([TIME_SCHEMA, HASH_SCHEMA], [LENGTH_SCHEMA], allowMore=True), + k = S.Struct([TIME_SCHEMA, HASH_SCHEMA], [LENGTH_SCHEMA], allowMore=True), b = S.DictOf(keySchema=S.AnyStr(), valSchema= - S.Struct([ VERSION_SCHEMA, RELPATH_SCHEMA, TIME_SCHEMA, HASH_SCHEMA ])) - ) + S.Struct([ VERSION_SCHEMA, RELPATH_SCHEMA, TIME_SCHEMA, HASH_SCHEMA ], [LENGTH_SCHEMA], allowMore=True)) + ) # A Bundle: lists a bunch of packages that should be updated in tandem BUNDLE_SCHEMA = S.Obj( @@ -436,6 +437,7 @@ BUNDLE_SCHEMA = S.Obj( version=VERSION_SCHEMA, path=RELPATH_SCHEMA, hash=HASH_SCHEMA, + length=S.Opt(LENGTH_SCHEMA), order=S.Struct([S.Int(), S.Int(), S.Int()]), optional=S.Opt(S.Bool()), gloss=S.DictOf(S.AnyStr(), S.AnyStr()), @@ -479,7 +481,8 @@ OBSOLETE_RPM_FORMAT_ITEM_SCHEMA = S.Obj( ITEM_INFO_SCHEMA = S.AllOf([CHECK_ITEM_SCHEMA, INSTALL_ITEM_SCHEMA]) -ITEM_SCHEMA = S.Struct([RELPATH_SCHEMA, HASH_SCHEMA], [ITEM_INFO_SCHEMA], +ITEM_SCHEMA = S.Struct([RELPATH_SCHEMA, HASH_SCHEMA], + [ITEM_INFO_SCHEMA, LENGTH_SCHEMA], allowMore=True) def checkPackageFormatConsistency(obj): @@ -578,17 +581,18 @@ class Keylist(KeyDB): self.addKey(key) class StampedInfo: - def __init__(self, ts, hash, version=None, relpath=None): + def __init__(self, ts, hash, version=None, relpath=None, length=None): self._ts = ts self._hash = hash self._version = version self._relpath = relpath + self._length = length @staticmethod - def fromJSonFields(timeStr, hashStr): + def fromJSonFields(timeStr, hashStr, length=None): t = parseTime(timeStr) h = parseHash(hashStr) - return StampedInfo(t, h) + return StampedInfo(t, h, length=length) def getHash(self): return self._hash @@ -596,6 +600,9 @@ class StampedInfo: def getRelativePath(self): return self._relpath + def getLength(self): + return self._length + class TimestampFile: def __init__(self, at, mirrorlistinfo, keylistinfo, bundleinfo): self._time = at @@ -607,15 +614,18 @@ class TimestampFile: def fromJSon(obj): # must be validated. at = parseTime(obj['at']) - m = StampedInfo.fromJSonFields(*obj['m'][:2]) - k = StampedInfo.fromJSonFields(*obj['k'][:2]) + m = StampedInfo.fromJSonFields(*obj['m'][:3]) + k = StampedInfo.fromJSonFields(*obj['k'][:3]) b = {} for name, bundle in obj['b'].iteritems(): v = bundle[0] rp = bundle[1] t = parseTime(bundle[2]) h = parseHash(bundle[3]) - b[name] = StampedInfo(t, h, v, rp) + ln = None + if len(bundle) > 4: + ln = bundle[4] + b[name] = StampedInfo(t, h, v, rp, ln) return TimestampFile(at, m, k, b) @@ -672,6 +682,8 @@ def makePackageObj(config_fname, package_fname): f = open(package_fname, 'rb') digest = getFileDigest(f) + f.close() + f_len = os.stat(package_fname).st_size # Check fields! extra = {} @@ -681,7 +693,7 @@ def makePackageObj(config_fname, package_fname): 'location' : r['location'], #DOCDOC 'version' : r['version'], 'format' : r['format'], - 'files' : [ [ r['relpath'], formatHash(digest), extra ] ], + 'files' : [ [ r['relpath'], formatHash(digest), extra, f_len ] ], 'shortdesc' : shortDescs, 'longdesc' : longDescs } @@ -725,7 +737,7 @@ def makePackageObj(config_fname, package_fname): return result -def makeBundleObj(config_fname, getPackage): +def makeBundleObj(config_fname, getPackage, getPackageLength): packages = [] def ShortGloss(lang, val): packages[-1]['gloss'][lang] = val def LongGloss(lang, val): packages[-1]['longgloss'][lang] = val @@ -763,6 +775,9 @@ def makeBundleObj(config_fname, getPackage): raise thandy.FormatException("No such package as %s"%p['name']) p['hash'] = formatHash(getDigest(pkginfo)) + length = getPackageLength(p['name']) + if length != None: + p['length'] = length if p['path'] == None: p['path'] = pkginfo['location'] if p['version'] == None: diff --git a/lib/thandy/repository.py b/lib/thandy/repository.py index 3dc2864..bce79fb 100644 --- a/lib/thandy/repository.py +++ b/lib/thandy/repository.py @@ -33,6 +33,9 @@ class RepositoryFile: self._signedFormat = signedFormat self._needSigs = needSigs + # The length of the file as stored on disk. + self._length = None + # The contents of the file, parsed. None if we haven't loaded # the file. self._main_obj = None @@ -70,6 +73,7 @@ class RepositoryFile: f = None fd = os.open(fname, os.O_RDONLY) try: + self._length = os.fstat(fd).st_size f = os.fdopen(fd, 'r') except: os.close(fd) @@ -295,7 +299,8 @@ class LocalRepository: return None def getFilesToUpdate(self, now=None, trackingBundles=(), hashDict=None, - usePackageSystem=True, installableDict=None): + lengthDict=None, usePackageSystem=True, + installableDict=None): """Return a set of relative paths for all files that we need to fetch. Assumes that we care about the bundles 'trackingBundles'. @@ -312,6 +317,9 @@ class LocalRepository: if installableDict == None: installableDict = {} + if lengthDict == None: + lengthDict = {} + pkgItems = None need = set() @@ -370,6 +378,10 @@ class LocalRepository: ts.getKeylistInfo().getHash() hashDict[self._mirrorlistFile.getRelativePath()] = \ ts.getMirrorlistInfo().getHash() + lengthDict[self._keylistFile.getRelativePath()] = \ + ts.getKeylistInfo().getLength() + lengthDict[self._mirrorlistFile.getRelativePath()] = \ + ts.getMirrorlistInfo().getLength() h_kf = thandy.formats.getDigest(self._keylistFile.get()) h_expected = ts.getKeylistInfo().getHash() @@ -407,6 +419,7 @@ class LocalRepository: rp = binfo.getRelativePath() h_expected = binfo.getHash() hashDict[rp] = h_expected + lengthDict[rp] = binfo.getLength() bfile = self.getBundleFile(rp) try: bfile.load() @@ -440,6 +453,7 @@ class LocalRepository: pfile = self.getPackageFile(rp) h_expected = thandy.formats.parseHash(pkginfo['hash']) hashDict[rp] = h_expected + lengthDict[rp] = pkginfo.get('length') try: pfile.load() except OSError: @@ -498,6 +512,8 @@ class LocalRepository: h_expected = thandy.formats.parseHash(h) hashDict[rp] = h_expected + if len(f) > 3: + lengthDict[rp] = h[3] fn = self.getFilename(rp) try: h_got = thandy.formats.getFileDigest(fn) diff --git a/specs/thandy-spec.txt b/specs/thandy-spec.txt index 3726c94..5cc4fa6 100644 --- a/specs/thandy-spec.txt +++ b/specs/thandy-spec.txt @@ -397,10 +397,10 @@ { "_type" : Timestamp, "at" : TIME, - "m" : [ TIME, HASH ], - "k" : [ TIME, HASH ], + "m" : [ TIME, HASH, LENGTH ], + "k" : [ TIME, HASH, LENGTH ], "b" : { NAME : - [ [ Version, Path, Time, Hash ] ] } + [ [ Version, Path, Time, Hash, (Length) ] ] } } TIME is when the timestamp was signed. MIRRORLISTHASH is the digest @@ -409,6 +409,8 @@ bundles and their locations and hashes. The "name" of a bundle (in this context) is the directory component of the bundle's path. + The LENGTH field may be absent on very old timestamp files. + 3.6. File formats: bundle files { "_type" : "Bundle", @@ -423,6 +425,7 @@ "version" : VERSION, "path" : PATH, "hash" : HASH, + ("length" : LENGTH), "order" : [ INST, UPDATE, REMOVE ], ("optional : BOOL, ) "gloss" : { LANG : TEXT }, @@ -446,6 +449,9 @@ language. The UI should display the must appropriate language to the user. + The LENGTH field is required on all new bundles, but may be absent + on very old ones. + 3.7. File formats: package files { "_type" : "Package", @@ -454,13 +460,14 @@ "version" : VERSION, "format" : FMT, "ts" : TIME, - "files" : [ [ PATH, HASH, INFO ], ... ], + "files" : [ [ PATH, HASH, INFO, (LENGTH) ], ... ], "shortdesc" : { LANG : DESC, ... }, "longdesc" : { LANG : DESC, ... }, } Most elements are self-explanatory. To interpret the 'INFO' entry - for each installable file, see section 6. + for each installable file, see section 6. The LENGTH field is + required on all new packages, but may be absent on very old ones. No two package files in the same repository should have the same name and version. If a package needs to be changed, the version @@ -508,6 +515,10 @@ Clients SHOULD cache at least the latest versions they have received of all files. + When dowloading a file, if the client knows what that file's length + should be, it SHOULD NOT accept a longer file, and SHOULD NOT + continue the download past the file length. + 4.1.1. Download preferences Users should be able to specify that packages must be only -- cgit v1.2.3