summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2008-12-15 21:18:19 +0000
committerNick Mathewson <nickm@torproject.org>2008-12-15 21:18:19 +0000
commit5a6c54aeb95fcfdc70bef20e4a24a0bceed9ba45 (patch)
treec711d682c8349a5b2b1f2553b3825f058224a97b
parenteed069baf58952623ea035637eef154e10fa2038 (diff)
Implement lengths in thandy objects, mostly:
Accept them, and when they're present, don't fetch more bytes than specified, since that would be dangerous. Include lengths in every generated object type except for the timestamp, since that would break existing code. git-svn-id: file:///home/or/svnrepo/updater/trunk@17629 55e972cd-5a19-0410-ae62-a4d7a52db4cd
-rw-r--r--TODO13
-rw-r--r--lib/thandy/SignerCLI.py5
-rw-r--r--lib/thandy/download.py18
-rw-r--r--lib/thandy/formats.py43
-rw-r--r--lib/thandy/repository.py18
-rw-r--r--specs/thandy-spec.txt21
6 files changed, 94 insertions, 24 deletions
diff --git a/TODO b/TODO
index ce1f126..10e8627 100644
--- a/TODO
+++ b/TODO
@@ -28,7 +28,18 @@ o Decouple install from check: they are not necessarily related.
any cached yet.
- Security stuff that we should do that needs format changes.
- 2 Whenever we list a hash in a metafile, also list a file length.
+ . Whenever we list a hash in a metafile, also list a file length.
+ o Implement parsing; use length, when present, as a maximum
+ believable value to make sure we don't download too much
+ o Include lengths in generated packages and bundles
+ . Specify use of length field.
+ - Once everybody has been warned to update their clients, include
+ lengths in timestamp files.
+ - Make lengths mandatory
+ - Maybe make lengths enforced for purposes other than a maximum
+ during fetch.
+ - Maybe stop early if Content-Length is greater than the expected
+ length.
- Think more about issues 4, 7(A,B,C)
diff --git a/lib/thandy/SignerCLI.py b/lib/thandy/SignerCLI.py
index 7427474..b3e4a8b 100644
--- a/lib/thandy/SignerCLI.py
+++ b/lib/thandy/SignerCLI.py
@@ -92,17 +92,20 @@ def makebundle(args):
configFile = args[0]
packages = {}
+ packageLen = {}
for pkgFile in args[1:]:
print "Loading", pkgFile
f = open(pkgFile, 'r')
p = json.load(f)
f.close()
+ packageLen = os.stat(pkgFile).st_size
_, r, _ = thandy.formats.checkSignedObj(p)
if r != 'package':
print pkgFile, "was not a package"
packages[p['signed']['name']] = p['signed']
- bundleObj = thandy.formats.makeBundleObj(configFile, packages.__getitem__)
+ bundleObj = thandy.formats.makeBundleObj(configFile, packages.__getitem__,
+ packageLen.__getitem__)
signable = thandy.formats.makeSignable(bundleObj)
ks = getKeyStore()
diff --git a/lib/thandy/download.py b/lib/thandy/download.py
index 0c0d383..1d969e5 100644
--- a/lib/thandy/download.py
+++ b/lib/thandy/download.py
@@ -345,8 +345,8 @@ class DownloadStatusLog:
class DownloadJob:
"""Abstract base class. Represents a thing to be downloaded, and the
knowledge of how to download it."""
- def __init__(self, targetPath, tmpPath, wantHash=None, repoFile=None,
- useTor=False):
+ def __init__(self, targetPath, tmpPath, wantHash=None,
+ repoFile=None, useTor=False, wantLength=None):
"""Create a new DownloadJob. When it is finally downloaded,
store it in targetPath. Store partial results in tmpPath;
if there is already a file in tmpPath, assume that it is an
@@ -357,6 +357,7 @@ class DownloadJob:
self._destPath = targetPath
self._tmpPath = tmpPath
self._wantHash = wantHash
+ self._wantLength = wantLength
self._repoFile = repoFile
self._useTor = useTor
@@ -470,6 +471,12 @@ class DownloadJob:
have_length = os.stat(self._tmpPath).st_size
logging.info("Have stalled file for %s with %s bytes", url,
have_length)
+ if self._wantLength != None:
+ if self._wantLength >= have_length:
+ logging.warn("Stalled file is too long; removing it")
+ self._removeTmpFile()
+ haveStalled = False
+ have_length = None
else:
have_length = None
@@ -506,6 +513,13 @@ class DownloadJob:
total += len(c)
logging.debug("Got %s/%s bytes from %s",
total, expectLength, url)
+ if self._wantLength != None and total > self._wantLength:
+ logging.warn("Read too many bytes from %s; got %s, but "
+ "wanted %s", url, total, self._wantLength)
+ break
+
+ if self._wantLength != None and total != self._wantLength:
+ logging.warn("Length wrong on file %s", url)
finally:
if f_in is not None:
diff --git a/lib/thandy/formats.py b/lib/thandy/formats.py
index 9f136e5..0dc293c 100644
--- a/lib/thandy/formats.py
+++ b/lib/thandy/formats.py
@@ -375,6 +375,7 @@ SIG_METHOD_SCHEMA = S.AnyStr()
RELPATH_SCHEMA = PATH_PATTERN_SCHEMA = S.AnyStr()
URL_SCHEMA = S.AnyStr()
VERSION_SCHEMA = S.ListOf(S.Any()) #XXXX WRONG
+LENGTH_SCHEMA = S.Int(lo=0)
# A single signature of an object. Indicates the signature, the id of the
# signing key, and the signing method.
@@ -392,7 +393,7 @@ ROLENAME_SCHEMA = S.AnyStr()
# A role: indicates that a key is allowed to certify a kind of
# document at a certain place in the repo.
-ROLE_SCHEMA = S.Struct([ROLENAME_SCHEMA, PATH_PATTERN_SCHEMA])
+ROLE_SCHEMA = S.Struct([ROLENAME_SCHEMA, PATH_PATTERN_SCHEMA], allowMore=True)
# A Keylist: indicates a list of live keys and their roles.
KEYLIST_SCHEMA = S.Obj(
@@ -415,12 +416,12 @@ MIRRORLIST_SCHEMA = S.Obj(
TIMESTAMP_SCHEMA = S.Obj(
_type = S.Str("Timestamp"),
at = TIME_SCHEMA,
- m = S.Struct([TIME_SCHEMA, HASH_SCHEMA]),
- k = S.Struct([TIME_SCHEMA, HASH_SCHEMA]),
+ m = S.Struct([TIME_SCHEMA, HASH_SCHEMA], [LENGTH_SCHEMA], allowMore=True),
+ k = S.Struct([TIME_SCHEMA, HASH_SCHEMA], [LENGTH_SCHEMA], allowMore=True),
b = S.DictOf(keySchema=S.AnyStr(),
valSchema=
- S.Struct([ VERSION_SCHEMA, RELPATH_SCHEMA, TIME_SCHEMA, HASH_SCHEMA ]))
- )
+ S.Struct([ VERSION_SCHEMA, RELPATH_SCHEMA, TIME_SCHEMA, HASH_SCHEMA ], [LENGTH_SCHEMA], allowMore=True))
+ )
# A Bundle: lists a bunch of packages that should be updated in tandem
BUNDLE_SCHEMA = S.Obj(
@@ -436,6 +437,7 @@ BUNDLE_SCHEMA = S.Obj(
version=VERSION_SCHEMA,
path=RELPATH_SCHEMA,
hash=HASH_SCHEMA,
+ length=S.Opt(LENGTH_SCHEMA),
order=S.Struct([S.Int(), S.Int(), S.Int()]),
optional=S.Opt(S.Bool()),
gloss=S.DictOf(S.AnyStr(), S.AnyStr()),
@@ -479,7 +481,8 @@ OBSOLETE_RPM_FORMAT_ITEM_SCHEMA = S.Obj(
ITEM_INFO_SCHEMA = S.AllOf([CHECK_ITEM_SCHEMA, INSTALL_ITEM_SCHEMA])
-ITEM_SCHEMA = S.Struct([RELPATH_SCHEMA, HASH_SCHEMA], [ITEM_INFO_SCHEMA],
+ITEM_SCHEMA = S.Struct([RELPATH_SCHEMA, HASH_SCHEMA],
+ [ITEM_INFO_SCHEMA, LENGTH_SCHEMA],
allowMore=True)
def checkPackageFormatConsistency(obj):
@@ -578,17 +581,18 @@ class Keylist(KeyDB):
self.addKey(key)
class StampedInfo:
- def __init__(self, ts, hash, version=None, relpath=None):
+ def __init__(self, ts, hash, version=None, relpath=None, length=None):
self._ts = ts
self._hash = hash
self._version = version
self._relpath = relpath
+ self._length = length
@staticmethod
- def fromJSonFields(timeStr, hashStr):
+ def fromJSonFields(timeStr, hashStr, length=None):
t = parseTime(timeStr)
h = parseHash(hashStr)
- return StampedInfo(t, h)
+ return StampedInfo(t, h, length=length)
def getHash(self):
return self._hash
@@ -596,6 +600,9 @@ class StampedInfo:
def getRelativePath(self):
return self._relpath
+ def getLength(self):
+ return self._length
+
class TimestampFile:
def __init__(self, at, mirrorlistinfo, keylistinfo, bundleinfo):
self._time = at
@@ -607,15 +614,18 @@ class TimestampFile:
def fromJSon(obj):
# must be validated.
at = parseTime(obj['at'])
- m = StampedInfo.fromJSonFields(*obj['m'][:2])
- k = StampedInfo.fromJSonFields(*obj['k'][:2])
+ m = StampedInfo.fromJSonFields(*obj['m'][:3])
+ k = StampedInfo.fromJSonFields(*obj['k'][:3])
b = {}
for name, bundle in obj['b'].iteritems():
v = bundle[0]
rp = bundle[1]
t = parseTime(bundle[2])
h = parseHash(bundle[3])
- b[name] = StampedInfo(t, h, v, rp)
+ ln = None
+ if len(bundle) > 4:
+ ln = bundle[4]
+ b[name] = StampedInfo(t, h, v, rp, ln)
return TimestampFile(at, m, k, b)
@@ -672,6 +682,8 @@ def makePackageObj(config_fname, package_fname):
f = open(package_fname, 'rb')
digest = getFileDigest(f)
+ f.close()
+ f_len = os.stat(package_fname).st_size
# Check fields!
extra = {}
@@ -681,7 +693,7 @@ def makePackageObj(config_fname, package_fname):
'location' : r['location'], #DOCDOC
'version' : r['version'],
'format' : r['format'],
- 'files' : [ [ r['relpath'], formatHash(digest), extra ] ],
+ 'files' : [ [ r['relpath'], formatHash(digest), extra, f_len ] ],
'shortdesc' : shortDescs,
'longdesc' : longDescs
}
@@ -725,7 +737,7 @@ def makePackageObj(config_fname, package_fname):
return result
-def makeBundleObj(config_fname, getPackage):
+def makeBundleObj(config_fname, getPackage, getPackageLength):
packages = []
def ShortGloss(lang, val): packages[-1]['gloss'][lang] = val
def LongGloss(lang, val): packages[-1]['longgloss'][lang] = val
@@ -763,6 +775,9 @@ def makeBundleObj(config_fname, getPackage):
raise thandy.FormatException("No such package as %s"%p['name'])
p['hash'] = formatHash(getDigest(pkginfo))
+ length = getPackageLength(p['name'])
+ if length != None:
+ p['length'] = length
if p['path'] == None:
p['path'] = pkginfo['location']
if p['version'] == None:
diff --git a/lib/thandy/repository.py b/lib/thandy/repository.py
index 3dc2864..bce79fb 100644
--- a/lib/thandy/repository.py
+++ b/lib/thandy/repository.py
@@ -33,6 +33,9 @@ class RepositoryFile:
self._signedFormat = signedFormat
self._needSigs = needSigs
+ # The length of the file as stored on disk.
+ self._length = None
+
# The contents of the file, parsed. None if we haven't loaded
# the file.
self._main_obj = None
@@ -70,6 +73,7 @@ class RepositoryFile:
f = None
fd = os.open(fname, os.O_RDONLY)
try:
+ self._length = os.fstat(fd).st_size
f = os.fdopen(fd, 'r')
except:
os.close(fd)
@@ -295,7 +299,8 @@ class LocalRepository:
return None
def getFilesToUpdate(self, now=None, trackingBundles=(), hashDict=None,
- usePackageSystem=True, installableDict=None):
+ lengthDict=None, usePackageSystem=True,
+ installableDict=None):
"""Return a set of relative paths for all files that we need
to fetch. Assumes that we care about the bundles
'trackingBundles'.
@@ -312,6 +317,9 @@ class LocalRepository:
if installableDict == None:
installableDict = {}
+ if lengthDict == None:
+ lengthDict = {}
+
pkgItems = None
need = set()
@@ -370,6 +378,10 @@ class LocalRepository:
ts.getKeylistInfo().getHash()
hashDict[self._mirrorlistFile.getRelativePath()] = \
ts.getMirrorlistInfo().getHash()
+ lengthDict[self._keylistFile.getRelativePath()] = \
+ ts.getKeylistInfo().getLength()
+ lengthDict[self._mirrorlistFile.getRelativePath()] = \
+ ts.getMirrorlistInfo().getLength()
h_kf = thandy.formats.getDigest(self._keylistFile.get())
h_expected = ts.getKeylistInfo().getHash()
@@ -407,6 +419,7 @@ class LocalRepository:
rp = binfo.getRelativePath()
h_expected = binfo.getHash()
hashDict[rp] = h_expected
+ lengthDict[rp] = binfo.getLength()
bfile = self.getBundleFile(rp)
try:
bfile.load()
@@ -440,6 +453,7 @@ class LocalRepository:
pfile = self.getPackageFile(rp)
h_expected = thandy.formats.parseHash(pkginfo['hash'])
hashDict[rp] = h_expected
+ lengthDict[rp] = pkginfo.get('length')
try:
pfile.load()
except OSError:
@@ -498,6 +512,8 @@ class LocalRepository:
h_expected = thandy.formats.parseHash(h)
hashDict[rp] = h_expected
+ if len(f) > 3:
+ lengthDict[rp] = h[3]
fn = self.getFilename(rp)
try:
h_got = thandy.formats.getFileDigest(fn)
diff --git a/specs/thandy-spec.txt b/specs/thandy-spec.txt
index 3726c94..5cc4fa6 100644
--- a/specs/thandy-spec.txt
+++ b/specs/thandy-spec.txt
@@ -397,10 +397,10 @@
{ "_type" : Timestamp,
"at" : TIME,
- "m" : [ TIME, HASH ],
- "k" : [ TIME, HASH ],
+ "m" : [ TIME, HASH, LENGTH ],
+ "k" : [ TIME, HASH, LENGTH ],
"b" : { NAME :
- [ [ Version, Path, Time, Hash ] ] }
+ [ [ Version, Path, Time, Hash, (Length) ] ] }
}
TIME is when the timestamp was signed. MIRRORLISTHASH is the digest
@@ -409,6 +409,8 @@
bundles and their locations and hashes. The "name" of a bundle (in
this context) is the directory component of the bundle's path.
+ The LENGTH field may be absent on very old timestamp files.
+
3.6. File formats: bundle files
{ "_type" : "Bundle",
@@ -423,6 +425,7 @@
"version" : VERSION,
"path" : PATH,
"hash" : HASH,
+ ("length" : LENGTH),
"order" : [ INST, UPDATE, REMOVE ],
("optional : BOOL, )
"gloss" : { LANG : TEXT },
@@ -446,6 +449,9 @@
language. The UI should display the most appropriate language to the
user.
+ The LENGTH field is required on all new bundles, but may be absent
+ on very old ones.
+
3.7. File formats: package files
{ "_type" : "Package",
@@ -454,13 +460,14 @@
"version" : VERSION,
"format" : FMT,
"ts" : TIME,
- "files" : [ [ PATH, HASH, INFO ], ... ],
+ "files" : [ [ PATH, HASH, INFO, (LENGTH) ], ... ],
"shortdesc" : { LANG : DESC, ... },
"longdesc" : { LANG : DESC, ... },
}
Most elements are self-explanatory. To interpret the 'INFO' entry
- for each installable file, see section 6.
+ for each installable file, see section 6. The LENGTH field is
+ required on all new packages, but may be absent on very old ones.
No two package files in the same repository should have the same
name and version. If a package needs to be changed, the version
@@ -508,6 +515,10 @@
Clients SHOULD cache at least the latest versions they have received
of all files.
+ When downloading a file, if the client knows what that file's length
+ should be, it SHOULD NOT accept a longer file, and SHOULD NOT
+ continue the download past the file length.
+
4.1.1. Download preferences
Users should be able to specify that packages must be only