diff options
Diffstat (limited to 'lib/thandy/download.py')
-rw-r--r-- | lib/thandy/download.py | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/lib/thandy/download.py b/lib/thandy/download.py new file mode 100644 index 0000000..0b22cfa --- /dev/null +++ b/lib/thandy/download.py @@ -0,0 +1,127 @@ + + +import urllib2 +import httplib +import random + +import threading, Queue + +import thandy.util + +class Downloads: + def __init__(self, n_threads=2): + self._lock = threading.RLock() + self.downloads = {} + self.haveDownloaded = {} + self.downloadQueue = Queue.Queue() + self.threads = [ threading.Thread(target=self._thread) ] + for t in self.threads: + t.setDaemon(True) + + def start(self): + for t in self.threads: + t.start() + + def isCurrentlyDownloading(self, relPath): + self._lock.acquire() + try: + return self.downloads.has_key(relPath) + finally: + self._lock.release() + + def isRedundant(self, relPath): + self._lock.acquire() + try: + return (self.downloads.has_key(relPath) or + self.haveDownloaded.has_key(relPath)) + finally: + self._lock.release() + + def addDownloadJob(self, job): + rp = job.getRelativePath() + self._lock.acquire() + self.downloads[rp] = job + self._lock.release() + self.downloadQueue.put(job) + + def _thread(self): + while True: + job = self.downloadQueue.get() + job.download() + rp = job.getRelativePath() + self._lock.acquire() + try: + del self.downloads[rp] + self.haveDownloaded[rp] = True + finally: + self._lock.release() + +class DownloadJob: + def __init__(self, relPath, destPath, mirrorlist=None, + wantHash=None, canStall=False): + self._relPath = relPath + self._wantHash = wantHash + self._mirrorList = mirrorlist + self._destPath = destPath + + tmppath = thandy.util.userFilename("tmp") + if relPath.startswith("/"): + relPath = relPath[1:] + self._tmppath = os.path.join(tmppath, relPath) + + d = os.path.dirname(self._tmppath) + if not os.path.exists(d): + os.makedirs(d, 0700) + + def getRelativePath(self): + return self._relPath + + def haveStalledFile(self): + return os.path.exists(self._tmppath) + + def getURL(self, mirrorlist=None): + if mirrorlist is None: + mirrorlist = self._mirrorList + weightSoFar = 0 + usable = [] + + for m in mirrorlist['mirrors']: + for c in m['contents']: + # CHECK FOR URL SUITABILITY XXXXX + + if thandy.formats.rolePathMatches(c, self._relPath): + weightSoFar += m['weight'] + usable.append( (weightSoFar, m) ) + break + + wTarget = random.randint(0, weightSoFar) + mirror = None + # Could use bisect here instead + for w, m in mirrorlist: + if w >= wTarget: + mirror = m + break + + return m['urlbase'] + self._relPath + + def download(self): + # XXXX RESUME + + f_in = urllib2.urlopen(self.getURL()) + f_out = open(self._tmpPath, 'w') + while True: + c = f_in.read(1024) + if not c: + break + f_out.write(c) + f_in.close() + f_out.close() + # XXXXX retry on failure + + if self._wantHash: + gotHash = thandy.formats.getFileDigest(self._tmpPath) + if gotHash != self._wantHash: + # XXXX Corrupt file. + pass + + thandy.utils.moveFile(self._tmpPath, self._destPath) |