summaryrefslogtreecommitdiff
path: root/lib/thandy/download.py
blob: bf7dc4331811e072f9f521178b414c1ab400a604 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information.

import httplib
import logging
import os
import Queue
import random
import stat
import threading
import urllib2

import thandy.formats
import thandy.util

class Downloads:
    """Run queued download jobs on a small pool of daemon worker threads.

       A job is any object providing getRelativePath() and download(), such
       as a DownloadJob.  Jobs are tracked by relative path, both while they
       are queued or running and after they have completed successfully.
    """
    def __init__(self, n_threads=2):
        """Create a download manager with 'n_threads' worker threads.
           The workers do not run until start() is called."""
        # Guards self.downloads and self.haveDownloaded.
        self._lock = threading.RLock()
        # Map from relative path to the job that is queued or running.
        self.downloads = {}
        # Map from relative path to True for every job that has finished.
        self.haveDownloaded = {}
        # Jobs waiting to be picked up by a worker thread.
        self.downloadQueue = Queue.Queue()
        self.threads = [ threading.Thread(target=self._thread)
                         for _ in range(n_threads) ]
        for t in self.threads:
            # Daemon threads do not keep the process alive at exit.
            t.setDaemon(True)

    def start(self):
        """Start every worker thread."""
        for t in self.threads:
            t.start()

    def isCurrentlyDownloading(self, relPath):
        """Return true iff a job for 'relPath' is queued or running."""
        self._lock.acquire()
        try:
            return relPath in self.downloads
        finally:
            self._lock.release()

    def isRedundant(self, relPath):
        """Return true iff a job for 'relPath' is queued, running, or has
           already completed."""
        self._lock.acquire()
        try:
            return (relPath in self.downloads or
                    relPath in self.haveDownloaded)
        finally:
            self._lock.release()

    def addDownloadJob(self, job):
        """Register 'job' under its relative path and queue it for a
           worker thread."""
        rp = job.getRelativePath()
        self._lock.acquire()
        try:
            self.downloads[rp] = job
        finally:
            self._lock.release()
        # Enqueue only after the job is registered, so that a worker which
        # finishes quickly always finds the entry it needs to clear.
        self.downloadQueue.put(job)

    def _runJob(self, job):
        """Helper: run a single job and update the bookkeeping tables.

           A job whose download() raises is logged and dropped from
           'downloads' without being recorded in 'haveDownloaded', so it is
           not reported as redundant afterwards.
        """
        rp = job.getRelativePath()
        succeeded = False
        try:
            try:
                job.download()
                succeeded = True
            except Exception:
                # One failed job must not take the worker thread down.
                logging.exception("Download of %r failed", rp)
        finally:
            self._lock.acquire()
            try:
                # pop() rather than del: the same path may have been queued
                # more than once, and an earlier run already removed it.
                self.downloads.pop(rp, None)
                if succeeded:
                    self.haveDownloaded[rp] = True
            finally:
                self._lock.release()

    def _thread(self):
        """Worker loop: take jobs off the queue forever and run them."""
        while True:
            self._runJob(self.downloadQueue.get())

class DownloadJob:
    """A single file to fetch from a mirror into a temporary path and then
       move to its final destination."""
    def __init__(self, relPath, destPath, mirrorlist=None,
                 wantHash=None, canStall=False):
        """Create a job and make sure the directory for its temporary file
           exists.

           relPath -- path appended to a mirror's 'urlbase' to form the URL;
              also the value returned by getRelativePath().
           destPath -- where the finished file is moved.
           mirrorlist -- dict with a 'mirrors' list; used by getURL() when it
              is called without an argument.
           wantHash -- if set, the digest the fetched file must have.
           canStall -- stored on the job; nothing in this class reads it yet.
        """
        self._relPath = relPath
        self._wantHash = wantHash
        self._mirrorList = mirrorlist
        self._destPath = destPath
        self._canStall = canStall

        tmppath = thandy.util.userFilename("tmp")
        # os.path.join() discards everything before an absolute component,
        # so strip a leading slash to stay below the temporary directory.
        if relPath.startswith("/"):
            relPath = relPath[1:]
        self._tmpPath = os.path.join(tmppath, relPath)

        d = os.path.dirname(self._tmpPath)
        try:
            # Owner-only permissions (rwx------).
            os.makedirs(d, stat.S_IRWXU)
        except OSError:
            # Already existing is fine, including when another thread
            # created it first; anything else is a real error.
            if not os.path.isdir(d):
                raise

    def getRelativePath(self):
        """Return the relative path this job was created with."""
        return self._relPath

    def haveStalledFile(self):
        """Return true iff a temporary file for this job exists on disk."""
        return os.path.exists(self._tmpPath)

    def _chooseMirror(self, usable):
        """Helper: pick a mirror from 'usable', a list of
           (cumulativeWeight, mirror) pairs, with probability proportional
           to each mirror's own weight.  Return None if the list is empty or
           the total weight is not positive."""
        if not usable:
            return None
        total = usable[-1][0]
        if total <= 0:
            return None
        # random() is in [0, 1), so target < total; the strict comparison
        # below means a mirror with weight 0 is never selected.
        target = random.random() * total
        # Could use bisect here instead
        for w, m in usable:
            if w > target:
                return m
        # Only reachable through floating-point rounding.
        return usable[-1][1]

    def getURL(self, mirrorlist=None):
        """Return the URL to fetch this job's file from, choosing at random
           (weighted by each mirror's 'weight') among the mirrors whose
           'contents' match the job's relative path.

           mirrorlist -- dict with a 'mirrors' list; defaults to the list
              given to the constructor.

           Raises IOError if no mirror can be chosen.
        """
        if mirrorlist is None:
            mirrorlist = self._mirrorList
        weightSoFar = 0
        usable = []

        for m in mirrorlist['mirrors']:
            for c in m['contents']:
                # CHECK FOR URL SUITABILITY XXXXX

                if thandy.formats.rolePathMatches(c, self._relPath):
                    weightSoFar += m['weight']
                    usable.append( (weightSoFar, m) )
                    break

        mirror = self._chooseMirror(usable)
        if mirror is None:
            raise IOError("No usable mirror for %r" % (self._relPath,))

        return mirror['urlbase'] + self._relPath

    def download(self):
        """Fetch the file from a randomly chosen mirror into the temporary
           path, then move it to the destination path.

           If a wanted hash was given and the fetched file does not match
           it, the temporary file is deleted and nothing is moved.  Return
           True if the file was moved into place, False otherwise.  Errors
           from the network or the filesystem propagate to the caller.
        """
        # XXXX RESUME

        f_in = urllib2.urlopen(self.getURL())
        try:
            # Binary mode: the payload must be written byte-for-byte.
            f_out = open(self._tmpPath, 'wb')
            try:
                while True:
                    c = f_in.read(1024)
                    if not c:
                        break
                    f_out.write(c)
            finally:
                f_out.close()
        finally:
            f_in.close()
        # XXXXX retry on failure

        if self._wantHash:
            gotHash = thandy.formats.getFileDigest(self._tmpPath)
            if gotHash != self._wantHash:
                # Never install a file that failed verification, and do not
                # leave it behind to be mistaken for a stalled download.
                logging.warning("Hash mismatch for %r; discarding download",
                                self._relPath)
                os.remove(self._tmpPath)
                return False

        thandy.util.moveFile(self._tmpPath, self._destPath)
        return True