Although we specify maxPersistentPerHost, Twisted won't stop opening
connections once that limit is reached. The limit is only used to keep the
size of the persistent connection pool under control; additional connections
are made as non-persistent. So, if we issue 10000 requests, it will open 10000
connections immediately and leave 10 open after all have finished.
For checking this behavior, see getConnection from Twisted source:
http://twistedmatrix.com/trac/browser/tags/releases/twisted-15.2.1/twisted/web/client.py#L1203
I tested this by using http_target from soledad without a local database to
download all encrypted docs from one account with 1700 of them. The
program just hangs and crashes with 1000+ connections and "Too many
files open" warnings.
With this fix, it was able to download normally, respecting the
maxPersistentPerHost as a limiter. :)
from zope.interface import implements
from twisted.internet import reactor
from zope.interface import implements
from twisted.internet import reactor
+from twisted.internet import defer
from twisted.internet.defer import succeed
from twisted.web.client import Agent
from twisted.internet.defer import succeed
from twisted.web.client import Agent
"""
HTTP client done the twisted way, with a main focus on pinning the SSL
certificate.
"""
HTTP client done the twisted way, with a main focus on pinning the SSL
certificate.
+
+ By default, it uses a shared connection pool. If you want a dedicated
+ one, create it and pass it as the pool parameter of __init__.
+ Please note that this client limits the maximum number of concurrent
+ connections by using a DeferredSemaphore.
+ This limit is equal to the pool's maxPersistentPerHost and is needed
+ in order to avoid resource abuse on huge batches of requests.
"""
def __init__(self, cert_file=None, pool=_pool):
"""
def __init__(self, cert_file=None, pool=_pool):
:param cert_file: The path to the certificate file, if None given the
system's CAs will be used.
:type cert_file: str
:param cert_file: The path to the certificate file, if None given the
system's CAs will be used.
:type cert_file: str
+ :param pool: An optional dedicated connection pool to override the
+ default shared one.
+ :type pool: HTTPConnectionPool
"""
policy = get_compatible_ssl_context_factory(cert_file)
"""
policy = get_compatible_ssl_context_factory(cert_file)
reactor,
policy,
pool=pool)
reactor,
policy,
pool=pool)
+ self._semaphore = defer.DeferredSemaphore(pool.maxPersistentPerHost)
def request(self, url, method='GET', body=None, headers={}):
"""
def request(self, url, method='GET', body=None, headers={}):
"""
"""
if body:
body = HTTPClient.StringBodyProducer(body)
"""
if body:
body = HTTPClient.StringBodyProducer(body)
- d = self._agent.request(
- method, url, headers=Headers(headers), bodyProducer=body)
+ d = self._semaphore.run(self._agent.request,
+ method, url, headers=Headers(headers),
+ bodyProducer=body)
d.addCallback(readBody)
return d
d.addCallback(readBody)
return d