author    Victor Shyba <victor.shyba@gmail.com>  2015-11-14 03:38:20 -0300
committer Victor Shyba <victor.shyba@gmail.com>  2015-12-03 17:30:48 -0300
commit    bcbb9ccd1d4a281b6922340c12ec01b09d636380 (patch)
tree      28097e3773c0723f573bb07181a6a1990e31a48f
parent    577abee147c98592753bcdc68e1693d1f4ab5a08 (diff)
[feat] put all docs at once
Using the _bulk_docs API from CouchDB we can put all docs in a single request. Also, prefetching all ids removes the need for HEAD requests during the batch.
-rw-r--r--  common/src/leap/soledad/common/backend.py          6
-rw-r--r--  common/src/leap/soledad/common/couch/__init__.py  51
2 files changed, 43 insertions, 14 deletions
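For context on the approach described in the commit message: python-couchdb's Database.update() posts a list of documents to the _bulk_docs endpoint in one request, and a single _all_docs listing yields every existing doc id, so existence checks become local set lookups instead of one HEAD request per document. A minimal sketch of that idea outside the Soledad classes (server URL, database name, and the sample docs are placeholders):

    import couchdb

    # Placeholder server URL and database name.
    server = couchdb.Server('http://localhost:5984/')
    db = server['example-db']

    # One _all_docs request replaces a HEAD request per document.
    existing_ids = set(row.id for row in db.view('_all_docs'))

    docs = [
        {'_id': 'doc-1', 'content': 'first'},
        {'_id': 'doc-2', 'content': 'second'},
    ]

    # Database.update() POSTs the whole list to /<db>/_bulk_docs and returns
    # one (success, doc_id, rev_or_exception) tuple per document.
    for success, doc_id, rev_or_exc in db.update(docs):
        if not success:
            print('could not save %s: %s' % (doc_id, rev_or_exc))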
diff --git a/common/src/leap/soledad/common/backend.py b/common/src/leap/soledad/common/backend.py
index b083163e..9f5950b2 100644
--- a/common/src/leap/soledad/common/backend.py
+++ b/common/src/leap/soledad/common/backend.py
@@ -60,9 +60,15 @@ class SoledadBackend(CommonBackend):
     def batch_start(self):
         self.batching = True
         self.after_batch_callbacks = {}
+        self._database.batch_start()
+        if not self._cache:
+            # batching needs cache
+            self._cache = {}
+        self._get_generation()  # warm up gen info
 
     def batch_end(self):
         self.batching = False
+        self._database.batch_end()
         for name in self.after_batch_callbacks:
             self.after_batch_callbacks[name]()
         self.after_batch_callbacks = None
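The after_batch_callbacks dict handled above is the existing deferral hook: work recorded while batching is true only runs once batch_end() has flushed the batch. A rough, self-contained illustration of that pattern follows; the class and method names are made up for the example and are not part of the patch.

    class DeferredActions(object):
        """Toy sketch of the defer-until-batch_end pattern (not Soledad code)."""

        def __init__(self):
            self.batching = False
            self.after_batch_callbacks = None

        def batch_start(self):
            self.batching = True
            self.after_batch_callbacks = {}

        def batch_end(self):
            self.batching = False
            for name in self.after_batch_callbacks:
                self.after_batch_callbacks[name]()
            self.after_batch_callbacks = None

        def record(self, name, callback):
            # While batching, remember the side effect; otherwise run it now.
            if self.batching:
                self.after_batch_callbacks[name] = callback
            else:
                callback()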
diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py
index fb3d57af..f3437ae1 100644
--- a/common/src/leap/soledad/common/couch/__init__.py
+++ b/common/src/leap/soledad/common/couch/__init__.py
@@ -153,10 +153,22 @@ class CouchDatabase(object):
         self._url = url
         self._dbname = dbname
         self._database = self.get_couch_database(url, dbname)
+        self.batching = False
+        self.batch_docs = []
         if ensure_ddocs:
             self.ensure_ddocs_on_db()
             self.ensure_security_ddoc(database_security)
 
+    def batch_start(self):
+        self.batching = True
+        ids = set(row.id for row in self._database.view('_all_docs'))
+        self.batched_ids = ids
+
+    def batch_end(self):
+        self.batching = False
+        self._database.update(self.batch_docs)
+        self.batch_docs = []
+
     def get_couch_database(self, url, dbname):
         """
         Generate a couchdb.Database instance given a url and dbname.
@@ -339,6 +351,8 @@ class CouchDatabase(object):
"""
# get document with all attachments (u1db content and eventual
# conflicts)
+ if self.batching and doc_id not in self.batched_ids:
+ return None
if doc_id not in self._database:
return None
result = self.json_from_resource([doc_id], attachments=True)
@@ -691,20 +705,29 @@ class CouchDatabase(object):
         if old_doc is not None and hasattr(old_doc, 'couch_rev'):
             couch_doc['_rev'] = old_doc.couch_rev
         # prepare the multipart PUT
-        buf = StringIO()
-        envelope = MultipartWriter(buf)
-        envelope.add('application/json', json.dumps(couch_doc))
-        for part in parts:
-            envelope.add('application/octet-stream', part)
-        envelope.close()
-        # try to save and fail if there's a revision conflict
-        try:
-            resource = self._new_resource()
-            resource.put_json(
-                doc.doc_id, body=str(buf.getvalue()),
-                headers=envelope.headers)
-        except ResourceConflict:
-            raise RevisionConflict()
+        if not self.batching:
+            buf = StringIO()
+            envelope = MultipartWriter(buf)
+            envelope.add('application/json', json.dumps(couch_doc))
+            for part in parts:
+                envelope.add('application/octet-stream', part)
+            envelope.close()
+            # try to save and fail if there's a revision conflict
+            try:
+                resource = self._new_resource()
+                resource.put_json(
+                    doc.doc_id, body=str(buf.getvalue()),
+                    headers=envelope.headers)
+            except ResourceConflict:
+                raise RevisionConflict()
+        else:
+            for name, attachment in attachments.items():
+                del attachment['follows']
+                del attachment['length']
+                index = 0 if name == 'u1db_content' else 1
+                attachment['data'] = binascii.b2a_base64(parts[index]).strip()
+            couch_doc['_attachments'] = attachments
+            self.batch_docs.append(couch_doc)
         return transactions[-1][1]
 
     def _new_resource(self, *path):
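Putting the two halves together, the intended call pattern appears to be the one below. Here backend stands for a SoledadBackend instance and put_doc for whichever per-document write path is in use; both names, and incoming_docs, are assumptions for illustration rather than code from the patch.

    # Assumed usage sketch: group many writes into one _bulk_docs request.
    backend.batch_start()        # prefetch _all_docs ids, warm generation cache
    for doc in incoming_docs:    # incoming_docs is a placeholder iterable
        backend.put_doc(doc)     # while batching, docs accumulate in batch_docs
    backend.batch_end()          # single _bulk_docs POST, then deferred callbacks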