From bcbb9ccd1d4a281b6922340c12ec01b09d636380 Mon Sep 17 00:00:00 2001
From: Victor Shyba
Date: Sat, 14 Nov 2015 03:38:20 -0300
Subject: [feat] put all docs at once

Using the _bulk_docs API from CouchDB we can put all docs in a single
request. Also, prefetching all ids removes the need for HEAD requests
during the batch.
---
 common/src/leap/soledad/common/backend.py        |  6 +++
 common/src/leap/soledad/common/couch/__init__.py | 51 +++++++++++++++++------
 2 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/common/src/leap/soledad/common/backend.py b/common/src/leap/soledad/common/backend.py
index b083163e..9f5950b2 100644
--- a/common/src/leap/soledad/common/backend.py
+++ b/common/src/leap/soledad/common/backend.py
@@ -60,9 +60,15 @@ class SoledadBackend(CommonBackend):
     def batch_start(self):
         self.batching = True
         self.after_batch_callbacks = {}
+        self._database.batch_start()
+        if not self._cache:
+            # batching needs cache
+            self._cache = {}
+        self._get_generation()  # warm up gen info
 
     def batch_end(self):
         self.batching = False
+        self._database.batch_end()
         for name in self.after_batch_callbacks:
             self.after_batch_callbacks[name]()
         self.after_batch_callbacks = None
diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py
index fb3d57af..f3437ae1 100644
--- a/common/src/leap/soledad/common/couch/__init__.py
+++ b/common/src/leap/soledad/common/couch/__init__.py
@@ -153,10 +153,22 @@ class CouchDatabase(object):
         self._url = url
         self._dbname = dbname
         self._database = self.get_couch_database(url, dbname)
+        self.batching = False
+        self.batch_docs = []
         if ensure_ddocs:
             self.ensure_ddocs_on_db()
             self.ensure_security_ddoc(database_security)
 
+    def batch_start(self):
+        self.batching = True
+        ids = set(row.id for row in self._database.view('_all_docs'))
+        self.batched_ids = ids
+
+    def batch_end(self):
+        self.batching = False
+        self._database.update(self.batch_docs)
+        self.batch_docs = []
+
     def get_couch_database(self, url, dbname):
         """
         Generate a couchdb.Database instance given a url and dbname.
@@ -339,6 +351,8 @@ class CouchDatabase(object):
         """
         # get document with all attachments (u1db content and eventual
         # conflicts)
+        if self.batching and doc_id not in self.batched_ids:
+            return None
         if doc_id not in self._database:
             return None
         result = self.json_from_resource([doc_id], attachments=True)
@@ -691,20 +705,29 @@ class CouchDatabase(object):
         if old_doc is not None and hasattr(old_doc, 'couch_rev'):
             couch_doc['_rev'] = old_doc.couch_rev
         # prepare the multipart PUT
-        buf = StringIO()
-        envelope = MultipartWriter(buf)
-        envelope.add('application/json', json.dumps(couch_doc))
-        for part in parts:
-            envelope.add('application/octet-stream', part)
-        envelope.close()
-        # try to save and fail if there's a revision conflict
-        try:
-            resource = self._new_resource()
-            resource.put_json(
-                doc.doc_id, body=str(buf.getvalue()),
-                headers=envelope.headers)
-        except ResourceConflict:
-            raise RevisionConflict()
+        if not self.batching:
+            buf = StringIO()
+            envelope = MultipartWriter(buf)
+            envelope.add('application/json', json.dumps(couch_doc))
+            for part in parts:
+                envelope.add('application/octet-stream', part)
+            envelope.close()
+            # try to save and fail if there's a revision conflict
+            try:
+                resource = self._new_resource()
+                resource.put_json(
+                    doc.doc_id, body=str(buf.getvalue()),
+                    headers=envelope.headers)
+            except ResourceConflict:
+                raise RevisionConflict()
+        else:
+            for name, attachment in attachments.items():
+                del attachment['follows']
+                del attachment['length']
+                index = 0 if name == 'u1db_content' else 1
+                attachment['data'] = binascii.b2a_base64(parts[index]).strip()
+            couch_doc['_attachments'] = attachments
+            self.batch_docs.append(couch_doc)
         return transactions[-1][1]
 
     def _new_resource(self, *path):
-- 
cgit v1.2.3
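For readers unfamiliar with the couchdb-python calls used above, here is a minimal, self-contained sketch of the batching pattern the patch implements: prefetch all existing doc ids with a single _all_docs view query, queue documents locally, and flush them with one Database.update() call, which POSTs to CouchDB's _bulk_docs endpoint. The BatchWriter class, the put() signature, and the server URL and database name below are illustrative assumptions for this sketch, not part of Soledad's API.

    # A minimal sketch of the batching pattern, assuming the couchdb-python
    # package already used by this module. BatchWriter, put() and the URLs
    # below are illustrative names, not part of Soledad.
    import binascii

    import couchdb


    class BatchWriter(object):

        def __init__(self, database):
            self._database = database
            self.batching = False
            self.batch_docs = []
            self.batched_ids = set()

        def batch_start(self):
            # One _all_docs query fetches every existing doc id, so later
            # existence checks are set lookups instead of per-doc HEADs.
            self.batching = True
            self.batched_ids = set(
                row.id for row in self._database.view('_all_docs'))

        def put(self, doc_id, content, attachment=None):
            # Queue the document locally instead of PUTting it right away.
            doc = {'_id': doc_id, 'content': content}
            if attachment is not None:
                # _bulk_docs only accepts inline, base64-encoded attachments
                # under the '_attachments' field. (The patch targets Python 2,
                # where the extra decode() is not needed.)
                doc['_attachments'] = {
                    'u1db_content': {
                        'content_type': 'application/octet-stream',
                        'data': binascii.b2a_base64(
                            attachment).strip().decode('ascii'),
                    }
                }
            self.batch_docs.append(doc)

        def batch_end(self):
            # Database.update() POSTs all queued docs to _bulk_docs in a
            # single request and returns (success, doc_id, rev_or_exc) tuples.
            self.batching = False
            results = self._database.update(self.batch_docs)
            self.batch_docs = []
            return results


    if __name__ == '__main__':
        server = couchdb.Server('http://localhost:5984/')  # placeholder URL
        writer = BatchWriter(server['user-db'])             # placeholder db name
        writer.batch_start()
        writer.put('doc-1', {'key': 'value'}, attachment=b'payload')
        print(writer.batch_end())

Note that Database.update() reports revision conflicts per document in its result list rather than raising ResourceConflict immediately, which is one reason the non-batching branch in the patch keeps the multipart PUT with its explicit conflict handling.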