| author | Victor Shyba <victor.shyba@gmail.com> | 2016-09-22 05:03:59 -0300 |
|---|---|---|
| committer | Victor Shyba <victor1984@riseup.net> | 2016-11-18 15:55:52 -0300 |
| commit | ec6ccbdeab1cba6534cb22ed247041b6da821884 (patch) | |
| tree | 0f53f657f92f75c64a0b71c1f516fa5379e4dabf | |
| parent | d774de2b603a63c61d297c402bea59908831a7e7 (diff) | |
[feature] batch based on payload size
A batched insert is slower than a plain insert for a single document, so if a document
exceeds the buffer, commit the pending batch (if any) and send the huge payload
through a traditional single-document insert.

A refactor is coming.
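To make the intent concrete, here is a minimal standalone sketch of the staging logic this change introduces, assuming a hypothetical `server` object with `batch_insert(docs)` and `insert_doc(doc)` methods; the `DocStager` class and these method names are illustrative, not the actual Soledad API:

```python
# Illustrative constants mirroring the values used in this change.
BUFFER_SIZE = 8192 * 1024         # flush the staging buffer at ~8 MiB
HUGE_DOC_SIZE = BUFFER_SIZE / 4   # payloads above ~2 MiB bypass batching


class DocStager(object):
    """Stage small documents and insert them as one batch;
    send oversized payloads through a plain, single-document insert."""

    def __init__(self, server):
        self.server = server      # hypothetical backend (batch_insert/insert_doc)
        self.staging = []
        self.staging_size = 0

    def flush(self):
        # Commit whatever is currently staged, if anything.
        if self.staging:
            self.server.batch_insert(self.staging)
            self.staging = []
            self.staging_size = 0

    def add(self, doc, content, is_last=False):
        if len(content or '') > HUGE_DOC_SIZE:
            # Batching is slower than a plain insert for a single big payload:
            # commit the pending batch, then insert the huge document directly.
            self.flush()
            self.server.insert_doc(doc)
        else:
            self.staging.append(doc)
            self.staging_size += len(content or '')
        if self.staging_size > BUFFER_SIZE or is_last:
            self.flush()
```

The actual change additionally skips batching altogether for very small syncs (fewer than 4 documents), where the batch overhead does not pay off.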
| -rw-r--r-- | common/src/leap/soledad/common/couch/__init__.py | 3 |
|---|---|---|
| -rw-r--r-- | server/src/leap/soledad/server/sync.py | 17 |

2 files changed, 17 insertions, 3 deletions
```diff
diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py
index 6f233b26..2e6f734e 100644
--- a/common/src/leap/soledad/common/couch/__init__.py
+++ b/common/src/leap/soledad/common/couch/__init__.py
@@ -791,7 +791,8 @@ class CouchDatabase(object):
                 attachment['data'] = binascii.b2a_base64(
                     parts[index]).strip()
             couch_doc['_attachments'] = attachments
-            gen_doc = self._allocate_new_generation(doc.doc_id, transaction_id, save=False)
+            gen_doc = self._allocate_new_generation(
+                doc.doc_id, transaction_id, save=False)
             self.batch_docs[doc.doc_id] = couch_doc
             self.batch_docs[gen_doc['_id']] = gen_doc
             last_gen, last_trans_id = self.batch_generation
diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py
index 6fcfe240..e12ebf8a 100644
--- a/server/src/leap/soledad/server/sync.py
+++ b/server/src/leap/soledad/server/sync.py
@@ -118,6 +118,8 @@ class SyncExchange(sync.SyncExchange):
             return_doc_cb(doc, gen, trans_id)
 
     def batched_insert_from_source(self, entries, sync_id):
+        if not entries:
+            return
         self._db.batch_start()
         for entry in entries:
             doc, gen, trans_id, number_of_docs, doc_idx = entry
@@ -241,8 +243,19 @@ class SyncResource(http_app.SyncResource):
         """
         doc = ServerDocument(id, rev)
         doc._json = content
-        self._staging_size += len(content or '')
-        self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx))
+        if (len(content or '') > (8192 * 1024) / 4) or number_of_docs < 4:
+            self.sync_exch.batched_insert_from_source(self._staging,
+                                                      self._sync_id)
+            self._staging = []
+            self._staging_size = 0
+            self.sync_exch.insert_doc_from_source(
+                doc, gen, trans_id,
+                number_of_docs=number_of_docs,
+                doc_idx=doc_idx,
+                sync_id=self._sync_id)
+        else:
+            self._staging_size += len(content or '')
+            self._staging.append((doc, gen, trans_id, number_of_docs, doc_idx))
         if self._staging_size > 8192 * 1024 or doc_idx == number_of_docs:
             self.sync_exch.batched_insert_from_source(self._staging,
                                                       self._sync_id)
```
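As a rough usage illustration of the sketch above (the `FakeServer` class is made up for this example and only prints what a real backend would do):

```python
class FakeServer(object):
    # Hypothetical stand-in for the real sync backend, for illustration only.
    def batch_insert(self, docs):
        print('batch insert of %d docs' % len(docs))

    def insert_doc(self, doc):
        print('plain insert of %s' % doc)


stager = DocStager(FakeServer())
stager.add('doc-1', 'x' * 1024)                # small: staged
stager.add('doc-2', 'x' * (3 * 1024 * 1024))   # > 2 MiB: flush batch, then plain insert
stager.add('doc-3', 'x' * 2048, is_last=True)  # last doc: flush the remainder
```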