From b774387754ecae77d3ae00de2a9e072cef2eb2e7 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 17 Sep 2016 04:26:08 -0300 Subject: [feature] make reading attachments optional Will put a file object on doc json string if read_content is False, otherwise it will fetch and fill as usual. This is useful for improving server throughput on sync download stream by receiving a bulk-get without attachments and consuming the file-objects as they come. --- common/src/leap/soledad/common/backend.py | 4 +-- common/src/leap/soledad/common/couch/__init__.py | 42 ++++++++---------------- server/src/leap/soledad/server/sync.py | 6 ++-- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/common/src/leap/soledad/common/backend.py b/common/src/leap/soledad/common/backend.py index f4f48f86..5c995d38 100644 --- a/common/src/leap/soledad/common/backend.py +++ b/common/src/leap/soledad/common/backend.py @@ -570,7 +570,7 @@ class SoledadBackend(CommonBackend): self._put_doc(cur_doc, doc) def get_docs(self, doc_ids, check_for_conflicts=True, - include_deleted=False): + include_deleted=False, read_content=True): """ Get the JSON content for many documents. 
@@ -588,7 +588,7 @@ class SoledadBackend(CommonBackend): :rtype: iterable """ return self._database.get_docs(doc_ids, check_for_conflicts, - include_deleted) + include_deleted, read_content) def _prune_conflicts(self, doc, doc_vcr): """ diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index 1a95e590..f19b0acb 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -296,31 +296,14 @@ class CouchDatabase(object): generation, _ = self.get_generation_info() results = list( - self._get_docs(None, True, include_deleted)) + self.get_docs(None, True, include_deleted)) return (generation, results) def get_docs(self, doc_ids, check_for_conflicts=True, - include_deleted=False): + include_deleted=False, read_content=True): """ Get the JSON content for many documents. - :param doc_ids: A list of document identifiers or None for all. - :type doc_ids: list - :param check_for_conflicts: If set to False, then the conflict check - will be skipped, and 'None' will be - returned instead of True/False. - :type check_for_conflicts: bool - :param include_deleted: If set to True, deleted documents will be - returned with empty content. Otherwise deleted - documents will not be included in the results. - :return: iterable giving the Document object for each document id - in matching doc_ids order. 
- :rtype: iterable - """ - return self._get_docs(doc_ids, check_for_conflicts, include_deleted) - - def _get_docs(self, doc_ids, check_for_conflicts, include_deleted): - """ Use couch's `_all_docs` view to get the documents indicated in `doc_ids`, @@ -344,12 +327,12 @@ class CouchDatabase(object): view = self._database.view("_all_docs", **params) for row in view.rows: result = copy.deepcopy(row['doc']) - attachment_file_names = result['_attachments'].keys() - result['_attachments'] = {} - for file_name in attachment_file_names: - result['_attachments'][file_name] = { - 'data': self._database.get_attachment(result, file_name) - } + for file_name in result.get('_attachments', {}).keys(): + data = self._database.get_attachment(result, file_name) + if data: + if read_content: + data = data.read() + result['_attachments'][file_name] = {'data': data} doc = self.__parse_doc_from_couch( result, result['_id'], check_for_conflicts=check_for_conflicts, decode=False) @@ -416,8 +399,8 @@ class CouchDatabase(object): self.batch_docs.clear() return rev - def __parse_doc_from_couch(self, result, doc_id, decode=True, - check_for_conflicts=False): + def __parse_doc_from_couch(self, result, doc_id, + check_for_conflicts=False, decode=True): # restrict to u1db documents if 'u1db_rev' not in result: return None @@ -437,10 +420,11 @@ class CouchDatabase(object): and '_attachments' in result \ and 'u1db_conflicts' in result['_attachments']: if decode: - conflicts = json.loads(binascii.a2b_base64( - result['_attachments']['u1db_conflicts']['data'])) + conflicts = binascii.a2b_base64( + result['_attachments']['u1db_conflicts']['data']) else: conflicts = result['_attachments']['u1db_conflicts']['data'] + conflicts = json.loads(conflicts) doc.set_conflicts(self._build_conflicts(doc.doc_id, conflicts)) # store couch revision doc.couch_rev = result['_rev'] diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 77d4b840..6f2ffe9f 100644 --- 
a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -102,10 +102,12 @@ class SyncExchange(sync.SyncExchange): :return: None """ changes_to_return = self.changes_to_return - # return docs, including conflicts + # return docs, including conflicts. + # content as a file-object (will be read when writing) changed_doc_ids = [doc_id for doc_id, _, _ in changes_to_return] docs = self._db.get_docs( - changed_doc_ids, check_for_conflicts=False, include_deleted=True) + changed_doc_ids, check_for_conflicts=False, + include_deleted=True, read_content=False) docs_by_gen = izip( docs, (gen for _, gen, _ in changes_to_return), -- cgit v1.2.3