From fa595231b5ea346dfd9e06c364854469397fca3f Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 14 Nov 2015 21:53:14 -0300 Subject: [feat] checks staged docs inside batch This commit adds checking for consistency on batch. When a doc is needed during a batched sync and it doesnt exists on database, current code will make a partial batch to avoid processing like it doesnt exist. --- common/src/leap/soledad/common/couch/__init__.py | 50 ++++++++++++++++++++---- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/common/src/leap/soledad/common/couch/__init__.py b/common/src/leap/soledad/common/couch/__init__.py index f3437ae1..a922fd48 100644 --- a/common/src/leap/soledad/common/couch/__init__.py +++ b/common/src/leap/soledad/common/couch/__init__.py @@ -154,7 +154,7 @@ class CouchDatabase(object): self._dbname = dbname self._database = self.get_couch_database(url, dbname) self.batching = False - self.batch_docs = [] + self.batch_docs = {} if ensure_ddocs: self.ensure_ddocs_on_db() self.ensure_security_ddoc(database_security) @@ -166,8 +166,7 @@ class CouchDatabase(object): def batch_end(self): self.batching = False - self._database.update(self.batch_docs) - self.batch_docs = [] + self.__perform_batch() def get_couch_database(self, url, dbname): """ @@ -197,7 +196,8 @@ class CouchDatabase(object): """ for ddoc_name in ['docs', 'syncs', 'transactions']: try: - self.json_from_resource(['_design', ddoc_name, '_info'], + self.json_from_resource(['_design'] + + ddoc_name.split('/') + ['_info'], check_missing_ddoc=False) except ResourceNotFound: ddoc = json.loads( @@ -349,15 +349,49 @@ class CouchDatabase(object): :return: The document. :rtype: ServerDocument """ - # get document with all attachments (u1db content and eventual - # conflicts) + doc_from_batch = self.__check_batch_before_get(doc_id) + if doc_from_batch: + return doc_from_batch if self.batching and doc_id not in self.batched_ids: return None if doc_id not in self._database: return None + # get document with all attachments (u1db content and eventual + # conflicts) result = self.json_from_resource([doc_id], attachments=True) return self.__parse_doc_from_couch(result, doc_id, check_for_conflicts) + def __check_batch_before_get(self, doc_id): + """ + If doc_id is staged for batching, then we need to commit the batch + before going ahead. This avoids consistency problems, like trying to + get a document that isn't persisted and processing like it is missing. + + :param doc_id: The unique document identifier + :type doc_id: str + """ + if doc_id in self.batch_docs: + couch_doc = self.batch_docs[doc_id] + rev = self.__perform_batch(doc_id) + couch_doc['_rev'] = rev + self.batched_ids.add(doc_id) + return self.__parse_doc_from_couch(couch_doc, doc_id, True) + return None + + def __perform_batch(self, doc_id=None): + status = self._database.update(self.batch_docs.values()) + rev = None + for ok, stored_doc_id, rev_or_error in status: + if not ok: + error = rev_or_error + if type(error) is ResourceConflict: + raise RevisionConflict + raise error + elif doc_id == stored_doc_id: + rev = rev_or_error + self.batch_docs.clear() + return rev + def __parse_doc_from_couch(self, result, doc_id, check_for_conflicts=False): # restrict to u1db documents @@ -726,8 +760,8 @@ class CouchDatabase(object): del attachment['length'] index = 0 if name is 'u1db_content' else 1 attachment['data'] = binascii.b2a_base64(parts[index]).strip() - couch_doc['attachments'] = attachments - self.batch_docs.append(couch_doc) + couch_doc['_attachments'] = attachments + self.batch_docs[doc.doc_id] = couch_doc return transactions[-1][1] def _new_resource(self, *path): -- cgit v1.2.3