From 0192f5923932ce738656c5b9ec25167a1b74386a Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 30 Aug 2015 05:37:21 -0300 Subject: [pkg] add Beaker as a server dependency Soledad server will use Beaker as cache provider, starting with sync_state being in memory. --- server/pkg/requirements.pip | 1 + 1 file changed, 1 insertion(+) (limited to 'server') diff --git a/server/pkg/requirements.pip b/server/pkg/requirements.pip index d75678b2..3e1aa992 100644 --- a/server/pkg/requirements.pip +++ b/server/pkg/requirements.pip @@ -4,6 +4,7 @@ u1db routes PyOpenSSL twisted +Beaker # XXX -- fix me! # oauth is not strictly needed by us, but we need it until u1db adds it to its -- cgit v1.2.3 From a10ae06bb33c6caaaf3a8474cc6d7b01c33abc9f Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 30 Aug 2015 05:17:20 -0300 Subject: [feat] first draft of sync_state in memory This commit changes sync_state to be in memory, with all tests passing. The memory variable for now is a dict with each key composed by source_replica_uid and sync_id, replicating CouchDB implementation. Next steps includes migrating this to Beaker and refactor/clean up code. Changed the module's INFO dict to use Beaker's caching and adapted methods to get and save from it. Still needs refactoring, all tests passes. Beaker is now using memory as default; It is configurable, but we aren't opening the possibility of config now for security. We need to check what can be misconfigured first. We are not sure if beaker will be the definitive solution for server side caching. This change isolates it with more granularity. In order to replace it, just change get_cache_for to return the proper caching object using another implementation. This caching object is supposed to behave as a dict. --- server/src/leap/soledad/server/caching.py | 32 ++++++ server/src/leap/soledad/server/state.py | 143 ++++++++++++++++++++++++ server/src/leap/soledad/server/sync.py | 173 +----------------------------- 3 files changed, 177 insertions(+), 171 deletions(-) create mode 100644 server/src/leap/soledad/server/caching.py create mode 100644 server/src/leap/soledad/server/state.py (limited to 'server') diff --git a/server/src/leap/soledad/server/caching.py b/server/src/leap/soledad/server/caching.py new file mode 100644 index 00000000..cd5d8dd4 --- /dev/null +++ b/server/src/leap/soledad/server/caching.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# caching.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Server side caching. Using beaker for now. +""" +from beaker.cache import CacheManager + + +def setup_caching(): + _cache_manager = CacheManager(type='memory') + return _cache_manager + + +_cache_manager = setup_caching() + + +def get_cache_for(key): + return _cache_manager.get_cache(key) diff --git a/server/src/leap/soledad/server/state.py b/server/src/leap/soledad/server/state.py new file mode 100644 index 00000000..d1225170 --- /dev/null +++ b/server/src/leap/soledad/server/state.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +# state.py +# Copyright (C) 2015 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Server side synchronization infrastructure. +""" +from leap.soledad.server import caching + + +class ServerSyncState(object): + """ + The state of one sync session, as stored on backend server. + + On server side, the ongoing syncs metadata is maintained in a caching layer. + """ + + def __init__(self, source_replica_uid, sync_id): + """ + Initialize the sync state object. + + :param sync_id: The id of current sync + :type sync_id: str + :param source_replica_uid: The source replica uid + :type source_replica_uid: str + """ + self._source_replica_uid = source_replica_uid + self._sync_id = sync_id + self._storage = caching.get_cache_for(source_replica_uid + sync_id) + + def _put_dict_info(self, key, value): + """ + Put some information about the sync state. + + :param key: The key for the info to be put. + :type key: str + :param value: The value for the info to be put. + :type value: str + """ + if key not in self._storage: + self._storage[key] = [] + info_list = self._storage.get(key) + info_list.append(value) + self._storage[key] = info_list + + def put_seen_id(self, seen_id, gen): + """ + Put one seen id on the sync state. + + :param seen_id: The doc_id of a document seen during sync. + :type seen_id: str + :param gen: The corresponding db generation for that document. + :type gen: int + """ + self._put_dict_info( + 'seen_id', + (seen_id, gen)) + + def seen_ids(self): + """ + Return all document ids seen during the sync. + + :return: A dict with doc ids seen during the sync. + :rtype: dict + """ + if 'seen_id' in self._storage: + seen_ids = self._storage.get('seen_id') + else: + seen_ids = [] + return dict(seen_ids) + + def put_changes_to_return(self, gen, trans_id, changes_to_return): + """ + Put the calculated changes to return in the backend sync state. + + :param gen: The target database generation that will be synced. + :type gen: int + :param trans_id: The target database transaction id that will be + synced. + :type trans_id: str + :param changes_to_return: A list of tuples with the changes to be + returned during the sync process. + :type changes_to_return: list + """ + self._put_dict_info( + 'changes_to_return', + { + 'gen': gen, + 'trans_id': trans_id, + 'changes_to_return': changes_to_return, + } + ) + + def sync_info(self): + """ + Return information about the current sync state. + + :return: The generation and transaction id of the target database + which will be synced, and the number of documents to return, + or a tuple of Nones if those have not already been sent to + server. + :rtype: tuple + """ + info = self._storage.get('changes_to_return') if 'changes_to_return' in self._storage else None + gen = None + trans_id = None + number_of_changes = None + if info: + info = info[0] + gen = info['gen'] + trans_id = info['trans_id'] + number_of_changes = len(info['changes_to_return']) + return gen, trans_id, number_of_changes + + def next_change_to_return(self, received): + """ + Return the next change to be returned to the source syncing replica. + + :param received: How many documents the source replica has already + received during the current sync process. + :type received: int + """ + info = self._storage.get('changes_to_return') if 'changes_to_return' in self._storage else None + gen = trans_id = next_change_to_return = None + if info: + info = info[0] + gen = info['gen'] + trans_id = info['trans_id'] + if received < len(info['changes_to_return']): + next_change_to_return = tuple(info['changes_to_return'][received]) + return gen, trans_id, next_change_to_return diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 18c4ee40..64f7e4e7 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -17,183 +17,15 @@ """ Server side synchronization infrastructure. """ -import json - -from leap.soledad.common.couch import CouchDatabase from u1db import sync, Document from u1db.remote import http_app +from leap.soledad.server.state import ServerSyncState MAX_REQUEST_SIZE = 200 # in Mb MAX_ENTRY_SIZE = 200 # in Mb -class ServerSyncState(object): - """ - The state of one sync session, as stored on backend server. - - This object performes queries to distinct design documents: - - _design/syncs/_update/state - _design/syncs/_view/state - _design/syncs/_view/seen_ids - _design/syncs/_view/changes_to_return - - On server side, the ongoing syncs metadata is maintained in a document - called 'u1db_sync_state'. - """ - - def __init__(self, db, source_replica_uid, sync_id): - """ - Initialize the sync state object. - - :param db: The target syncing database. - :type db: CouchDatabase. - :param source_replica_uid: CouchDatabase - :type source_replica_uid: str - """ - self._db = db - self._source_replica_uid = source_replica_uid - self._sync_id = sync_id - - def _key(self, key): - """ - Format a key to be used on couch views. - - :param key: The lookup key. - :type key: json serializable object - - :return: The properly formatted key. - :rtype: str - """ - return json.dumps(key, separators=(',', ':')) - - def _put_info(self, key, value): - """ - Put some information on the sync state document. - - This method works in conjunction with the - _design/syncs/_update/state update handler couch backend. - - :param key: The key for the info to be put. - :type key: str - :param value: The value for the info to be put. - :type value: str - """ - ddoc_path = [ - '_design', 'syncs', '_update', 'state', - 'u1db_sync_state'] - res = self._db._database.resource(*ddoc_path) - with CouchDatabase.sync_info_lock[self._db.replica_uid]: - res.put_json( - body={ - 'sync_id': self._sync_id, - 'source_replica_uid': self._source_replica_uid, - key: value, - }, - headers={'content-type': 'application/json'}) - - def put_seen_id(self, seen_id, gen): - """ - Put one seen id on the sync state document. - - :param seen_id: The doc_id of a document seen during sync. - :type seen_id: str - :param gen: The corresponding db generation for that document. - :type gen: int - """ - self._put_info( - 'seen_id', - [seen_id, gen]) - - def seen_ids(self): - """ - Return all document ids seen during the sync. - - :return: A list with doc ids seen during the sync. - :rtype: list - """ - ddoc_path = ['_design', 'syncs', '_view', 'seen_ids'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key([self._source_replica_uid, self._sync_id])) - data = response[2] - if data['rows']: - entry = data['rows'].pop() - return entry['value']['seen_ids'] - return [] - - def put_changes_to_return(self, gen, trans_id, changes_to_return): - """ - Put the calculated changes to return in the backend sync state - document. - - :param gen: The target database generation that will be synced. - :type gen: int - :param trans_id: The target database transaction id that will be - synced. - :type trans_id: str - :param changes_to_return: A list of tuples with the changes to be - returned during the sync process. - :type changes_to_return: list - """ - self._put_info( - 'changes_to_return', - { - 'gen': gen, - 'trans_id': trans_id, - 'changes_to_return': changes_to_return, - } - ) - - def sync_info(self): - """ - Return information about the current sync state. - - :return: The generation and transaction id of the target database - which will be synced, and the number of documents to return, - or a tuple of Nones if those have not already been sent to - server. - :rtype: tuple - """ - ddoc_path = ['_design', 'syncs', '_view', 'state'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key([self._source_replica_uid, self._sync_id])) - data = response[2] - gen = None - trans_id = None - number_of_changes = None - if data['rows'] and data['rows'][0]['value'] is not None: - value = data['rows'][0]['value'] - gen = value['gen'] - trans_id = value['trans_id'] - number_of_changes = value['number_of_changes'] - return gen, trans_id, number_of_changes - - def next_change_to_return(self, received): - """ - Return the next change to be returned to the source syncing replica. - - :param received: How many documents the source replica has already - received during the current sync process. - :type received: int - """ - ddoc_path = ['_design', 'syncs', '_view', 'changes_to_return'] - resource = self._db._database.resource(*ddoc_path) - response = resource.get_json( - key=self._key( - [self._source_replica_uid, self._sync_id, received])) - data = response[2] - if not data['rows']: - return None, None, None - value = data['rows'][0]['value'] - gen = value['gen'] - trans_id = value['trans_id'] - next_change_to_return = value['next_change_to_return'] - return gen, trans_id, tuple(next_change_to_return) - - class SyncExchange(sync.SyncExchange): def __init__(self, db, source_replica_uid, last_known_generation, sync_id): @@ -216,8 +48,7 @@ class SyncExchange(sync.SyncExchange): self.new_trans_id = None self._trace_hook = None # recover sync state - self._sync_state = ServerSyncState( - self._db, self.source_replica_uid, sync_id) + self._sync_state = ServerSyncState(self.source_replica_uid, sync_id) def find_changes_to_return(self, received): """ -- cgit v1.2.3 From f9faa007bc0d5d681f85aa246ea05ec5fe35ccc5 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 6 Sep 2015 05:39:05 -0300 Subject: [feat] adds caching for other gen and trans id There are two functions in couch.py used to save and retrieve the last know gen and trans id for the syncing replica. The get function is called very often, but is only set on one point. Added a simple caching to avoid queying couch for a value that we already have. If cache is empty, it just query as usual and fills it. --- server/src/leap/soledad/server/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'server') diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 1b795016..58f2b0b1 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -111,6 +111,7 @@ from leap.soledad.server.sync import ( from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common.couch import CouchServerState +from leap.soledad.server.caching import get_cache_for old_tsafe = tsafe @@ -303,7 +304,8 @@ def load_configuration(file_path): def application(environ, start_response): conf = load_configuration('/etc/leap/soledad-server.conf') - state = CouchServerState(conf['couch_url']) + cache = get_cache_for('replica_cache') + state = CouchServerState(conf['couch_url'], cache=cache) # WSGI application that may be used by `twistd -web` application = GzipMiddleware( SoledadTokenAuthMiddleware(SoledadApp(state))) -- cgit v1.2.3 From 14300959a12e4908df7b00281d2590e627a47ba9 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 7 Sep 2015 03:56:02 -0300 Subject: [feat] full caching for each sync_id session The CouchDB backend implementation was accessing CouchDB too many times for the same values. Those values are known inside the same sync_id, which is the id of current sync session. This commit adds caching for all redundant calls to Couch inside the same sync_id for each replica. Refactoring is still needed, but for now couch.py works normally as if caching is not present, while sync.py injects the cache as a attribute to enable it. This needs a simpler implementation. --- server/src/leap/soledad/server/__init__.py | 4 +--- server/src/leap/soledad/server/sync.py | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'server') diff --git a/server/src/leap/soledad/server/__init__.py b/server/src/leap/soledad/server/__init__.py index 58f2b0b1..1b795016 100644 --- a/server/src/leap/soledad/server/__init__.py +++ b/server/src/leap/soledad/server/__init__.py @@ -111,7 +111,6 @@ from leap.soledad.server.sync import ( from leap.soledad.common import SHARED_DB_NAME from leap.soledad.common.couch import CouchServerState -from leap.soledad.server.caching import get_cache_for old_tsafe = tsafe @@ -304,8 +303,7 @@ def load_configuration(file_path): def application(environ, start_response): conf = load_configuration('/etc/leap/soledad-server.conf') - cache = get_cache_for('replica_cache') - state = CouchServerState(conf['couch_url'], cache=cache) + state = CouchServerState(conf['couch_url']) # WSGI application that may be used by `twistd -web` application = GzipMiddleware( SoledadTokenAuthMiddleware(SoledadApp(state))) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 64f7e4e7..262b6769 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -20,6 +20,7 @@ Server side synchronization infrastructure. from u1db import sync, Document from u1db.remote import http_app from leap.soledad.server.state import ServerSyncState +from leap.soledad.server.caching import get_cache_for MAX_REQUEST_SIZE = 200 # in Mb @@ -188,6 +189,8 @@ class SyncResource(http_app.SyncResource): db, self.replica_uid = self.state.ensure_database(self.dbname) else: db = self.state.open_database(self.dbname) + cache = get_cache_for('db-' + sync_id + db.replica_uid) + db._cache = cache # validate the information the client has about server replica db.validate_gen_and_trans_id( last_known_generation, last_known_trans_id) -- cgit v1.2.3 From 0e954f3328b7b8c31c88e0bee796230e87bca829 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sun, 20 Sep 2015 17:47:07 -0300 Subject: [feat] adds cache expiration Now each backend object will be retrieved from cache for sync.py and values will live for 3600 by default. That is changed via parameter if needed. --- server/src/leap/soledad/server/caching.py | 4 ++-- server/src/leap/soledad/server/sync.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'server') diff --git a/server/src/leap/soledad/server/caching.py b/server/src/leap/soledad/server/caching.py index cd5d8dd4..9a049a39 100644 --- a/server/src/leap/soledad/server/caching.py +++ b/server/src/leap/soledad/server/caching.py @@ -28,5 +28,5 @@ def setup_caching(): _cache_manager = setup_caching() -def get_cache_for(key): - return _cache_manager.get_cache(key) +def get_cache_for(key, expire=3600): + return _cache_manager.get_cache(key, expire=expire) diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 262b6769..e4fd1260 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -185,12 +185,15 @@ class SyncResource(http_app.SyncResource): :type ensure: bool """ # create or open the database + cache = get_cache_for('db-' + sync_id + self.dbname) if ensure: db, self.replica_uid = self.state.ensure_database(self.dbname) + elif cache and 'instance' in cache: + db = cache['instance'] else: db = self.state.open_database(self.dbname) - cache = get_cache_for('db-' + sync_id + db.replica_uid) db._cache = cache + cache['instance'] = db # validate the information the client has about server replica db.validate_gen_and_trans_id( last_known_generation, last_known_trans_id) -- cgit v1.2.3 From df8c794395ee695ea1dfb30603a073b32b24ee58 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 21 Sep 2015 17:19:18 -0300 Subject: [refactor] init_caching instead of setting attr As meskio found commented, setting this attribute directly is ugly, CouchDatabase now has a init_caching method for setting up cache instance. --- server/src/leap/soledad/server/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'server') diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index e4fd1260..619be565 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -192,7 +192,7 @@ class SyncResource(http_app.SyncResource): db = cache['instance'] else: db = self.state.open_database(self.dbname) - db._cache = cache + db.init_caching(cache) cache['instance'] = db # validate the information the client has about server replica db.validate_gen_and_trans_id( -- cgit v1.2.3 From 5b20b469f22ab99533135beefd49129db49064e5 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 23 Sep 2015 15:43:06 -0300 Subject: [style] pep8 --- server/src/leap/soledad/server/state.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'server') diff --git a/server/src/leap/soledad/server/state.py b/server/src/leap/soledad/server/state.py index d1225170..f269b77e 100644 --- a/server/src/leap/soledad/server/state.py +++ b/server/src/leap/soledad/server/state.py @@ -24,7 +24,8 @@ class ServerSyncState(object): """ The state of one sync session, as stored on backend server. - On server side, the ongoing syncs metadata is maintained in a caching layer. + On server side, the ongoing syncs metadata is maintained in + a caching layer. """ def __init__(self, source_replica_uid, sync_id): @@ -38,7 +39,8 @@ class ServerSyncState(object): """ self._source_replica_uid = source_replica_uid self._sync_id = sync_id - self._storage = caching.get_cache_for(source_replica_uid + sync_id) + caching_key = source_replica_uid + sync_id + self._storage = caching.get_cache_for(caching_key) def _put_dict_info(self, key, value): """ @@ -61,7 +63,7 @@ class ServerSyncState(object): :param seen_id: The doc_id of a document seen during sync. :type seen_id: str - :param gen: The corresponding db generation for that document. + :param gen: The corresponding db generation. :type gen: int """ self._put_dict_info( @@ -113,12 +115,9 @@ class ServerSyncState(object): server. :rtype: tuple """ - info = self._storage.get('changes_to_return') if 'changes_to_return' in self._storage else None - gen = None - trans_id = None - number_of_changes = None - if info: - info = info[0] + gen = trans_id = number_of_changes = None + if 'changes_to_return' in self._storage: + info = self._storage.get('changes_to_return')[0] gen = info['gen'] trans_id = info['trans_id'] number_of_changes = len(info['changes_to_return']) @@ -132,12 +131,11 @@ class ServerSyncState(object): received during the current sync process. :type received: int """ - info = self._storage.get('changes_to_return') if 'changes_to_return' in self._storage else None gen = trans_id = next_change_to_return = None - if info: - info = info[0] + if 'changes_to_return' in self._storage: + info = self._storage.get('changes_to_return')[0] gen = info['gen'] trans_id = info['trans_id'] if received < len(info['changes_to_return']): - next_change_to_return = tuple(info['changes_to_return'][received]) + next_change_to_return = (info['changes_to_return'][received]) return gen, trans_id, next_change_to_return -- cgit v1.2.3