From fe0628eaccfa71ac20b0878dbc01f48e1a209d2d Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 3 Jun 2014 13:34:47 -0300 Subject: Use a fresh resource for multipart puts (#5739). --- common/src/leap/soledad/common/couch.py | 45 ++++++++++++++------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 3bc1f543..b51b32f3 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -40,7 +40,9 @@ from couchdb.http import ( ResourceConflict, ResourceNotFound, ServerError, - Session as CouchHTTPSession, + Session, + urljoin as couch_urljoin, + Resource, ) from u1db import query_parser, vectorclock from u1db.errors import ( @@ -333,17 +335,6 @@ class MultipartWriter(object): self.headers[name] = value -class Session(CouchHTTPSession): - """ - An HTTP session that can be closed. - """ - - def close_connections(self): - for key, conns in list(self.conns.items()): - for conn in conns: - conn.close() - - @contextmanager def couch_server(url): """ @@ -359,7 +350,6 @@ def couch_server(url): session = Session(timeout=COUCH_TIMEOUT) server = Server(url=url, session=session) yield server - session.close_connections() class CouchDatabase(CommonBackend): @@ -511,7 +501,6 @@ class CouchDatabase(CommonBackend): """ with couch_server(self._url) as server: del(server[self._dbname]) - self.close_connections() def close(self): """ @@ -520,20 +509,12 @@ class CouchDatabase(CommonBackend): :return: True if db was succesfully closed. :rtype: bool """ - self.close_connections() self._url = None self._full_commit = None self._session = None self._database = None return True - def close_connections(self): - """ - Close all open connections to the couch server. - """ - if self._session is not None: - self._session.close_connections() - def __del__(self): """ Close the database upon garbage collection. @@ -897,11 +878,9 @@ class CouchDatabase(CommonBackend): envelope.close() # try to save and fail if there's a revision conflict try: - self._database.resource.put_json( + resource = self._new_resource() + resource.put_json( doc.doc_id, body=buf.getvalue(), headers=envelope.headers) - # What follows is a workaround for an ugly bug. See: - # https://leap.se/code/issues/5448 - self.close_connections() except ResourceConflict: raise RevisionConflict() @@ -1473,6 +1452,20 @@ class CouchDatabase(CommonBackend): continue yield t._doc + def _new_resource(self, *path): + """ + Return a new resource for accessing a couch database. + + :return: A resource for accessing a couch database. + :rtype: couchdb.http.Resource + """ + # Workaround for: https://leap.se/code/issues/5448 + url = couch_urljoin(self._database.resource.url, *path) + resource = Resource(url, Session(timeout=COUCH_TIMEOUT)) + resource.credentials = self._database.resource.credentials + resource.headers = self._database.resource.headers.copy() + return resource + class CouchSyncTarget(CommonSyncTarget): """ -- cgit v1.2.3 From fec4e0624d3bb80d19d106cf84d20a71d58623bc Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 3 Jun 2014 13:37:56 -0300 Subject: Fix package build script. 
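The build script works on a copy of the repository under ${workdir} and fetches the main repository as the 'leapcode' remote, but the packaging branch is only guaranteed to exist on that remote, so a plain 'git checkout debian' can fail for lack of a local branch. Create the local branch from the remote one explicitly, as the hunk below does: 'git checkout -b debian leapcode/debian'.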
--- scripts/build_debian_package.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_debian_package.sh b/scripts/build_debian_package.sh index cc62c3ac..1ec9b00a 100755 --- a/scripts/build_debian_package.sh +++ b/scripts/build_debian_package.sh @@ -26,7 +26,7 @@ export GIT_DIR=${workdir}/soledad/.git export GIT_WORK_TREE=${workdir}/soledad git remote add leapcode ${SOLEDAD_MAIN_REPO} git fetch leapcode -git checkout debian +git checkout -b debian leapcode/debian git merge --no-edit ${branch} (cd ${workdir}/soledad && debuild -uc -us) echo "Packages generated in ${workdir}" -- cgit v1.2.3 From 9bf2160ca7ecc0e10aac7f85cdc4967696a03042 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 3 Jun 2014 13:49:30 -0300 Subject: Add password option to client db script. --- scripts/db_access/client_side_db.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py index 2bf4ab5e..9aadd5fe 100644 --- a/scripts/db_access/client_side_db.py +++ b/scripts/db_access/client_side_db.py @@ -118,30 +118,37 @@ def get_soledad_instance(username, provider, passphrase, basedir): auth_token=token) +class ValidateUserHandle(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') + res = m.match(values) + if res == None: + parser.error('User handle should have the form user@provider.') + setattr(namespace, 'username', res.groups()[0]) + setattr(namespace, 'provider', res.groups()[1]) + + # main program if __name__ == '__main__': - class ValidateUserHandle(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') - res = m.match(values) - if res == None: - parser.error('User handle should have the form user@provider.') - setattr(namespace, 'username', res.groups()[0]) - setattr(namespace, 'provider', res.groups()[1]) - # parse command line parser = argparse.ArgumentParser() parser.add_argument( 'user@provider', action=ValidateUserHandle, help='the user handle') parser.add_argument( - '-b', dest='basedir', required=False, default=None, help='the user handle') + '-b', dest='basedir', required=False, default=None, + help='soledad base directory') + parser.add_argument( + '-p', dest='passphrase', required=False, default=None, + help='the user passphrase') args = parser.parse_args() # get the password - passphrase = getpass.getpass( - 'Password for %s@%s: ' % (args.username, args.provider)) + passphrase = args.passphrase + if passphrase is None: + passphrase = getpass.getpass( + 'Password for %s@%s: ' % (args.username, args.provider)) # get the basedir basedir = args.basedir -- cgit v1.2.3 From a0cbdc8bbba4369c20bb5b285e464c23d6954e17 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 3 Jun 2014 16:13:22 -0300 Subject: Remove close_connections() from tests. 
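An earlier commit in this series dropped the custom Session subclass and its close_connections() helper from couch.py, replacing manual connection cleanup with a throwaway resource per multipart PUT. The couch tests therefore have nothing left to call, and the two remaining close_connections() calls can simply go away. For reference, a minimal sketch of the replacement pattern, simplified from the _new_resource() helper added to couch.py (names below are illustrative):

    from couchdb.http import Resource, Session, urljoin as couch_urljoin

    COUCH_TIMEOUT = 20  # illustrative value; couch.py defines its own COUCH_TIMEOUT

    def fresh_resource(db, *path):
        # db is a couchdb.client.Database; build a short-lived resource that
        # points at the same URL and carries over credentials and headers.
        url = couch_urljoin(db.resource.url, *path)
        resource = Resource(url, Session(timeout=COUCH_TIMEOUT))
        resource.credentials = db.resource.credentials
        resource.headers = db.resource.headers.copy()
        return resource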
--- common/src/leap/soledad/common/tests/test_couch.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index a1fa9568..3b1e5a06 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -219,7 +219,6 @@ def copy_couch_database_for_test(test, db): new_couch_db.put_attachment(new_doc, att, filename=att_name) # cleanup connections to prevent file descriptor leaking - session.close_connections() return new_db @@ -253,7 +252,6 @@ class CouchTests(test_backends.AllDatabaseTests, CouchDBTestCase): session = couch.Session() server = Server(url=self._url, session=session) del(server[self._dbname]) - session.close_connections() else: self.db.delete_database() test_backends.AllDatabaseTests.tearDown(self) -- cgit v1.2.3 From 7d9d827a5f66993863ca0c532c01ad3bf2c4353e Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 3 Jun 2014 18:43:56 -0300 Subject: Replace client sync state by a sync_id. --- client/src/leap/soledad/client/sqlcipher.py | 42 +---- client/src/leap/soledad/client/sync.py | 176 +++------------------ client/src/leap/soledad/client/target.py | 56 +++---- .../soledad/common/ddocs/syncs/updates/state.js | 10 +- .../ddocs/syncs/views/changes_to_return/map.js | 7 +- .../common/ddocs/syncs/views/seen_ids/map.js | 6 +- .../soledad/common/ddocs/syncs/views/state/map.js | 5 +- server/src/leap/soledad/server/sync.py | 28 ++-- 8 files changed, 84 insertions(+), 246 deletions(-) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 74351116..26238af6 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -57,7 +57,7 @@ from pysqlcipher import dbapi2 from u1db.backends import sqlite_backend from u1db import errors as u1db_errors -from leap.soledad.client.sync import Synchronizer, ClientSyncState +from leap.soledad.client.sync import Synchronizer from leap.soledad.client.target import SoledadSyncTarget from leap.soledad.common.document import SoledadDocument @@ -889,45 +889,5 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): if self._db_handle is not None: self._db_handle.close() - def _get_stored_sync_state(self): - """ - Retrieve the currently stored sync state. - - :return: The current stored sync state or None if there's no stored - state. - :rtype: dict or None - """ - c = self._db_handle.cursor() - c.execute("SELECT value FROM u1db_config" - " WHERE name = 'sync_state'") - val = c.fetchone() - if val is None: - return None - return json.loads(val[0]) - - def _set_stored_sync_state(self, state): - """ - Stored the sync state. - - :param state: The sync state to be stored or None to delete any stored - state. 
- :type state: dict or None - """ - c = self._db_handle.cursor() - if state is None: - c.execute("DELETE FROM u1db_config" - " WHERE name = 'sync_state'") - else: - c.execute("INSERT OR REPLACE INTO u1db_config" - " VALUES ('sync_state', ?)", - (json.dumps(state),)) - - stored_sync_state = property( - _get_stored_sync_state, _set_stored_sync_state, - doc="The current sync state dict.") - - @property - def sync_state(self): - return ClientSyncState(self) sqlite_backend.SQLiteDatabase.register_implementation(SQLCipherDatabase) diff --git a/client/src/leap/soledad/client/sync.py b/client/src/leap/soledad/client/sync.py index 5285d540..56e63416 100644 --- a/client/src/leap/soledad/client/sync.py +++ b/client/src/leap/soledad/client/sync.py @@ -27,103 +27,6 @@ from u1db import errors from u1db.sync import Synchronizer as U1DBSynchronizer -class ClientSyncState(object): - """ - The state of the current sync session, as stored on the client. - """ - - _private_attrs = [ - '_db', - ] - - _public_attrs = { - 'target_replica_uid': None, - 'target_gen': None, - 'target_trans_id': None, - 'target_my_gen': None, - 'target_my_trans_id': None, - 'target_last_known_gen': None, - 'target_last_known_trans_id': None, - 'my_gen': None, - 'changes': None, - 'sent': 0, - 'received': 0, - } - - @property - def _public_attr_keys(self): - return self._public_attrs.keys() - - def __init__(self, db=None): - """ - Initialize the client sync state. - - :param db: The database where to fetch/store the sync state. - :type db: SQLCipherDatabase - """ - self._db = db - self._init_state() - - def __setattr__(self, attr, val): - """ - Prevent setting arbitrary attributes. - - :param attr: The attribute name. - :type attr: str - :param val: The value to be set. - :type val: anything - """ - if attr not in self._public_attr_keys + self._private_attrs: - raise Exception - object.__setattr__(self, attr, val) - - def _init_state(self): - """ - Initialize current sync state, potentially fetching sync info stored - in database. - """ - # set local default attributes - for attr in self._public_attr_keys: - setattr(self, attr, self._public_attrs[attr]) - # fetch info from stored sync state - sync_state_dict = None - if self._db is not None: - sync_state_dict = self._db.stored_sync_state - if sync_state_dict is not None: - for attr in self._public_attr_keys: - setattr(self, attr, sync_state_dict[attr]) - - def save(self): - """ - Save the current sync state in the database. - """ - sync_state_dict = {} - for attr in self._public_attr_keys: - sync_state_dict[attr] = getattr(self, attr) - if self._db is not None: - self._db.stored_sync_state = sync_state_dict - - def clear(self): - """ - Clear the sync state info data. - """ - if self._db is not None: - self._db.stored_sync_state = None - self._init_state() - - def has_stored_info(self): - """ - Return whether there is any sync state info stored on the database. - - :return: Whether there's any sync state info store on db. - :rtype: bool - """ - return self._db is not None and self._db.stored_sync_state is not None - - def __str__(self): - return 'ClientSyncState: %s' % ', '.join( - ['%s: %s' % (k, getattr(self, k)) for k in self._public_attr_keys]) - class Synchronizer(U1DBSynchronizer): """ Collect the state around synchronizing 2 U1DB replicas. 
@@ -146,36 +49,20 @@ class Synchronizer(U1DBSynchronizer): """ sync_target = self.sync_target - # recover current sync state from source database - sync_state = self.source.sync_state - self.target_replica_uid = sync_state.target_replica_uid - target_gen = sync_state.target_gen - target_trans_id = sync_state.target_trans_id - target_my_gen = sync_state.target_my_gen - target_my_trans_id = sync_state.target_my_trans_id - target_last_known_gen = sync_state.target_last_known_gen - target_last_known_trans_id = \ - sync_state.target_last_known_trans_id - my_gen = sync_state.my_gen - changes = sync_state.changes - sent = sync_state.sent - received = sync_state.received - # get target identifier, its current generation, # and its last-seen database generation for this source ensure_callback = None - if not sync_state.has_stored_info(): - try: - (self.target_replica_uid, target_gen, target_trans_id, - target_my_gen, target_my_trans_id) = \ - sync_target.get_sync_info(self.source._replica_uid) - except errors.DatabaseDoesNotExist: - if not autocreate: - raise - # will try to ask sync_exchange() to create the db - self.target_replica_uid = None - target_gen, target_trans_id = (0, '') - target_my_gen, target_my_trans_id = (0, '') + try: + (self.target_replica_uid, target_gen, target_trans_id, + target_my_gen, target_my_trans_id) = \ + sync_target.get_sync_info(self.source._replica_uid) + except errors.DatabaseDoesNotExist: + if not autocreate: + raise + # will try to ask sync_exchange() to create the db + self.target_replica_uid = None + target_gen, target_trans_id = (0, '') + target_my_gen, target_my_trans_id = (0, '') # make sure we'll have access to target replica uid once it exists if self.target_replica_uid is None: @@ -192,17 +79,15 @@ class Synchronizer(U1DBSynchronizer): target_my_gen, target_my_trans_id) # what's changed since that generation and this current gen - if not sync_state.has_stored_info(): - my_gen, _, changes = self.source.whats_changed(target_my_gen) + my_gen, _, changes = self.source.whats_changed(target_my_gen) # get source last-seen database generation for the target - if not sync_state.has_stored_info(): - if self.target_replica_uid is None: - target_last_known_gen, target_last_known_trans_id = 0, '' - else: - target_last_known_gen, target_last_known_trans_id = \ - self.source._get_replica_gen_and_trans_id( - self.target_replica_uid) + if self.target_replica_uid is None: + target_last_known_gen, target_last_known_trans_id = 0, '' + else: + target_last_known_gen, target_last_known_trans_id = \ + self.source._get_replica_gen_and_trans_id( + self.target_replica_uid) # validate transaction ids if not changes and target_last_known_gen == target_gen: @@ -220,20 +105,6 @@ class Synchronizer(U1DBSynchronizer): _, gen, trans = changes[idx] docs_by_generation.append((doc, gen, trans)) idx += 1 - # store current sync state info - if not sync_state.has_stored_info(): - sync_state.target_replica_uid = self.target_replica_uid - sync_state.target_gen = target_gen - sync_state.target_trans_id = target_trans_id - sync_state.target_my_gen = target_my_gen - sync_state.target_my_trans_id = target_my_trans_id - sync_state.my_gen = my_gen - sync_state.changes = changes - sync_state.target_last_known_trans_id = \ - target_last_known_trans_id - sync_state.target_last_known_gen = target_last_known_gen - sync_state.sent = sent = 0 - sync_state.received = received = 0 # exchange documents and try to insert the returned ones with # the target, return target synced-up-to gen. 
@@ -243,16 +114,7 @@ class Synchronizer(U1DBSynchronizer): new_gen, new_trans_id = sync_target.sync_exchange( docs_by_generation, self.source._replica_uid, target_last_known_gen, target_last_known_trans_id, - self._insert_doc_from_target, ensure_callback=ensure_callback, - sync_state=sync_state) - - # save sync state info if the sync was interrupted - if new_gen is None and new_trans_id is None: - sync_state.save() - return my_gen - - # sync exchange was succesfull, remove sync state info from source - sync_state.clear() + self._insert_doc_from_target, ensure_callback=ensure_callback) # record target synced-up-to generation including applying what we sent self.source._set_replica_gen_and_trans_id( diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 93de98d3..8f753f74 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -63,7 +63,6 @@ from leap.soledad.client.events import ( SOLEDAD_SYNC_RECEIVE_STATUS, signal, ) -from leap.soledad.client.sync import ClientSyncState logger = logging.getLogger(__name__) @@ -321,7 +320,6 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): HTTPSyncTarget.__init__(self, url, creds) self._crypto = crypto self._stopped = True - self._sync_state = None self._stop_lock = threading.Lock() def _init_post_request(self, url, action, headers, content_length): @@ -347,7 +345,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): self._conn.endheaders() def _get_remote_docs(self, url, last_known_generation, last_known_trans_id, - headers, return_doc_cb, ensure_callback=None): + headers, return_doc_cb, ensure_callback, sync_id): """ Fetch sync documents from the remote database and insert them in the local database. @@ -377,7 +375,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :rtype: list of str """ - def _post_get_doc(): + def _post_get_doc(received): """ Get a sync document from server by means of a POST request. 
""" @@ -388,10 +386,11 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): '', entries, last_known_generation=last_known_generation, last_known_trans_id=last_known_trans_id, + sync_id=sync_id, ensure=ensure_callback is not None) # inform server of how many documents have already been received size += self._prepare( - ',', entries, received=self._sync_state.received) + ',', entries, received=received) entries.append('\r\n]') size += len(entries[-1]) # send headers @@ -402,14 +401,16 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): return self._response() number_of_changes = None + received = 0 - while number_of_changes is None or \ - self._sync_state.received < number_of_changes: + new_generation = last_known_generation + new_transaction_id = last_known_trans_id + while number_of_changes is None or received < number_of_changes: # bail out if sync process was interrupted if self.stopped is True: - return None, None + return last_known_generation, last_known_trans_id # try to fetch one document from target - data, _ = _post_get_doc() + data, _ = _post_get_doc(received) # decode incoming stream parts = data.splitlines() if not parts or parts[0] != '[' or parts[-1] != ']': @@ -424,6 +425,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): soledad_assert('new_generation' in metadata) soledad_assert('new_transaction_id' in metadata) number_of_changes = metadata['number_of_changes'] + new_generation = metadata['new_generation'] + new_transaction_id = metadata['new_transaction_id'] except json.JSONDecodeError, AssertionError: raise BrokenSyncStream # make sure we have replica_uid from fresh new dbs @@ -453,12 +456,12 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # end of symmetric decryption # ------------------------------------------------------------- return_doc_cb(doc, entry['gen'], entry['trans_id']) - self._sync_state.received += 1 + received += 1 signal( SOLEDAD_SYNC_RECEIVE_STATUS, "%d/%d" % - (self._sync_state.received, number_of_changes)) - return metadata['new_generation'], metadata['new_transaction_id'] + (received, number_of_changes)) + return new_generation, new_transaction_id def _request(self, method, url_parts, params=None, body=None, content_type=None): @@ -566,8 +569,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): def sync_exchange(self, docs_by_generations, source_replica_uid, last_known_generation, last_known_trans_id, - return_doc_cb, ensure_callback=None, - sync_state=None): + return_doc_cb, ensure_callback=None): """ Find out which documents the remote database does not know about, encrypt and send them. @@ -596,12 +598,8 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): :return: The new generation and transaction id of the target replica. :rtype: tuple """ - # get the sync state information from client - self._sync_state = sync_state - if self._sync_state is None: - self._sync_state = ClientSyncState() - self.start() + sync_id = str(uuid4()) self._ensure_connection() if self._trace_hook: # for tests @@ -610,7 +608,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): headers = self._sign_request('POST', url, {}) def _post_put_doc(headers, last_known_generation, last_known_trans_id, - id, rev, content, gen, trans_id): + id, rev, content, gen, trans_id, sync_id): """ Put a sync document on server by means of a POST request. 
@@ -626,6 +624,7 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): '', entries, last_known_generation=last_known_generation, last_known_trans_id=last_known_trans_id, + sync_id=sync_id, ensure=ensure_callback is not None) # add the document to the request size += self._prepare( @@ -645,11 +644,11 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): cur_target_gen = last_known_generation cur_target_trans_id = last_known_trans_id - # skip docs that were already sent - if self._sync_state.sent > 0: - docs_by_generations = docs_by_generations[self._sync_state.sent:] - # send docs + sent = 0 + signal( + SOLEDAD_SYNC_SEND_STATUS, + "%d/%d" % (0, len(docs_by_generations))) for doc, gen, trans_id in docs_by_generations: # allow for interrupting the sync process if self.stopped is True: @@ -668,17 +667,18 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): # ------------------------------------------------------------- cur_target_gen, cur_target_trans_id = _post_put_doc( headers, cur_target_gen, cur_target_trans_id, id=doc.doc_id, - rev=doc.rev, content=doc_json, gen=gen, trans_id=trans_id) - self._sync_state.sent += 1 + rev=doc.rev, content=doc_json, gen=gen, trans_id=trans_id, + sync_id=sync_id) + sent += 1 signal( SOLEDAD_SYNC_SEND_STATUS, - "%d/%d" % (self._sync_state.sent, len(docs_by_generations))) + "%d/%d" % (sent, len(docs_by_generations))) # get docs from target cur_target_gen, cur_target_trans_id = self._get_remote_docs( url, last_known_generation, last_known_trans_id, headers, - return_doc_cb, ensure_callback) + return_doc_cb, ensure_callback, sync_id) self.stop() return cur_target_gen, cur_target_trans_id diff --git a/common/src/leap/soledad/common/ddocs/syncs/updates/state.js b/common/src/leap/soledad/common/ddocs/syncs/updates/state.js index cb2b6b7b..d62aeb40 100644 --- a/common/src/leap/soledad/common/ddocs/syncs/updates/state.js +++ b/common/src/leap/soledad/common/ddocs/syncs/updates/state.js @@ -29,6 +29,7 @@ * '_rev' '', * 'ongoing_syncs': { * '': { + * 'sync_id': '', * 'seen_ids': [['', [, ...], * 'changes_to_return': { * 'gen': , @@ -59,17 +60,22 @@ function(doc, req) { // parse and validate incoming data var body = JSON.parse(req.body); if (body['source_replica_uid'] == null) - return [null, 'invalid data'] + return [null, 'invalid data']; var source_replica_uid = body['source_replica_uid']; + if (body['sync_id'] == null) + return [null, 'invalid data']; + var sync_id = body['sync_id']; + // trash outdated sync data for that replica if that exists if (doc['ongoing_syncs'][source_replica_uid] != null && - doc['ongoing_syncs'][source_replica_uid] == null) + doc['ongoing_syncs'][source_replica_uid]['sync_id'] != sync_id) delete doc['ongoing_syncs'][source_replica_uid]; // create an entry for that source replica if (doc['ongoing_syncs'][source_replica_uid] == null) doc['ongoing_syncs'][source_replica_uid] = { + 'sync_id': sync_id, 'seen_ids': {}, 'changes_to_return': null, }; diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js index 04ceb2ec..94b7e767 100644 --- a/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js +++ b/common/src/leap/soledad/common/ddocs/syncs/views/changes_to_return/map.js @@ -2,14 +2,15 @@ function(doc) { if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) for (var source_replica_uid in doc['ongoing_syncs']) { var changes = doc['ongoing_syncs'][source_replica_uid]['changes_to_return']; 
+ var sync_id = doc['ongoing_syncs'][source_replica_uid]['sync_id']; if (changes == null) - emit([source_replica_uid, 0], null); + emit([source_replica_uid, sync_id, 0], null); else if (changes.length == 0) - emit([source_replica_uid, 0], []); + emit([source_replica_uid, sync_id, 0], []); else for (var i = 0; i < changes['changes_to_return'].length; i++) emit( - [source_replica_uid, i], + [source_replica_uid, sync_id, i], { 'gen': changes['gen'], 'trans_id': changes['trans_id'], diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js index 34c65b3f..16118e88 100644 --- a/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js +++ b/common/src/leap/soledad/common/ddocs/syncs/views/seen_ids/map.js @@ -1,9 +1,11 @@ function(doc) { if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) - for (var source_replica_uid in doc['ongoing_syncs']) + for (var source_replica_uid in doc['ongoing_syncs']) { + var sync_id = doc['ongoing_syncs'][source_replica_uid]['sync_id']; emit( - source_replica_uid, + [source_replica_uid, sync_id], { 'seen_ids': doc['ongoing_syncs'][source_replica_uid]['seen_ids'], }); + } } diff --git a/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js b/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js index 1d8f8e84..e88c6ebb 100644 --- a/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js +++ b/common/src/leap/soledad/common/ddocs/syncs/views/state/map.js @@ -2,11 +2,12 @@ function(doc) { if (doc['_id'] == 'u1db_sync_state' && doc['ongoing_syncs'] != null) for (var source_replica_uid in doc['ongoing_syncs']) { var changes = doc['ongoing_syncs'][source_replica_uid]['changes_to_return']; + var sync_id = doc['ongoing_syncs'][source_replica_uid]['sync_id']; if (changes == null) - emit(source_replica_uid, null); + emit([source_replica_uid, sync_id], null); else emit( - source_replica_uid, + [source_replica_uid, sync_id], { 'gen': changes['gen'], 'trans_id': changes['trans_id'], diff --git a/server/src/leap/soledad/server/sync.py b/server/src/leap/soledad/server/sync.py index 16926f14..c6928aaa 100644 --- a/server/src/leap/soledad/server/sync.py +++ b/server/src/leap/soledad/server/sync.py @@ -48,7 +48,7 @@ class ServerSyncState(object): called 'u1db_sync_state'. """ - def __init__(self, db, source_replica_uid): + def __init__(self, db, source_replica_uid, sync_id): """ Initialize the sync state object. 
@@ -59,6 +59,7 @@ class ServerSyncState(object): """ self._db = db self._source_replica_uid = source_replica_uid + self._sync_id = sync_id def _key(self, key): """ @@ -91,6 +92,7 @@ class ServerSyncState(object): with CouchDatabase.sync_info_lock[self._db.replica_uid]: res.put_json( body={ + 'sync_id': self._sync_id, 'source_replica_uid': self._source_replica_uid, key: value, }, @@ -118,7 +120,8 @@ class ServerSyncState(object): """ ddoc_path = ['_design', 'syncs', '_view', 'seen_ids'] resource = self._db._database.resource(*ddoc_path) - response = resource.get_json(key=self._key(self._source_replica_uid)) + response = resource.get_json( + key=self._key([self._source_replica_uid, self._sync_id])) data = response[2] if data['rows']: entry = data['rows'].pop() @@ -160,7 +163,8 @@ class ServerSyncState(object): """ ddoc_path = ['_design', 'syncs', '_view', 'state'] resource = self._db._database.resource(*ddoc_path) - response = resource.get_json(key=self._key(self._source_replica_uid)) + response = resource.get_json( + key=self._key([self._source_replica_uid, self._sync_id])) data = response[2] gen = None trans_id = None @@ -184,7 +188,7 @@ class ServerSyncState(object): resource = self._db._database.resource(*ddoc_path) response = resource.get_json( key=self._key( - [self._source_replica_uid, received])) + [self._source_replica_uid, self._sync_id, received])) data = response[2] if not data['rows']: return None, None, None @@ -197,7 +201,7 @@ class ServerSyncState(object): class SyncExchange(sync.SyncExchange): - def __init__(self, db, source_replica_uid, last_known_generation): + def __init__(self, db, source_replica_uid, last_known_generation, sync_id): """ :param db: The target syncing database. :type db: CouchDatabase @@ -210,11 +214,13 @@ class SyncExchange(sync.SyncExchange): self._db = db self.source_replica_uid = source_replica_uid self.source_last_known_generation = last_known_generation + self.sync_id = sync_id self.new_gen = None self.new_trans_id = None self._trace_hook = None # recover sync state - self._sync_state = ServerSyncState(self._db, self.source_replica_uid) + self._sync_state = ServerSyncState( + self._db, self.source_replica_uid, sync_id) def find_changes_to_return(self, received): @@ -322,9 +328,9 @@ class SyncResource(http_app.SyncResource): @http_app.http_method( last_known_generation=int, last_known_trans_id=http_app.none_or_str, - content_as_args=True) + sync_id=http_app.none_or_str, content_as_args=True) def post_args(self, last_known_generation, last_known_trans_id=None, - ensure=False): + sync_id=None, ensure=False): """ Handle the initial arguments for the sync POST request from client. @@ -348,7 +354,7 @@ class SyncResource(http_app.SyncResource): last_known_generation, last_known_trans_id) # get a sync exchange object self.sync_exch = self.sync_exchange_class( - db, self.source_replica_uid, last_known_generation) + db, self.source_replica_uid, last_known_generation, sync_id) @http_app.http_method(content_as_args=True) def post_put(self, id, rev, content, gen, trans_id): @@ -405,8 +411,8 @@ class SyncResource(http_app.SyncResource): def post_end(self): """ - Return the current generation and transaction_id after inserting a - series of incoming documents. + Return the current generation and transaction_id after inserting one + incoming document. 
""" self.responder.content_type = 'application/x-soledad-sync-response' self.responder.start_response(200) -- cgit v1.2.3 From a35176a298480676d16fe195971ed89b21a78357 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 9 Aug 2013 13:29:01 +0200 Subject: Make server auth time-insensitive. --- server/src/leap/soledad/server/auth.py | 16 ++++++++++++---- .../changes/feature_3399-check-auth-in-constant-way | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 soledad_server/changes/feature_3399-check-auth-in-constant-way diff --git a/server/src/leap/soledad/server/auth.py b/server/src/leap/soledad/server/auth.py index e9d2b032..57f600a1 100644 --- a/server/src/leap/soledad/server/auth.py +++ b/server/src/leap/soledad/server/auth.py @@ -30,6 +30,7 @@ from abc import ABCMeta, abstractmethod from routes.mapper import Mapper from couchdb.client import Server from twisted.python import log +from hashlib import sha512 from leap.soledad.common import ( @@ -415,10 +416,17 @@ class SoledadTokenAuthMiddleware(SoledadAuthMiddleware): server = Server(url=self._app.state.couch_url) dbname = self.TOKENS_DB db = server[dbname] - token = db.get(token) - if token is None or \ - token[self.TOKENS_TYPE_KEY] != self.TOKENS_TYPE_DEF or \ - token[self.TOKENS_USER_ID_KEY] != uuid: + # lookup key is a hash of the token to prevent timing attacks. + token = db.get(sha512(token).hexdigest()) + if token is None: + raise InvalidAuthTokenError() + # we compare uuid hashes to avoid possible timing attacks that + # might exploit python's builtin comparison operator behaviour, + # which fails immediatelly when non-matching bytes are found. + couch_uuid_hash = sha512(token[self.TOKENS_USER_ID_KEY]).digest() + req_uuid_hash = sha512(uuid).digest() + if token[self.TOKENS_TYPE_KEY] != self.TOKENS_TYPE_DEF \ + or couch_uuid_hash != req_uuid_hash: raise InvalidAuthTokenError() return True diff --git a/soledad_server/changes/feature_3399-check-auth-in-constant-way b/soledad_server/changes/feature_3399-check-auth-in-constant-way new file mode 100644 index 00000000..ebd18680 --- /dev/null +++ b/soledad_server/changes/feature_3399-check-auth-in-constant-way @@ -0,0 +1 @@ + o Authenticate in time-insensitive manner. Closes #3399. -- cgit v1.2.3 From ea2e14430bb26c51611d7c5267489d264caa9bf9 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 5 Jun 2014 10:01:57 -0300 Subject: Add changes file. --- common/changes/bug_5739_fix-multipart-problem | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 common/changes/bug_5739_fix-multipart-problem diff --git a/common/changes/bug_5739_fix-multipart-problem b/common/changes/bug_5739_fix-multipart-problem new file mode 100644 index 00000000..449e09b8 --- /dev/null +++ b/common/changes/bug_5739_fix-multipart-problem @@ -0,0 +1,2 @@ + o Use a dedicated HTTP resource for couch multipart PUTs to avoid bigcouch + bug (#5739). -- cgit v1.2.3 From 68443c469e224d93fc36c7c1014191e883edaf67 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 5 Jun 2014 10:11:13 -0300 Subject: Reset synchronizer state in order to reuse the same synchronizer multiple times. 
--- client/changes/bug_reset-synchronizer-state | 2 ++ client/src/leap/soledad/client/__init__.py | 1 + client/src/leap/soledad/client/sqlcipher.py | 4 ++++ 3 files changed, 7 insertions(+) create mode 100644 client/changes/bug_reset-synchronizer-state diff --git a/client/changes/bug_reset-synchronizer-state b/client/changes/bug_reset-synchronizer-state new file mode 100644 index 00000000..9678b36b --- /dev/null +++ b/client/changes/bug_reset-synchronizer-state @@ -0,0 +1,2 @@ + o Reset synchronizer state in order to reuse the same synchronizer object + multiple times. diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 2fb33184..656c0e77 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -1065,6 +1065,7 @@ class Soledad(object): """ if self._db: # acquire lock before attempt to sync + # TODO: move this lock to inside SQLCipherDatabase. with Soledad.syncing_lock[self._db._get_replica_uid()]: local_gen = self._db.sync( urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 26238af6..576b51ad 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -401,6 +401,10 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): creds=creds, crypto=self._crypto)) self._syncers[url] = (h, syncer) + # in order to reuse the same synchronizer multiple times we have to + # reset its state (i.e. the number of documents received from target + # and inserted in the local replica). + syncer.num_inserted = 0 return syncer def _extra_schema_init(self, c): -- cgit v1.2.3 From 08a2f350690b3a66212f3d4f63b24b20f682f88e Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 5 Jun 2014 10:21:29 -0300 Subject: Move the syncing lock to inside SQLCipherDatabase. --- client/src/leap/soledad/client/__init__.py | 11 ----------- client/src/leap/soledad/client/sqlcipher.py | 26 +++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 656c0e77..0d3a21fd 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -34,8 +34,6 @@ import urlparse import hmac from hashlib import sha256 -from threading import Lock -from collections import defaultdict try: import cchardet as chardet @@ -224,12 +222,6 @@ class Soledad(object): Prefix for default values for path. """ - syncing_lock = defaultdict(Lock) - """ - A dictionary that hold locks which avoid multiple sync attempts from the - same database replica. - """ - def __init__(self, uuid, passphrase, secrets_path, local_db_path, server_url, cert_file, auth_token=None, secret_id=None): """ @@ -1064,9 +1056,6 @@ class Soledad(object): :rtype: str """ if self._db: - # acquire lock before attempt to sync - # TODO: move this lock to inside SQLCipherDatabase. 
- with Soledad.syncing_lock[self._db._get_replica_uid()]: local_gen = self._db.sync( urlparse.urljoin(self.server_url, 'user-%s' % self._uuid), creds=self._creds, autocreate=False) diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py index 576b51ad..5ffa9c7e 100644 --- a/client/src/leap/soledad/client/sqlcipher.py +++ b/client/src/leap/soledad/client/sqlcipher.py @@ -52,6 +52,7 @@ import json from hashlib import sha256 from contextlib import contextmanager +from collections import defaultdict from pysqlcipher import dbapi2 from u1db.backends import sqlite_backend @@ -153,6 +154,13 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): create_doc_lock = threading.Lock() update_indexes_lock = threading.Lock() + syncing_lock = defaultdict(threading.Lock) + """ + A dictionary that hold locks which avoid multiple sync attempts from the + same database replica. + """ + + def __init__(self, sqlcipher_file, password, document_factory=None, crypto=None, raw_key=False, cipher='aes-256-cbc', kdf_iter=4000, cipher_page_size=1024): @@ -343,6 +351,10 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): """ Synchronize documents with remote replica exposed at url. + There can be at most one instance syncing the same database replica at + the same time, so this method will block until the syncing lock can be + acquired. + :param url: The url of the target replica to sync with. :type url: str :param creds: optional dictionary giving credentials. @@ -355,6 +367,8 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): :rtype: int """ res = None + # the following context manager blocks until the syncing lock can be + # acquired. with self.syncer(url, creds=creds) as syncer: res = syncer.sync(autocreate=autocreate) return res @@ -371,10 +385,16 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase): def syncer(self, url, creds=None): """ Accesor for synchronizer. + + As we reuse the same synchronizer for every sync, there can be only + one instance synchronizing the same database replica at the same time. + Because of that, this method blocks until the syncing lock can be + acquired. """ - syncer = self._get_syncer(url, creds=creds) - yield syncer - syncer.sync_target.close() + with SQLCipherDatabase.syncing_lock[self._get_replica_uid()]: + syncer = self._get_syncer(url, creds=creds) + yield syncer + syncer.sync_target.close() def _get_syncer(self, url, creds=None): """ -- cgit v1.2.3 From 9a4fd9e1e7580f4f31acbaea7d3315302c01e42e Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 5 Jun 2014 13:08:59 -0300 Subject: Reorganize profiling scripts. 
--- scripts/db_access/client_side_db.py | 14 +- .../profiling/backends_cpu_usage/movingaverage.py | 210 +-------------------- scripts/profiling/movingaverage.py | 209 ++++++++++++++++++++ 3 files changed, 214 insertions(+), 219 deletions(-) mode change 100644 => 120000 scripts/profiling/backends_cpu_usage/movingaverage.py create mode 100644 scripts/profiling/movingaverage.py diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py index 9aadd5fe..6c456c41 100644 --- a/scripts/db_access/client_side_db.py +++ b/scripts/db_access/client_side_db.py @@ -15,10 +15,14 @@ import srp._pysrp as srp import binascii import logging + from leap.common.config import get_path_prefix from leap.soledad.client import Soledad +from util import ValidateUserHandle + + # create a logger logger = logging.getLogger(__name__) LOG_FORMAT = '%(asctime)s %(message)s' @@ -118,16 +122,6 @@ def get_soledad_instance(username, provider, passphrase, basedir): auth_token=token) -class ValidateUserHandle(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') - res = m.match(values) - if res == None: - parser.error('User handle should have the form user@provider.') - setattr(namespace, 'username', res.groups()[0]) - setattr(namespace, 'provider', res.groups()[1]) - - # main program if __name__ == '__main__': diff --git a/scripts/profiling/backends_cpu_usage/movingaverage.py b/scripts/profiling/backends_cpu_usage/movingaverage.py deleted file mode 100644 index bac1b3e1..00000000 --- a/scripts/profiling/backends_cpu_usage/movingaverage.py +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env python -# -# Sean Reifschneider, tummy.com, ltd. -# Released into the Public Domain, 2011-02-06 - -import itertools -from itertools import islice -from collections import deque - - -######################################################### -def movingaverage(data, subset_size, data_is_list = None, - avoid_fp_drift = True): - '''Return the moving averages of the data, with a window size of - `subset_size`. `subset_size` must be an integer greater than 0 and - less than the length of the input data, or a ValueError will be raised. - - `data_is_list` can be used to tune the algorithm for list or iteratable - as an input. The default value, `None` will auto-detect this. - The algorithm used if `data` is a list is almost twice as fast as if - it is an iteratable. - - `avoid_fp_drift`, if True (the default) sums every sub-set rather than - keeping a "rolling sum" (which may be subject to floating-point drift). - While more correct, it is also dramatically slower for subset sizes - much larger than 20. - - NOTE: You really should consider setting `avoid_fp_drift = False` unless - you are dealing with very small numbers (say, far smaller than 0.00001) - or require extreme accuracy at the cost of execution time. For - `subset_size` < 20, the performance difference is very small. - ''' - if subset_size < 1: - raise ValueError('subset_size must be 1 or larger') - - if data_is_list is None: - data_is_list = hasattr(data, '__getslice__') - - divisor = float(subset_size) - if data_is_list: - # This only works if we can re-access old elements, but is much faster. - # In other words, it can't be just an iterable, it needs to be a list. 
- - if subset_size > len(data): - raise ValueError('subset_size must be smaller than data set size') - - if avoid_fp_drift: - for x in range(subset_size, len(data) + 1): - yield sum(data[x - subset_size:x]) / divisor - else: - cur = sum(data[0:subset_size]) - yield cur / divisor - for x in range(subset_size, len(data)): - cur += data[x] - data[x - subset_size] - yield cur / divisor - else: - # Based on the recipe at: - # http://docs.python.org/library/collections.html#deque-recipes - it = iter(data) - d = deque(islice(it, subset_size)) - - if subset_size > len(d): - raise ValueError('subset_size must be smaller than data set size') - - if avoid_fp_drift: - yield sum(d) / divisor - for elem in it: - d.popleft() - d.append(elem) - yield sum(d) / divisor - else: - s = sum(d) - yield s / divisor - for elem in it: - s += elem - d.popleft() - d.append(elem) - yield s / divisor - - -########################## -if __name__ == '__main__': - import unittest - - class TestMovingAverage(unittest.TestCase): - #################### - def test_List(self): - try: - list(movingaverage([1,2,3], 0)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage([1,2,3,4,5,6], 7)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) - - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, False)), [40.0,42.0,45.0,43.0]) - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, True)), [40.0,42.0,45.0,43.0]) - - - ###################### - def test_XRange(self): - try: - list(movingaverage(xrange(1, 4), 0)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage(xrange(1, 7), 7)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage(xrange(1, 7), 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), - 2)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) - self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) - - - ########################### - def test_ListRolling(self): - try: - list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, - avoid_fp_drift = False)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, - avoid_fp_drift = False)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(map(float, 
[1,2,3,4,5,6]), 2, - avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, - avoid_fp_drift = False)), [2,3,4,5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, - avoid_fp_drift = False)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, - avoid_fp_drift = False)), [3,4]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, - avoid_fp_drift = False)), [3.5]) - - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) - - - ############################# - def test_XRangeRolling(self): - try: - list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage(xrange(1, 7), 1, - avoid_fp_drift = False)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage(xrange(1, 7), 2, - avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), - 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 3, - avoid_fp_drift = False)), [2,3,4,5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 4, - avoid_fp_drift = False)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 5, - avoid_fp_drift = False)), [3,4]) - self.assertEqual(list(movingaverage(xrange(1, 7), 6, - avoid_fp_drift = False)), [3.5]) - - - ###################################################################### - suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) - unittest.TextTestRunner(verbosity = 2).run(suite) - diff --git a/scripts/profiling/backends_cpu_usage/movingaverage.py b/scripts/profiling/backends_cpu_usage/movingaverage.py new file mode 120000 index 00000000..098b0a01 --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/movingaverage.py @@ -0,0 +1 @@ +../movingaverage.py \ No newline at end of file diff --git a/scripts/profiling/movingaverage.py b/scripts/profiling/movingaverage.py new file mode 100644 index 00000000..bac1b3e1 --- /dev/null +++ b/scripts/profiling/movingaverage.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +# Sean Reifschneider, tummy.com, ltd. +# Released into the Public Domain, 2011-02-06 + +import itertools +from itertools import islice +from collections import deque + + +######################################################### +def movingaverage(data, subset_size, data_is_list = None, + avoid_fp_drift = True): + '''Return the moving averages of the data, with a window size of + `subset_size`. `subset_size` must be an integer greater than 0 and + less than the length of the input data, or a ValueError will be raised. + + `data_is_list` can be used to tune the algorithm for list or iteratable + as an input. The default value, `None` will auto-detect this. + The algorithm used if `data` is a list is almost twice as fast as if + it is an iteratable. + + `avoid_fp_drift`, if True (the default) sums every sub-set rather than + keeping a "rolling sum" (which may be subject to floating-point drift). + While more correct, it is also dramatically slower for subset sizes + much larger than 20. 
+ + NOTE: You really should consider setting `avoid_fp_drift = False` unless + you are dealing with very small numbers (say, far smaller than 0.00001) + or require extreme accuracy at the cost of execution time. For + `subset_size` < 20, the performance difference is very small. + ''' + if subset_size < 1: + raise ValueError('subset_size must be 1 or larger') + + if data_is_list is None: + data_is_list = hasattr(data, '__getslice__') + + divisor = float(subset_size) + if data_is_list: + # This only works if we can re-access old elements, but is much faster. + # In other words, it can't be just an iterable, it needs to be a list. + + if subset_size > len(data): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + for x in range(subset_size, len(data) + 1): + yield sum(data[x - subset_size:x]) / divisor + else: + cur = sum(data[0:subset_size]) + yield cur / divisor + for x in range(subset_size, len(data)): + cur += data[x] - data[x - subset_size] + yield cur / divisor + else: + # Based on the recipe at: + # http://docs.python.org/library/collections.html#deque-recipes + it = iter(data) + d = deque(islice(it, subset_size)) + + if subset_size > len(d): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + yield sum(d) / divisor + for elem in it: + d.popleft() + d.append(elem) + yield sum(d) / divisor + else: + s = sum(d) + yield s / divisor + for elem in it: + s += elem - d.popleft() + d.append(elem) + yield s / divisor + + +########################## +if __name__ == '__main__': + import unittest + + class TestMovingAverage(unittest.TestCase): + #################### + def test_List(self): + try: + list(movingaverage([1,2,3], 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True)), [40.0,42.0,45.0,43.0]) + + + ###################### + def test_XRange(self): + try: + list(movingaverage(xrange(1, 4), 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) + + + 
########################### + def test_ListRolling(self): + try: + list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, + avoid_fp_drift = False)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, + avoid_fp_drift = False)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + + + ############################# + def test_XRangeRolling(self): + try: + list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6, + avoid_fp_drift = False)), [3.5]) + + + ###################################################################### + suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) + unittest.TextTestRunner(verbosity = 2).run(suite) + -- cgit v1.2.3 From bff6444cc2a119a49f5c259644ffc2d6862f779e Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 5 Jun 2014 14:57:08 -0300 Subject: Add missing doc. --- client/src/leap/soledad/client/target.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/client/src/leap/soledad/client/target.py b/client/src/leap/soledad/client/target.py index 8f753f74..968545b6 100644 --- a/client/src/leap/soledad/client/target.py +++ b/client/src/leap/soledad/client/target.py @@ -378,6 +378,10 @@ class SoledadSyncTarget(HTTPSyncTarget, TokenBasedAuth): def _post_get_doc(received): """ Get a sync document from server by means of a POST request. + + :param received: The number of documents already received in the + current sync session. + :type received: int """ entries = ['['] size = 1 -- cgit v1.2.3
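As a quick sanity check for the relocated moving-average helper, the generator can be exercised with the same values its unit tests assert on (run from scripts/profiling/, or with that directory on sys.path, so the module is importable):

    from movingaverage import movingaverage

    # window of 3 over a list; expected values mirror the test_List assertions
    assert list(movingaverage([1, 2, 3, 4, 5, 6], 3)) == [2, 3, 4, 5]
    assert list(movingaverage([40, 30, 50, 46, 39, 44], 3)) == [40.0, 42.0, 45.0, 43.0]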