From 42b02476ff70326e2d52fa70a94f1f7035cb185a Mon Sep 17 00:00:00 2001 From: drebs Date: Sun, 15 Dec 2013 02:04:14 -0200 Subject: Add database migration script. --- scripts/migrate_dbs.py | 288 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 288 insertions(+) create mode 100644 scripts/migrate_dbs.py (limited to 'scripts') diff --git a/scripts/migrate_dbs.py b/scripts/migrate_dbs.py new file mode 100644 index 00000000..f1c20d87 --- /dev/null +++ b/scripts/migrate_dbs.py @@ -0,0 +1,288 @@ +#!/usr/bin/python + +import sys +import json +import logging +import argparse +import re +import threading +from urlparse import urlparse +from ConfigParser import ConfigParser +from couchdb.client import Server +from couchdb.http import ResourceNotFound, Resource, Session +from datetime import datetime + +from leap.soledad.common.couch import CouchDatabase + + +# parse command line for the log file name +logger_fname = "/tmp/u1db-couch-db-migration_%s.log" % \ + str(datetime.now()).replace(' ', '_') +parser = argparse.ArgumentParser() +parser.add_argument('--log', action='store', default=logger_fname, type=str, + required=False, help='the name of the log file', nargs=1) +args = parser.parse_args() + + +# configure the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +print "Logging to %s." % args.log +logging.basicConfig( + filename=args.log, + format="%(asctime)-15s %(message)s") + + +# configure threads +max_threads = 20 +semaphore_pool = threading.BoundedSemaphore(value=max_threads) + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = cp.get('soledad-server', 'couch_url') + +resource = Resource(url, Session(retry_delays=[1,2,4,8], timeout=10)) +server = Server(url=resource) + +hidden_url = re.sub( + 'http://(.*):.*@', + 'http://\\1:xxxxx@', + url) + +print """ +========== +ATTENTION! +========== + +This script will modify Soledad's shared and user databases in: + + %s + +This script does not make a backup of the couch db data, so make sure youj +have a copy or you may loose data. +""" % hidden_url +confirm = raw_input("Proceed (type uppercase YES)? ") + +if confirm != "YES": + exit(1) + + +# +# Thread +# + +class DocWorkerThread(threading.Thread): + + def __init__(self, dbname, doc_id, db_idx, db_len, doc_idx, doc_len, + transaction_log, conflict_log, release_fun): + threading.Thread.__init__(self) + resource = Resource(url, Session(retry_delays=[1,2,4,8], timeout=10)) + server = Server(url=resource) + self._dbname = dbname + self._cdb = server[self._dbname] + self._doc_id = doc_id + self._db_idx = db_idx + self._db_len = db_len + self._doc_idx = doc_idx + self._doc_len = doc_len + self._transaction_log = transaction_log + self._conflict_log = conflict_log + self._release_fun = release_fun + + def run(self): + + old_doc = self._cdb[self._doc_id] + + # skip non u1db docs + if 'u1db_rev' not in old_doc: + logger.debug('(%d/%d) (%d/%d) Skipping %s/%s).' % + (self._db_idx, self._db_len, self._doc_idx, + self._doc_len, self._dbname, self._doc_id)) + self._release_fun() + return + else: + logger.debug('(%d/%d) (%d/%d) Processing %s/%s ...' 
% + (self._db_idx, self._db_len, self._doc_idx, + self._doc_len, self._dbname, self._doc_id)) + + doc = { + '_id': self._doc_id, + '_rev': old_doc['_rev'], + 'u1db_rev': old_doc['u1db_rev'] + } + attachments = [] + + # add transactions + doc['u1db_transactions'] = map( + lambda (gen, doc_id, trans_id): (gen, trans_id), + filter( + lambda (gen, doc_id, trans_id): doc_id == doc['_id'], + self._transaction_log)) + if len(doc['u1db_transactions']) == 0: + del doc['u1db_transactions'] + + # add conflicts + if doc['_id'] in self._conflict_log: + attachments.append([ + conflict_log[doc['_id']], + 'u1db_conflicts', + "application/octet-stream"]) + + # move document's content to 'u1db_content' attachment + content = self._cdb.get_attachment(doc, 'u1db_json') + if content is not None: + attachments.append([ + content, + 'u1db_content', + "application/octet-stream"]) + #self._cdb.delete_attachment(doc, 'u1db_json') + + # save modified doc + self._cdb.save(doc) + + # save all doc attachments + for content, att_name, content_type in attachments: + self._cdb.put_attachment( + doc, + content, + filename=att_name, + content_type=content_type) + + # release the semaphore + self._release_fun() + + +db_idx = 0 +db_len = len(server) +for dbname in server: + + db_idx += 1 + + if not (dbname.startswith('user-') or dbname == 'shared') \ + or dbname == 'user-test-db': + logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) + continue + + logger.info("(%d/%d) Migrating db %s." % (db_idx, db_len, dbname)) + + # get access to couch db + cdb = Server(url)[dbname] + + # get access to soledad db + sdb = CouchDatabase(url, dbname) + + # Migration table + # --------------- + # + # * Metadata that was previously stored in special documents migrate to + # inside documents, to allow for atomic doc-and-metadata updates. + # * Doc content attachment name changes. + # * Indexes are removed, to be implemented in the future possibly as + # design docs view functions. + # + # +-----------------+-------------------------+-------------------------+ + # | Data | old storage | new storage | + # |-----------------+-------------------------+-------------------------+ + # | doc content | /u1db_json | /u1db_content | + # | doc conflicts | u1db/_conflicts | /u1db_conflicts | + # | transaction log | u1db/_transaction_log | doc.u1db_transactions | + # | sync log | u1db/_other_generations | u1db_sync_log | + # | indexes | u1db/_indexes | not implemented | + # | replica uid | u1db/_replica_uid | u1db_config | + # +-----------------+-------------------------+-------------------------+ + + def get_att_content(db, doc_id, att_name): + try: + return json.loads( + db.get_attachment( + doc_id, att_name).read())['content'] + except: + import ipdb + ipdb.set_trace() + + # only migrate databases that have the 'u1db/_replica_uid' document + try: + metadoc = cdb.get('u1db/_replica_uid') + replica_uid = get_att_content(cdb, 'u1db/_replica_uid', 'u1db_json') + except ResourceNotFound: + continue + + #--------------------------------------------------------------------- + # Step 1: Set replica uid. + #--------------------------------------------------------------------- + sdb._set_replica_uid(replica_uid) + + #--------------------------------------------------------------------- + # Step 2: Obtain metadata. + #--------------------------------------------------------------------- + + # obtain the transaction log: [['', ''], ...] 
+ transaction_log = get_att_content( + cdb, 'u1db/_transaction_log', 'u1db_json') + new_transaction_log = [] + gen = 1 + for (doc_id, trans_id) in transaction_log: + new_transaction_log.append((gen, doc_id, trans_id)) + gen += 1 + transaction_log = new_transaction_log + + # obtain the conflict log: {'': ['', ''], ...} + conflict_log = get_att_content(cdb, 'u1db/_conflicts', 'u1db_json') + + # obtain the sync log: + # {'': ['', ''], ...} + other_generations = get_att_content( + cdb, 'u1db/_other_generations', 'u1db_json') + + #--------------------------------------------------------------------- + # Step 3: Iterate over all documents in database. + #--------------------------------------------------------------------- + doc_len = len(cdb) + logger.info("(%d, %d) Found %d documents." % (db_idx, db_len, doc_len)) + doc_idx = 0 + threads = [] + for doc_id in cdb: + doc_idx = doc_idx + 1 + + semaphore_pool.acquire() + thread = DocWorkerThread(dbname, doc_id, db_idx, db_len, + doc_idx, doc_len, transaction_log, + conflict_log, semaphore_pool.release) + thread.daemon = True + thread.start() + threads.append(thread) + + map(lambda thread: thread.join(), threads) + + #--------------------------------------------------------------------- + # Step 4: Move sync log. + #--------------------------------------------------------------------- + + # move sync log + sync_doc = { + '_id': 'u1db_sync_log', + 'syncs': [] + } + + for replica_uid in other_generations: + gen, transaction_id = other_generations[replica_uid] + sync_doc['syncs'].append([replica_uid, gen, transaction_id]) + cdb.save(sync_doc) + + #--------------------------------------------------------------------- + # Step 5: Delete old meta documents. + #--------------------------------------------------------------------- + + # remove unused docs + for doc_id in ['_transaction_log', '_conflicts', '_other_generations', + '_indexes', '_replica_uid']: + for prefix in ['u1db/', 'u1db%2F']: + try: + doc = cdb['%s%s' % (prefix, doc_id)] + logger.info( + "(%d/%d) Deleting %s/%s/%s." % + (db_idx, db_len, dbname, 'u1db', doc_id)) + cdb.delete(doc) + except ResourceNotFound: + pass -- cgit v1.2.3 From 0bf2f9ec215f3c638701631e5676b670d7acc1b9 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 22 Jan 2014 14:51:45 -0200 Subject: Fix dev scripts doc and names. --- scripts/README.rst | 13 ------------- scripts/client-side-db.py | 36 ------------------------------------ scripts/client_side_db.py | 40 ++++++++++++++++++++++++++++++++++++++++ scripts/server-side-db.py | 38 -------------------------------------- scripts/server_side_db.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 87 deletions(-) delete mode 100644 scripts/client-side-db.py create mode 100644 scripts/client_side_db.py delete mode 100644 scripts/server-side-db.py create mode 100644 scripts/server_side_db.py (limited to 'scripts') diff --git a/scripts/README.rst b/scripts/README.rst index fdd1d642..37cf2c0e 100644 --- a/scripts/README.rst +++ b/scripts/README.rst @@ -2,16 +2,3 @@ Soledad Scripts =============== The scripts in this directory are meant to be used for development purposes. - -Currently, the scripts are: - - * server-side-db.py: Gives access to server-side soledad user database, - based on the configuration in /etc/leap/soledad-server.conf. One should - use it as: - - python -i server-side-db.py - - * client-side-db.py: Gives access to client-side soledad user database, - based on data stored in ~/.config/leap/soledad. 
One should use it as: - - python -i client-side-db.py diff --git a/scripts/client-side-db.py b/scripts/client-side-db.py deleted file mode 100644 index 0c3df7a4..00000000 --- a/scripts/client-side-db.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python - -# This script gives client-side access to one Soledad user database by using -# the data stored in ~/.config/leap/soledad/ - -import sys -import os - -from leap.common.config import get_path_prefix -from leap.soledad.client import Soledad - -if len(sys.argv) != 3: - print 'Usage: %s ' % sys.argv[0] - exit(1) - -uuid = sys.argv[1] -passphrase = unicode(sys.argv[2]) - -secrets_path = os.path.join(get_path_prefix(), 'leap', 'soledad', - '%s.secret' % uuid) -local_db_path = os.path.join(get_path_prefix(), 'leap', 'soledad', - '%s.db' % uuid) -server_url = 'http://dummy-url' -cert_file = 'cert' - -sol = Soledad(uuid, passphrase, secrets_path, local_db_path, server_url, - cert_file) -db = sol._db - -# get replica info -replica_uid = db._replica_uid -gen, docs = db.get_all_docs() -print "replica_uid: %s" % replica_uid -print "generation: %d" % gen -gen, trans_id = db._get_generation_info() -print "transaction_id: %s" % trans_id diff --git a/scripts/client_side_db.py b/scripts/client_side_db.py new file mode 100644 index 00000000..6d1843ac --- /dev/null +++ b/scripts/client_side_db.py @@ -0,0 +1,40 @@ +#!/usr/bin/python + +# This script gives client-side access to one Soledad user database by using +# the data stored in the appropriate config dir given by get_path_prefix(). +# +# Use it like this: +# +# python -i client-side-db.py + +import sys +import os + +from leap.common.config import get_path_prefix +from leap.soledad.client import Soledad + +if len(sys.argv) != 3: + print 'Usage: %s ' % sys.argv[0] + exit(1) + +uuid = sys.argv[1] +passphrase = unicode(sys.argv[2]) + +secrets_path = os.path.join(get_path_prefix(), 'leap', 'soledad', + '%s.secret' % uuid) +local_db_path = os.path.join(get_path_prefix(), 'leap', 'soledad', + '%s.db' % uuid) +server_url = 'http://dummy-url' +cert_file = 'cert' + +sol = Soledad(uuid, passphrase, secrets_path, local_db_path, server_url, + cert_file) +db = sol._db + +# get replica info +replica_uid = db._replica_uid +gen, docs = db.get_all_docs() +print "replica_uid: %s" % replica_uid +print "generation: %d" % gen +gen, trans_id = db._get_generation_info() +print "transaction_id: %s" % trans_id diff --git a/scripts/server-side-db.py b/scripts/server-side-db.py deleted file mode 100644 index 01a9aaac..00000000 --- a/scripts/server-side-db.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/python - -# This script gives server-side access to one Soledad user database by using -# the configuration stored in /etc/leap/soledad-server.conf. 
- -import sys -from ConfigParser import ConfigParser - -from leap.soledad.common.couch import CouchDatabase - -if len(sys.argv) != 2: - print 'Usage: %s ' % sys.argv[0] - exit(1) - -uuid = sys.argv[1] - -# get couch url -cp = ConfigParser() -cp.read('/etc/leap/soledad-server.conf') -url = cp.get('soledad-server', 'couch_url') - -# access user db -dbname = 'user-%s' % uuid -db = CouchDatabase(url, dbname) - -# get replica info -replica_uid = db._replica_uid -gen, docs = db.get_all_docs() -print "dbname: %s" % dbname -print "replica_uid: %s" % replica_uid -print "generation: %d" % gen - -# get relevant docs -schemes = map(lambda d: d.content['_enc_scheme'], docs) -pubenc = filter(lambda d: d.content['_enc_scheme'] == 'pubkey', docs) - -print "total number of docs: %d" % len(docs) -print "pubkey encrypted docs: %d" % len(pubenc) diff --git a/scripts/server_side_db.py b/scripts/server_side_db.py new file mode 100644 index 00000000..18641a0f --- /dev/null +++ b/scripts/server_side_db.py @@ -0,0 +1,42 @@ +#!/usr/bin/python + +# This script gives server-side access to one Soledad user database by using +# the configuration stored in /etc/leap/soledad-server.conf. +# +# Use it like this: +# +# python -i server-side-db.py + +import sys +from ConfigParser import ConfigParser + +from leap.soledad.common.couch import CouchDatabase + +if len(sys.argv) != 2: + print 'Usage: %s ' % sys.argv[0] + exit(1) + +uuid = sys.argv[1] + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = cp.get('soledad-server', 'couch_url') + +# access user db +dbname = 'user-%s' % uuid +db = CouchDatabase(url, dbname) + +# get replica info +replica_uid = db._replica_uid +gen, docs = db.get_all_docs() +print "dbname: %s" % dbname +print "replica_uid: %s" % replica_uid +print "generation: %d" % gen + +# get relevant docs +schemes = map(lambda d: d.content['_enc_scheme'], docs) +pubenc = filter(lambda d: d.content['_enc_scheme'] == 'pubkey', docs) + +print "total number of docs: %d" % len(docs) +print "pubkey encrypted docs: %d" % len(pubenc) -- cgit v1.2.3 From d834422a48fda0ad1a5b1bd1db5edd0a4a35c823 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 22 Jan 2014 14:52:21 -0200 Subject: Add debian package build dev script. --- scripts/build_debian_package.sh | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 scripts/build_debian_package.sh (limited to 'scripts') diff --git a/scripts/build_debian_package.sh b/scripts/build_debian_package.sh new file mode 100755 index 00000000..cc62c3ac --- /dev/null +++ b/scripts/build_debian_package.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +# This script generates Soledad Debian packages. +# +# When invoking this script, you should pass a git repository URL and the name +# of the branch that contains the code you wish to build the packages from. +# +# The script will clone the given branch from the given repo, as well as the +# main Soledad repo in github which contains the most up-to-date debian +# branch. It will then merge the desired branch into the debian branch and +# build the packages. 
+ +if [ $# -ne 2 ]; then + echo "Usage: ${0} " + exit 1 +fi + +SOLEDAD_MAIN_REPO=git://github.com/leapcode/soledad.git + +url=$1 +branch=$2 +workdir=`mktemp -d` + +git clone -b ${branch} ${url} ${workdir}/soledad +export GIT_DIR=${workdir}/soledad/.git +export GIT_WORK_TREE=${workdir}/soledad +git remote add leapcode ${SOLEDAD_MAIN_REPO} +git fetch leapcode +git checkout debian +git merge --no-edit ${branch} +(cd ${workdir}/soledad && debuild -uc -us) +echo "Packages generated in ${workdir}" -- cgit v1.2.3 From 33f03a631ae55501ee7d725f706b34fb875e95b5 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 30 Jan 2014 18:12:57 -0200 Subject: Add script to find max upload size (#5011). --- scripts/find_max_upload_size.py | 169 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100755 scripts/find_max_upload_size.py (limited to 'scripts') diff --git a/scripts/find_max_upload_size.py b/scripts/find_max_upload_size.py new file mode 100755 index 00000000..8abeee78 --- /dev/null +++ b/scripts/find_max_upload_size.py @@ -0,0 +1,169 @@ +#!/usr/bin/python + +# This script finds the maximum upload size for a document in the current +# server. It pulls couch URL from Soledad config file and attempts multiple +# PUTs until it finds the maximum size supported by the server. +# +# As the Soledad couch user is not an admin, you have to pass a database into +# which the test will be run. The database should already exist and be +# initialized with soledad design documents. +# +# Use it like this: +# +# ./find_max_upload_size.py +# ./find_max_upload_size.py -h + +import os +import configparser +import couchdb +import logging +import argparse +import random +import string +import binascii +import json + + +SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' +PREFIX = '/tmp/soledad_test' +LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' + + +# configure logger +logger = logging.getLogger(__name__) + + +def config_log(level): + logging.basicConfig(format=LOG_FORMAT, level=level) + + +def log_to_file(filename): + handler = logging.FileHandler(filename, mode='a') + handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) + logger.addHandler(handler) + + +# create test dir +if not os.path.exists(PREFIX): + os.mkdir(PREFIX) + + +def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): + config = configparser.ConfigParser() + config.read(config_file) + return config['soledad-server']['couch_url'] + + +# generate or load an uploadable doc with the given size in mb +def gen_body(size): + if os.path.exists( + os.path.join(PREFIX, 'body-%d.json' % size)): + logger.debug('Loading body with %d MB...' % size) + with open(os.path.join(PREFIX, 'body-%d.json' % size), 'r') as f: + return json.loads(f.read()) + else: + length = int(size * 1024 ** 2) + hexdata = binascii.hexlify(os.urandom(length))[:length] + body = { + 'couch_rev': None, + 'u1db_rev': '1', + 'content': hexdata, + 'trans_id': '1', + 'conflicts': None, + 'update_conflicts': False, + } + logger.debug('Generating body with %d MB...' % size) + with open(os.path.join(PREFIX, 'body-%d.json' % size), 'w+') as f: + f.write(json.dumps(body)) + return body + + +def delete_doc(db): + doc = db.get('largedoc') + db.delete(doc) + + +def upload(db, size): + ddoc_path = ['_design', 'docs', '_update', 'put', 'largedoc'] + resource = db.resource(*ddoc_path) + body = gen_body(size) + try: + logger.debug('Uploading %d MB body...' 
% size) + response = resource.put_json( + body=body, + headers={'content-type': 'application/json'}) + # the document might have been updated in between, so we check for + # the return message + msg = response[2].read() + if msg == 'ok': + delete_doc(db) + logger.debug('Success uploading %d MB doc.' % size) + return True + else: + # should not happen + logger.error('Unexpected error uploading %d MB doc: %s' % (size, msg)) + return False + except Exception as e: + logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) + return False + + +def find_max_upload_size(dbname): + couch_url = get_couch_url() + db_url = '%s/%s' % (couch_url, dbname) + logger.debug('Couch URL: %s' % db_url) + # get a 'raw' couch handler + server = couchdb.client.Server(couch_url) + db = server[dbname] + # delete eventual leftover from last run + largedoc = db.get('largedoc') + if largedoc is not None: + db.delete(largedoc) + # phase 1: increase upload size exponentially + logger.info('Starting phase 1: increasing size exponentially.') + size = 1 + while True: + if upload(db, size): + size *= 2 + else: + break + # phase 2: binary search for maximum value + unable = size + able = size / 2 + logger.info('Starting phase 2: binary search for maximum value.') + while unable - able > 1: + size = able + ((unable - able) / 2) + if upload(db, size): + able = size + else: + unable = size + return able + + +if __name__ == '__main__': + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + '-d', action='store_true', dest='debug', + help='print debugging information') + parser.add_argument( + '-l', dest='logfile', + help='log output to file') + parser.add_argument( + 'dbname', help='the name of the database to test in') + args = parser.parse_args() + + # log to file + if args.logfile is not None: + add_file_handler(args.logfile) + + # set loglevel + if args.debug is True: + config_log(logging.DEBUG) + else: + config_log(logging.INFO) + + # run test and report + logger.info('Will test using db %s.' % args.dbname) + maxsize = find_max_upload_size(args.dbname) + logger.info('Max upload size is %d MB.' % maxsize) -- cgit v1.2.3 From fca2f2fa8a74e0a9d696ebacddf3e1cc4628fb62 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 7 Mar 2014 16:57:25 -0300 Subject: Improve local db script. --- scripts/client_side_db.py | 167 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 136 insertions(+), 31 deletions(-) (limited to 'scripts') diff --git a/scripts/client_side_db.py b/scripts/client_side_db.py index 6d1843ac..15980f5d 100644 --- a/scripts/client_side_db.py +++ b/scripts/client_side_db.py @@ -1,40 +1,145 @@ #!/usr/bin/python -# This script gives client-side access to one Soledad user database by using -# the data stored in the appropriate config dir given by get_path_prefix(). -# -# Use it like this: -# -# python -i client-side-db.py +# This script gives client-side access to one Soledad user database. 
+ import sys import os +import argparse +import re +import tempfile +import getpass +import requests +import json +import srp._pysrp as srp +import binascii from leap.common.config import get_path_prefix from leap.soledad.client import Soledad -if len(sys.argv) != 3: - print 'Usage: %s ' % sys.argv[0] - exit(1) - -uuid = sys.argv[1] -passphrase = unicode(sys.argv[2]) - -secrets_path = os.path.join(get_path_prefix(), 'leap', 'soledad', - '%s.secret' % uuid) -local_db_path = os.path.join(get_path_prefix(), 'leap', 'soledad', - '%s.db' % uuid) -server_url = 'http://dummy-url' -cert_file = 'cert' - -sol = Soledad(uuid, passphrase, secrets_path, local_db_path, server_url, - cert_file) -db = sol._db - -# get replica info -replica_uid = db._replica_uid -gen, docs = db.get_all_docs() -print "replica_uid: %s" % replica_uid -print "generation: %d" % gen -gen, trans_id = db._get_generation_info() -print "transaction_id: %s" % trans_id + +safe_unhexlify = lambda x: binascii.unhexlify(x) if ( + len(x) % 2 == 0) else binascii.unhexlify('0' + x) + + +def fail(reason): + print 'Fail: ' + reason + exit(2) + + +def get_api_info(provider): + info = requests.get( + 'https://'+provider+'/provider.json', verify=False).json() + return info['api_uri'], info['api_version'] + + +def login(username, passphrase, provider, api_uri, api_version): + usr = srp.User(username, passphrase, srp.SHA256, srp.NG_1024) + auth = None + try: + auth = authenticate(api_uri, api_version, usr).json() + except requests.exceptions.ConnectionError: + fail('Could not connect to server.') + if 'errors' in auth: + fail(str(auth['errors'])) + return api_uri, api_version, auth + + +def authenticate(api_uri, api_version, usr): + api_url = "%s/%s" % (api_uri, api_version) + session = requests.session() + uname, A = usr.start_authentication() + params = {'login': uname, 'A': binascii.hexlify(A)} + init = session.post( + api_url + '/sessions', data=params, verify=False).json() + if 'errors' in init: + fail('test user not found') + M = usr.process_challenge( + safe_unhexlify(init['salt']), safe_unhexlify(init['B'])) + return session.put(api_url + '/sessions/' + uname, verify=False, + data={'client_auth': binascii.hexlify(M)}) + + +def get_soledad_info(username, provider, passphrase, basedir): + api_uri, api_version = get_api_info(provider) + auth = login(username, passphrase, provider, api_uri, api_version) + # get soledad server url + service_url = '%s/%s/config/soledad-service.json' % \ + (api_uri, api_version) + soledad_hosts = requests.get(service_url, verify=False).json()['hosts'] + hostnames = soledad_hosts.keys() + # allow for choosing the host + host = hostnames[0] + if len(hostnames) > 1: + i = 1 + print "There are many available hosts:" + for h in hostnames: + print " (%d) %s.%s" % (i, h, provider) + i += 1 + choice = raw_input("Choose a host to use (default: 1): ") + if choice != '': + host = hostnames[int(choice) - 1] + server_url = 'https://%s:%d/user-%s' % \ + (soledad_hosts[host]['hostname'], soledad_hosts[host]['port'], + auth[2]['id']) + # get provider ca certificate + ca_cert = requests.get('https://%s/ca.crt' % provider, verify=False).text + cert_file = os.path.join(basedir, 'ca.crt') + with open(cert_file, 'w') as f: + f.write(ca_cert) + return auth[2]['id'], server_url, cert_file, auth[2]['token'] + + +def get_soledad_instance(username, provider, passphrase, basedir): + # setup soledad info + uuid, server_url, cert_file, token = \ + get_soledad_info(username, provider, passphrase, basedir) + secrets_path = os.path.join( + 
basedir, '%s.secret' % uuid) + local_db_path = os.path.join( + basedir, '%s.db' % uuid) + # instantiate soledad + return Soledad( + uuid, + unicode(passphrase), + secrets_path=secrets_path, + local_db_path=local_db_path, + server_url=server_url, + cert_file=cert_file, + auth_token=token) + + +# main program + +if __name__ == '__main__': + + class ValidateUserHandle(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') + res = m.match(values) + if res == None: + parser.error('User handle should have the form user@provider.') + setattr(namespace, 'username', res.groups()[0]) + setattr(namespace, 'provider', res.groups()[1]) + + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + 'user@provider', action=ValidateUserHandle, help='the user handle') + parser.add_argument( + '-b', dest='basedir', required=False, default=None, help='the user handle') + args = parser.parse_args() + + # get the password + passphrase = getpass.getpass( + 'Password for %s@%s: ' % (args.username, args.provider)) + + # get the basedir + basedir = args.basedir + if basedir is None: + basedir = tempfile.mkdtemp() + print 'Using %s as base directory.' % basedir + + # get the soledad instance + s = get_soledad_instance( + args.username, args.provider, passphrase, basedir) -- cgit v1.2.3 From 31ffadc4e58de95222a073f23d1f3c340797e55b Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Feb 2014 14:52:09 -0300 Subject: Remove old migration script. --- scripts/migrate_dbs.py | 288 ------------------------------------------------- 1 file changed, 288 deletions(-) delete mode 100644 scripts/migrate_dbs.py (limited to 'scripts') diff --git a/scripts/migrate_dbs.py b/scripts/migrate_dbs.py deleted file mode 100644 index f1c20d87..00000000 --- a/scripts/migrate_dbs.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/python - -import sys -import json -import logging -import argparse -import re -import threading -from urlparse import urlparse -from ConfigParser import ConfigParser -from couchdb.client import Server -from couchdb.http import ResourceNotFound, Resource, Session -from datetime import datetime - -from leap.soledad.common.couch import CouchDatabase - - -# parse command line for the log file name -logger_fname = "/tmp/u1db-couch-db-migration_%s.log" % \ - str(datetime.now()).replace(' ', '_') -parser = argparse.ArgumentParser() -parser.add_argument('--log', action='store', default=logger_fname, type=str, - required=False, help='the name of the log file', nargs=1) -args = parser.parse_args() - - -# configure the logger -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -print "Logging to %s." % args.log -logging.basicConfig( - filename=args.log, - format="%(asctime)-15s %(message)s") - - -# configure threads -max_threads = 20 -semaphore_pool = threading.BoundedSemaphore(value=max_threads) - -# get couch url -cp = ConfigParser() -cp.read('/etc/leap/soledad-server.conf') -url = cp.get('soledad-server', 'couch_url') - -resource = Resource(url, Session(retry_delays=[1,2,4,8], timeout=10)) -server = Server(url=resource) - -hidden_url = re.sub( - 'http://(.*):.*@', - 'http://\\1:xxxxx@', - url) - -print """ -========== -ATTENTION! -========== - -This script will modify Soledad's shared and user databases in: - - %s - -This script does not make a backup of the couch db data, so make sure youj -have a copy or you may loose data. -""" % hidden_url -confirm = raw_input("Proceed (type uppercase YES)? 
") - -if confirm != "YES": - exit(1) - - -# -# Thread -# - -class DocWorkerThread(threading.Thread): - - def __init__(self, dbname, doc_id, db_idx, db_len, doc_idx, doc_len, - transaction_log, conflict_log, release_fun): - threading.Thread.__init__(self) - resource = Resource(url, Session(retry_delays=[1,2,4,8], timeout=10)) - server = Server(url=resource) - self._dbname = dbname - self._cdb = server[self._dbname] - self._doc_id = doc_id - self._db_idx = db_idx - self._db_len = db_len - self._doc_idx = doc_idx - self._doc_len = doc_len - self._transaction_log = transaction_log - self._conflict_log = conflict_log - self._release_fun = release_fun - - def run(self): - - old_doc = self._cdb[self._doc_id] - - # skip non u1db docs - if 'u1db_rev' not in old_doc: - logger.debug('(%d/%d) (%d/%d) Skipping %s/%s).' % - (self._db_idx, self._db_len, self._doc_idx, - self._doc_len, self._dbname, self._doc_id)) - self._release_fun() - return - else: - logger.debug('(%d/%d) (%d/%d) Processing %s/%s ...' % - (self._db_idx, self._db_len, self._doc_idx, - self._doc_len, self._dbname, self._doc_id)) - - doc = { - '_id': self._doc_id, - '_rev': old_doc['_rev'], - 'u1db_rev': old_doc['u1db_rev'] - } - attachments = [] - - # add transactions - doc['u1db_transactions'] = map( - lambda (gen, doc_id, trans_id): (gen, trans_id), - filter( - lambda (gen, doc_id, trans_id): doc_id == doc['_id'], - self._transaction_log)) - if len(doc['u1db_transactions']) == 0: - del doc['u1db_transactions'] - - # add conflicts - if doc['_id'] in self._conflict_log: - attachments.append([ - conflict_log[doc['_id']], - 'u1db_conflicts', - "application/octet-stream"]) - - # move document's content to 'u1db_content' attachment - content = self._cdb.get_attachment(doc, 'u1db_json') - if content is not None: - attachments.append([ - content, - 'u1db_content', - "application/octet-stream"]) - #self._cdb.delete_attachment(doc, 'u1db_json') - - # save modified doc - self._cdb.save(doc) - - # save all doc attachments - for content, att_name, content_type in attachments: - self._cdb.put_attachment( - doc, - content, - filename=att_name, - content_type=content_type) - - # release the semaphore - self._release_fun() - - -db_idx = 0 -db_len = len(server) -for dbname in server: - - db_idx += 1 - - if not (dbname.startswith('user-') or dbname == 'shared') \ - or dbname == 'user-test-db': - logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) - continue - - logger.info("(%d/%d) Migrating db %s." % (db_idx, db_len, dbname)) - - # get access to couch db - cdb = Server(url)[dbname] - - # get access to soledad db - sdb = CouchDatabase(url, dbname) - - # Migration table - # --------------- - # - # * Metadata that was previously stored in special documents migrate to - # inside documents, to allow for atomic doc-and-metadata updates. - # * Doc content attachment name changes. - # * Indexes are removed, to be implemented in the future possibly as - # design docs view functions. 
- # - # +-----------------+-------------------------+-------------------------+ - # | Data | old storage | new storage | - # |-----------------+-------------------------+-------------------------+ - # | doc content | /u1db_json | /u1db_content | - # | doc conflicts | u1db/_conflicts | /u1db_conflicts | - # | transaction log | u1db/_transaction_log | doc.u1db_transactions | - # | sync log | u1db/_other_generations | u1db_sync_log | - # | indexes | u1db/_indexes | not implemented | - # | replica uid | u1db/_replica_uid | u1db_config | - # +-----------------+-------------------------+-------------------------+ - - def get_att_content(db, doc_id, att_name): - try: - return json.loads( - db.get_attachment( - doc_id, att_name).read())['content'] - except: - import ipdb - ipdb.set_trace() - - # only migrate databases that have the 'u1db/_replica_uid' document - try: - metadoc = cdb.get('u1db/_replica_uid') - replica_uid = get_att_content(cdb, 'u1db/_replica_uid', 'u1db_json') - except ResourceNotFound: - continue - - #--------------------------------------------------------------------- - # Step 1: Set replica uid. - #--------------------------------------------------------------------- - sdb._set_replica_uid(replica_uid) - - #--------------------------------------------------------------------- - # Step 2: Obtain metadata. - #--------------------------------------------------------------------- - - # obtain the transaction log: [['', ''], ...] - transaction_log = get_att_content( - cdb, 'u1db/_transaction_log', 'u1db_json') - new_transaction_log = [] - gen = 1 - for (doc_id, trans_id) in transaction_log: - new_transaction_log.append((gen, doc_id, trans_id)) - gen += 1 - transaction_log = new_transaction_log - - # obtain the conflict log: {'': ['', ''], ...} - conflict_log = get_att_content(cdb, 'u1db/_conflicts', 'u1db_json') - - # obtain the sync log: - # {'': ['', ''], ...} - other_generations = get_att_content( - cdb, 'u1db/_other_generations', 'u1db_json') - - #--------------------------------------------------------------------- - # Step 3: Iterate over all documents in database. - #--------------------------------------------------------------------- - doc_len = len(cdb) - logger.info("(%d, %d) Found %d documents." % (db_idx, db_len, doc_len)) - doc_idx = 0 - threads = [] - for doc_id in cdb: - doc_idx = doc_idx + 1 - - semaphore_pool.acquire() - thread = DocWorkerThread(dbname, doc_id, db_idx, db_len, - doc_idx, doc_len, transaction_log, - conflict_log, semaphore_pool.release) - thread.daemon = True - thread.start() - threads.append(thread) - - map(lambda thread: thread.join(), threads) - - #--------------------------------------------------------------------- - # Step 4: Move sync log. - #--------------------------------------------------------------------- - - # move sync log - sync_doc = { - '_id': 'u1db_sync_log', - 'syncs': [] - } - - for replica_uid in other_generations: - gen, transaction_id = other_generations[replica_uid] - sync_doc['syncs'].append([replica_uid, gen, transaction_id]) - cdb.save(sync_doc) - - #--------------------------------------------------------------------- - # Step 5: Delete old meta documents. - #--------------------------------------------------------------------- - - # remove unused docs - for doc_id in ['_transaction_log', '_conflicts', '_other_generations', - '_indexes', '_replica_uid']: - for prefix in ['u1db/', 'u1db%2F']: - try: - doc = cdb['%s%s' % (prefix, doc_id)] - logger.info( - "(%d/%d) Deleting %s/%s/%s." 
% - (db_idx, db_len, dbname, 'u1db', doc_id)) - cdb.delete(doc) - except ResourceNotFound: - pass -- cgit v1.2.3 From 49b69191522d91165327db9e2805dd675a317b0f Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 25 Feb 2014 16:10:52 -0300 Subject: Add script for updating design docs. --- scripts/update_design_docs.py | 147 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 scripts/update_design_docs.py (limited to 'scripts') diff --git a/scripts/update_design_docs.py b/scripts/update_design_docs.py new file mode 100644 index 00000000..e7b5a29c --- /dev/null +++ b/scripts/update_design_docs.py @@ -0,0 +1,147 @@ +#!/usr/bin/python + +# This script updates Soledad's design documents in the session database and +# all user databases with contents from the installed leap.soledad.common +# package. + +import json +import logging +import argparse +import re +import threading +import binascii + + +from getpass import getpass +from ConfigParser import ConfigParser +from couchdb.client import Server +from couchdb.http import Resource, Session +from datetime import datetime +from urlparse import urlparse + + +from leap.soledad.common import ddocs + + +# parse command line for the log file name +logger_fname = "/tmp/update-design-docs_%s.log" % \ + str(datetime.now()).replace(' ', '_') +parser = argparse.ArgumentParser() +parser.add_argument('--log', action='store', default=logger_fname, type=str, + required=False, help='the name of the log file', nargs=1) +args = parser.parse_args() + + +# configure the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +print "Logging to %s." % args.log +logging.basicConfig( + filename=args.log, + format="%(asctime)-15s %(message)s") + + +# configure threads +max_threads = 20 +semaphore_pool = threading.BoundedSemaphore(value=max_threads) +threads = [] + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = urlparse(cp.get('soledad-server', 'couch_url')) + +# get admin password +netloc = re.sub('^.*@', '', url.netloc) +url = url._replace(netloc=netloc) +password = getpass("Admin password for %s: " % url.geturl()) +url = url._replace(netloc='admin:%s@%s' % (password, netloc)) + +resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10)) +server = Server(url=resource) + +hidden_url = re.sub( + 'http://(.*):.*@', + 'http://\\1:xxxxx@', + url.geturl()) + +print """ +========== +ATTENTION! +========== + +This script will modify Soledad's shared and user databases in: + + %s + +This script does not make a backup of the couch db data, so make sure you +have a copy or you may loose data. +""" % hidden_url +confirm = raw_input("Proceed (type uppercase YES)? ") + +if confirm != "YES": + exit(1) + +# convert design doc content + +design_docs = { + '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)), + '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)), + '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)), +} + +# +# Thread +# + +class DBWorkerThread(threading.Thread): + + def __init__(self, server, dbname, db_idx, db_len, release_fun): + threading.Thread.__init__(self) + self._dbname = dbname + self._cdb = server[self._dbname] + self._db_idx = db_idx + self._db_len = db_len + self._release_fun = release_fun + + def run(self): + + logger.info("(%d/%d) Updating db %s." 
% (self._db_idx, self._db_len, + self._dbname)) + + for doc_id in design_docs: + doc = self._cdb[doc_id] + for key in ['lists', 'views', 'updates']: + if key in design_docs[doc_id]: + doc[key] = design_docs[doc_id][key] + self._cdb.save(doc) + + # release the semaphore + self._release_fun() + + +db_idx = 0 +db_len = len(server) +for dbname in server: + + db_idx += 1 + + if not (dbname.startswith('user-') or dbname == 'shared') \ + or dbname == 'user-test-db': + logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) + continue + + + # get access to couch db + cdb = Server(url.geturl())[dbname] + + #--------------------------------------------------------------------- + # Start DB worker thread + #--------------------------------------------------------------------- + semaphore_pool.acquire() + thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) + thread.daemon = True + thread.start() + threads.append(thread) + +map(lambda thread: thread.join(), threads) -- cgit v1.2.3 From 94ec89384abeba52d660addf3528b11a9aa5a8b0 Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 11 Mar 2014 16:14:26 -0300 Subject: Add script to measure PUT memory usage. --- scripts/client_side_db.py | 145 ------------------ scripts/db_access/client_side_db.py | 145 ++++++++++++++++++ scripts/db_access/server_side_db.py | 42 +++++ .../doc_put_memory_usage/find_max_upload_size.py | 169 +++++++++++++++++++++ scripts/doc_put_memory_usage/get-mem.py | 16 ++ scripts/doc_put_memory_usage/plot-mem.py | 73 +++++++++ scripts/find_max_upload_size.py | 169 --------------------- scripts/server_side_db.py | 42 ----- 8 files changed, 445 insertions(+), 356 deletions(-) delete mode 100644 scripts/client_side_db.py create mode 100644 scripts/db_access/client_side_db.py create mode 100644 scripts/db_access/server_side_db.py create mode 100755 scripts/doc_put_memory_usage/find_max_upload_size.py create mode 100755 scripts/doc_put_memory_usage/get-mem.py create mode 100755 scripts/doc_put_memory_usage/plot-mem.py delete mode 100755 scripts/find_max_upload_size.py delete mode 100644 scripts/server_side_db.py (limited to 'scripts') diff --git a/scripts/client_side_db.py b/scripts/client_side_db.py deleted file mode 100644 index 15980f5d..00000000 --- a/scripts/client_side_db.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/python - -# This script gives client-side access to one Soledad user database. 
- - -import sys -import os -import argparse -import re -import tempfile -import getpass -import requests -import json -import srp._pysrp as srp -import binascii - -from leap.common.config import get_path_prefix -from leap.soledad.client import Soledad - - -safe_unhexlify = lambda x: binascii.unhexlify(x) if ( - len(x) % 2 == 0) else binascii.unhexlify('0' + x) - - -def fail(reason): - print 'Fail: ' + reason - exit(2) - - -def get_api_info(provider): - info = requests.get( - 'https://'+provider+'/provider.json', verify=False).json() - return info['api_uri'], info['api_version'] - - -def login(username, passphrase, provider, api_uri, api_version): - usr = srp.User(username, passphrase, srp.SHA256, srp.NG_1024) - auth = None - try: - auth = authenticate(api_uri, api_version, usr).json() - except requests.exceptions.ConnectionError: - fail('Could not connect to server.') - if 'errors' in auth: - fail(str(auth['errors'])) - return api_uri, api_version, auth - - -def authenticate(api_uri, api_version, usr): - api_url = "%s/%s" % (api_uri, api_version) - session = requests.session() - uname, A = usr.start_authentication() - params = {'login': uname, 'A': binascii.hexlify(A)} - init = session.post( - api_url + '/sessions', data=params, verify=False).json() - if 'errors' in init: - fail('test user not found') - M = usr.process_challenge( - safe_unhexlify(init['salt']), safe_unhexlify(init['B'])) - return session.put(api_url + '/sessions/' + uname, verify=False, - data={'client_auth': binascii.hexlify(M)}) - - -def get_soledad_info(username, provider, passphrase, basedir): - api_uri, api_version = get_api_info(provider) - auth = login(username, passphrase, provider, api_uri, api_version) - # get soledad server url - service_url = '%s/%s/config/soledad-service.json' % \ - (api_uri, api_version) - soledad_hosts = requests.get(service_url, verify=False).json()['hosts'] - hostnames = soledad_hosts.keys() - # allow for choosing the host - host = hostnames[0] - if len(hostnames) > 1: - i = 1 - print "There are many available hosts:" - for h in hostnames: - print " (%d) %s.%s" % (i, h, provider) - i += 1 - choice = raw_input("Choose a host to use (default: 1): ") - if choice != '': - host = hostnames[int(choice) - 1] - server_url = 'https://%s:%d/user-%s' % \ - (soledad_hosts[host]['hostname'], soledad_hosts[host]['port'], - auth[2]['id']) - # get provider ca certificate - ca_cert = requests.get('https://%s/ca.crt' % provider, verify=False).text - cert_file = os.path.join(basedir, 'ca.crt') - with open(cert_file, 'w') as f: - f.write(ca_cert) - return auth[2]['id'], server_url, cert_file, auth[2]['token'] - - -def get_soledad_instance(username, provider, passphrase, basedir): - # setup soledad info - uuid, server_url, cert_file, token = \ - get_soledad_info(username, provider, passphrase, basedir) - secrets_path = os.path.join( - basedir, '%s.secret' % uuid) - local_db_path = os.path.join( - basedir, '%s.db' % uuid) - # instantiate soledad - return Soledad( - uuid, - unicode(passphrase), - secrets_path=secrets_path, - local_db_path=local_db_path, - server_url=server_url, - cert_file=cert_file, - auth_token=token) - - -# main program - -if __name__ == '__main__': - - class ValidateUserHandle(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') - res = m.match(values) - if res == None: - parser.error('User handle should have the form user@provider.') - setattr(namespace, 'username', res.groups()[0]) - setattr(namespace, 
'provider', res.groups()[1]) - - # parse command line - parser = argparse.ArgumentParser() - parser.add_argument( - 'user@provider', action=ValidateUserHandle, help='the user handle') - parser.add_argument( - '-b', dest='basedir', required=False, default=None, help='the user handle') - args = parser.parse_args() - - # get the password - passphrase = getpass.getpass( - 'Password for %s@%s: ' % (args.username, args.provider)) - - # get the basedir - basedir = args.basedir - if basedir is None: - basedir = tempfile.mkdtemp() - print 'Using %s as base directory.' % basedir - - # get the soledad instance - s = get_soledad_instance( - args.username, args.provider, passphrase, basedir) diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py new file mode 100644 index 00000000..15980f5d --- /dev/null +++ b/scripts/db_access/client_side_db.py @@ -0,0 +1,145 @@ +#!/usr/bin/python + +# This script gives client-side access to one Soledad user database. + + +import sys +import os +import argparse +import re +import tempfile +import getpass +import requests +import json +import srp._pysrp as srp +import binascii + +from leap.common.config import get_path_prefix +from leap.soledad.client import Soledad + + +safe_unhexlify = lambda x: binascii.unhexlify(x) if ( + len(x) % 2 == 0) else binascii.unhexlify('0' + x) + + +def fail(reason): + print 'Fail: ' + reason + exit(2) + + +def get_api_info(provider): + info = requests.get( + 'https://'+provider+'/provider.json', verify=False).json() + return info['api_uri'], info['api_version'] + + +def login(username, passphrase, provider, api_uri, api_version): + usr = srp.User(username, passphrase, srp.SHA256, srp.NG_1024) + auth = None + try: + auth = authenticate(api_uri, api_version, usr).json() + except requests.exceptions.ConnectionError: + fail('Could not connect to server.') + if 'errors' in auth: + fail(str(auth['errors'])) + return api_uri, api_version, auth + + +def authenticate(api_uri, api_version, usr): + api_url = "%s/%s" % (api_uri, api_version) + session = requests.session() + uname, A = usr.start_authentication() + params = {'login': uname, 'A': binascii.hexlify(A)} + init = session.post( + api_url + '/sessions', data=params, verify=False).json() + if 'errors' in init: + fail('test user not found') + M = usr.process_challenge( + safe_unhexlify(init['salt']), safe_unhexlify(init['B'])) + return session.put(api_url + '/sessions/' + uname, verify=False, + data={'client_auth': binascii.hexlify(M)}) + + +def get_soledad_info(username, provider, passphrase, basedir): + api_uri, api_version = get_api_info(provider) + auth = login(username, passphrase, provider, api_uri, api_version) + # get soledad server url + service_url = '%s/%s/config/soledad-service.json' % \ + (api_uri, api_version) + soledad_hosts = requests.get(service_url, verify=False).json()['hosts'] + hostnames = soledad_hosts.keys() + # allow for choosing the host + host = hostnames[0] + if len(hostnames) > 1: + i = 1 + print "There are many available hosts:" + for h in hostnames: + print " (%d) %s.%s" % (i, h, provider) + i += 1 + choice = raw_input("Choose a host to use (default: 1): ") + if choice != '': + host = hostnames[int(choice) - 1] + server_url = 'https://%s:%d/user-%s' % \ + (soledad_hosts[host]['hostname'], soledad_hosts[host]['port'], + auth[2]['id']) + # get provider ca certificate + ca_cert = requests.get('https://%s/ca.crt' % provider, verify=False).text + cert_file = os.path.join(basedir, 'ca.crt') + with open(cert_file, 'w') as f: + 
f.write(ca_cert) + return auth[2]['id'], server_url, cert_file, auth[2]['token'] + + +def get_soledad_instance(username, provider, passphrase, basedir): + # setup soledad info + uuid, server_url, cert_file, token = \ + get_soledad_info(username, provider, passphrase, basedir) + secrets_path = os.path.join( + basedir, '%s.secret' % uuid) + local_db_path = os.path.join( + basedir, '%s.db' % uuid) + # instantiate soledad + return Soledad( + uuid, + unicode(passphrase), + secrets_path=secrets_path, + local_db_path=local_db_path, + server_url=server_url, + cert_file=cert_file, + auth_token=token) + + +# main program + +if __name__ == '__main__': + + class ValidateUserHandle(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') + res = m.match(values) + if res == None: + parser.error('User handle should have the form user@provider.') + setattr(namespace, 'username', res.groups()[0]) + setattr(namespace, 'provider', res.groups()[1]) + + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + 'user@provider', action=ValidateUserHandle, help='the user handle') + parser.add_argument( + '-b', dest='basedir', required=False, default=None, help='the user handle') + args = parser.parse_args() + + # get the password + passphrase = getpass.getpass( + 'Password for %s@%s: ' % (args.username, args.provider)) + + # get the basedir + basedir = args.basedir + if basedir is None: + basedir = tempfile.mkdtemp() + print 'Using %s as base directory.' % basedir + + # get the soledad instance + s = get_soledad_instance( + args.username, args.provider, passphrase, basedir) diff --git a/scripts/db_access/server_side_db.py b/scripts/db_access/server_side_db.py new file mode 100644 index 00000000..18641a0f --- /dev/null +++ b/scripts/db_access/server_side_db.py @@ -0,0 +1,42 @@ +#!/usr/bin/python + +# This script gives server-side access to one Soledad user database by using +# the configuration stored in /etc/leap/soledad-server.conf. +# +# Use it like this: +# +# python -i server-side-db.py + +import sys +from ConfigParser import ConfigParser + +from leap.soledad.common.couch import CouchDatabase + +if len(sys.argv) != 2: + print 'Usage: %s ' % sys.argv[0] + exit(1) + +uuid = sys.argv[1] + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = cp.get('soledad-server', 'couch_url') + +# access user db +dbname = 'user-%s' % uuid +db = CouchDatabase(url, dbname) + +# get replica info +replica_uid = db._replica_uid +gen, docs = db.get_all_docs() +print "dbname: %s" % dbname +print "replica_uid: %s" % replica_uid +print "generation: %d" % gen + +# get relevant docs +schemes = map(lambda d: d.content['_enc_scheme'], docs) +pubenc = filter(lambda d: d.content['_enc_scheme'] == 'pubkey', docs) + +print "total number of docs: %d" % len(docs) +print "pubkey encrypted docs: %d" % len(pubenc) diff --git a/scripts/doc_put_memory_usage/find_max_upload_size.py b/scripts/doc_put_memory_usage/find_max_upload_size.py new file mode 100755 index 00000000..02c68015 --- /dev/null +++ b/scripts/doc_put_memory_usage/find_max_upload_size.py @@ -0,0 +1,169 @@ +#!/usr/bin/python + +# This script finds the maximum upload size for a document in the current +# server. It pulls couch URL from Soledad config file and attempts multiple +# PUTs until it finds the maximum size supported by the server. +# +# As the Soledad couch user is not an admin, you have to pass a database into +# which the test will be run. 
The database should already exist and be +# initialized with soledad design documents. +# +# Use it like this: +# +# ./find_max_upload_size.py +# ./find_max_upload_size.py -h + +import os +import configparser +import logging +import argparse +import random +import string +import binascii +import json +import time +import uuid + + +from couchdb.client import Database +from socket import error as socket_error +from leap.soledad.common.couch import CouchDatabase + + +SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' +PREFIX = '/tmp/soledad_test' +LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' +RETRIES = 3 # number of times to retry uploading a document of a certain + # size after a failure + + +# configure logger +logger = logging.getLogger(__name__) + + +def config_log(level): + logging.basicConfig(format=LOG_FORMAT, level=level) + + +def log_to_file(filename): + handler = logging.FileHandler(filename, mode='a') + handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) + logger.addHandler(handler) + + +# create test dir +if not os.path.exists(PREFIX): + os.mkdir(PREFIX) + + +def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): + config = configparser.ConfigParser() + config.read(config_file) + return config['soledad-server']['couch_url'] + + +# generate or load an uploadable doc with the given size in mb +def get_content(size): + fname = os.path.join(PREFIX, 'content-%d.json' % size) + if os.path.exists(fname): + logger.debug('Loading content with %d MB...' % size) + with open(fname, 'r') as f: + return f.read() + else: + length = int(size * 1024 ** 2) + logger.debug('Generating body with %d MB...' % size) + content = binascii.hexlify(os.urandom(length))[:length] + with open(fname, 'w') as f: + f.write(content) + return content + + +def delete_doc(db): + doc = db.get('largedoc') + db.delete(doc) + + +def upload(db, size, couch_db): + # try many times to be sure that size is infeasible + for i in range(RETRIES): + # wait until server is up to upload + while True: + try: + 'largedoc' in couch_db + break + except socket_error: + logger.debug('Waiting for server to come up...') + time.sleep(1) + # attempt to upload + try: + logger.debug( + 'Trying to upload %d MB document (attempt %d/%d)...' % + (size, (i+1), RETRIES)) + content = get_content(size) + logger.debug('Starting upload of %d bytes.' % len(content)) + doc = db.create_doc({'data': content}, doc_id='largedoc') + delete_doc(couch_db) + logger.debug('Success uploading %d MB doc.' 
% size) + return True + except Exception as e: + logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) + return False + + +def find_max_upload_size(db_uri): + db = CouchDatabase.open_database(db_uri, False) + couch_db = Database(db_uri) + logger.debug('Database URI: %s' % db_uri) + # delete eventual leftover from last run + if 'largedoc' in couch_db: + delete_doc(couch_db) + # phase 1: increase upload size exponentially + logger.info('Starting phase 1: increasing size exponentially.') + size = 1 + #import ipdb; ipdb.set_trace() + while True: + if upload(db, size, couch_db): + size *= 2 + else: + break + + # phase 2: binary search for maximum value + unable = size + able = size / 2 + logger.info('Starting phase 2: binary search for maximum value.') + while unable - able > 1: + size = able + ((unable - able) / 2) + if upload(db, size, couch_db): + able = size + else: + unable = size + return able + + +if __name__ == '__main__': + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + '-d', action='store_true', dest='debug', + help='print debugging information') + parser.add_argument( + '-l', dest='logfile', + help='log output to file') + parser.add_argument( + 'db_uri', help='the couch database URI to test') + args = parser.parse_args() + + # log to file + if args.logfile is not None: + log_to_file(args.logfile) + + # set loglevel + if args.debug is True: + config_log(logging.DEBUG) + else: + config_log(logging.INFO) + + # run test and report + logger.info('Will test using db at %s.' % args.db_uri) + maxsize = find_max_upload_size(args.db_uri) + logger.info('Max upload size is %d MB.' % maxsize) diff --git a/scripts/doc_put_memory_usage/get-mem.py b/scripts/doc_put_memory_usage/get-mem.py new file mode 100755 index 00000000..d64875fc --- /dev/null +++ b/scripts/doc_put_memory_usage/get-mem.py @@ -0,0 +1,16 @@ +#!/usr/bin/python + + +import psutil +import time + + +delta = 50 * 60 +start = time.time() + +while True: + now = time.time() + print "%s %s" % (now - start, psutil.phymem_usage().used) + time.sleep(0.1) + if now > start + delta: + break diff --git a/scripts/doc_put_memory_usage/plot-mem.py b/scripts/doc_put_memory_usage/plot-mem.py new file mode 100755 index 00000000..e24679a2 --- /dev/null +++ b/scripts/doc_put_memory_usage/plot-mem.py @@ -0,0 +1,73 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt + + +files = [ + ('local', 'couchdb-json', 'b'), + ('local', 'bigcouch-json', 'r'), + ('local', 'couchdb-multipart', 'g'), + ('local', 'bigcouch-multipart', 'm'), +] + + +# config the plot +plt.xlabel('time') +plt.ylabel('memory usage') +plt.title('bigcouch versus couch memory usage') + + +for fi in files: + + machine = fi[0] + database = fi[1] + color = fi[2] + filename = '%s-%s.txt' % (machine, database) + + x = [] + y = [] + + xmax = None + xmin = None + ymax = None + ymin = None + + # read data from file + with open(filename, 'r') as f: + line = f.readline() + while line is not None: + time, mem = tuple(line.strip().split(' ')) + mem = float(mem) / (10**9) + x.append(float(time)) + y.append(mem) + if ymax == None or mem > ymax: + ymax = mem + xmax = time + if ymin == None or mem < ymin: + ymin = mem + xmin = time + line = f.readline() + if line == '': + break + + kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', + # 'marker': '.', + 'color': color, + } + plt.plot(x, y, label=database, **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + 
#plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) + #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/find_max_upload_size.py b/scripts/find_max_upload_size.py deleted file mode 100755 index 8abeee78..00000000 --- a/scripts/find_max_upload_size.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/python - -# This script finds the maximum upload size for a document in the current -# server. It pulls couch URL from Soledad config file and attempts multiple -# PUTs until it finds the maximum size supported by the server. -# -# As the Soledad couch user is not an admin, you have to pass a database into -# which the test will be run. The database should already exist and be -# initialized with soledad design documents. -# -# Use it like this: -# -# ./find_max_upload_size.py -# ./find_max_upload_size.py -h - -import os -import configparser -import couchdb -import logging -import argparse -import random -import string -import binascii -import json - - -SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' -PREFIX = '/tmp/soledad_test' -LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' - - -# configure logger -logger = logging.getLogger(__name__) - - -def config_log(level): - logging.basicConfig(format=LOG_FORMAT, level=level) - - -def log_to_file(filename): - handler = logging.FileHandler(filename, mode='a') - handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) - logger.addHandler(handler) - - -# create test dir -if not os.path.exists(PREFIX): - os.mkdir(PREFIX) - - -def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): - config = configparser.ConfigParser() - config.read(config_file) - return config['soledad-server']['couch_url'] - - -# generate or load an uploadable doc with the given size in mb -def gen_body(size): - if os.path.exists( - os.path.join(PREFIX, 'body-%d.json' % size)): - logger.debug('Loading body with %d MB...' % size) - with open(os.path.join(PREFIX, 'body-%d.json' % size), 'r') as f: - return json.loads(f.read()) - else: - length = int(size * 1024 ** 2) - hexdata = binascii.hexlify(os.urandom(length))[:length] - body = { - 'couch_rev': None, - 'u1db_rev': '1', - 'content': hexdata, - 'trans_id': '1', - 'conflicts': None, - 'update_conflicts': False, - } - logger.debug('Generating body with %d MB...' % size) - with open(os.path.join(PREFIX, 'body-%d.json' % size), 'w+') as f: - f.write(json.dumps(body)) - return body - - -def delete_doc(db): - doc = db.get('largedoc') - db.delete(doc) - - -def upload(db, size): - ddoc_path = ['_design', 'docs', '_update', 'put', 'largedoc'] - resource = db.resource(*ddoc_path) - body = gen_body(size) - try: - logger.debug('Uploading %d MB body...' % size) - response = resource.put_json( - body=body, - headers={'content-type': 'application/json'}) - # the document might have been updated in between, so we check for - # the return message - msg = response[2].read() - if msg == 'ok': - delete_doc(db) - logger.debug('Success uploading %d MB doc.' 
% size) - return True - else: - # should not happen - logger.error('Unexpected error uploading %d MB doc: %s' % (size, msg)) - return False - except Exception as e: - logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) - return False - - -def find_max_upload_size(dbname): - couch_url = get_couch_url() - db_url = '%s/%s' % (couch_url, dbname) - logger.debug('Couch URL: %s' % db_url) - # get a 'raw' couch handler - server = couchdb.client.Server(couch_url) - db = server[dbname] - # delete eventual leftover from last run - largedoc = db.get('largedoc') - if largedoc is not None: - db.delete(largedoc) - # phase 1: increase upload size exponentially - logger.info('Starting phase 1: increasing size exponentially.') - size = 1 - while True: - if upload(db, size): - size *= 2 - else: - break - # phase 2: binary search for maximum value - unable = size - able = size / 2 - logger.info('Starting phase 2: binary search for maximum value.') - while unable - able > 1: - size = able + ((unable - able) / 2) - if upload(db, size): - able = size - else: - unable = size - return able - - -if __name__ == '__main__': - # parse command line - parser = argparse.ArgumentParser() - parser.add_argument( - '-d', action='store_true', dest='debug', - help='print debugging information') - parser.add_argument( - '-l', dest='logfile', - help='log output to file') - parser.add_argument( - 'dbname', help='the name of the database to test in') - args = parser.parse_args() - - # log to file - if args.logfile is not None: - add_file_handler(args.logfile) - - # set loglevel - if args.debug is True: - config_log(logging.DEBUG) - else: - config_log(logging.INFO) - - # run test and report - logger.info('Will test using db %s.' % args.dbname) - maxsize = find_max_upload_size(args.dbname) - logger.info('Max upload size is %d MB.' % maxsize) diff --git a/scripts/server_side_db.py b/scripts/server_side_db.py deleted file mode 100644 index 18641a0f..00000000 --- a/scripts/server_side_db.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/python - -# This script gives server-side access to one Soledad user database by using -# the configuration stored in /etc/leap/soledad-server.conf. -# -# Use it like this: -# -# python -i server-side-db.py - -import sys -from ConfigParser import ConfigParser - -from leap.soledad.common.couch import CouchDatabase - -if len(sys.argv) != 2: - print 'Usage: %s ' % sys.argv[0] - exit(1) - -uuid = sys.argv[1] - -# get couch url -cp = ConfigParser() -cp.read('/etc/leap/soledad-server.conf') -url = cp.get('soledad-server', 'couch_url') - -# access user db -dbname = 'user-%s' % uuid -db = CouchDatabase(url, dbname) - -# get replica info -replica_uid = db._replica_uid -gen, docs = db.get_all_docs() -print "dbname: %s" % dbname -print "replica_uid: %s" % replica_uid -print "generation: %d" % gen - -# get relevant docs -schemes = map(lambda d: d.content['_enc_scheme'], docs) -pubenc = filter(lambda d: d.content['_enc_scheme'] == 'pubkey', docs) - -print "total number of docs: %d" % len(docs) -print "pubkey encrypted docs: %d" % len(pubenc) -- cgit v1.2.3 From 3337f48c810df45aac7d3009b49b4b2a34ef019d Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 11 Mar 2014 16:14:39 -0300 Subject: Add scripts to measure backend cpu usage. 
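
The scripts added here fit together roughly as follows: log_cpu_usage.py samples CPU load in a background thread while a workload runs, movingaverage.py smooths the resulting series, and plot.py draws one curve per backend. A minimal sketch of a measurement run is shown below; run_sync_workload() is only a stand-in for whatever backend operations are being measured, and 'sqlite.txt' is one of the per-backend output files that plot.py later reads.

    import time
    from log_cpu_usage import LogCpuUsage

    def run_sync_workload():
        # stand-in: create documents and sync them with the backend under test
        pass

    log_cpu = LogCpuUsage('sqlite.txt')  # one output file per backend
    log_cpu.start()      # samples psutil.cpu_percent() every ~0.01s in a thread
    time.sleep(5)        # let the load settle before the workload
    run_sync_workload()
    time.sleep(5)        # and again after it
    log_cpu.stop()

plot.py then reads each backend's .txt file and smooths both axes with movingaverage(data, 10, data_is_list=True, avoid_fp_drift=False) before plotting.
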
--- scripts/backends_cpu_usage/log_cpu_usage.py | 46 ++++++ scripts/backends_cpu_usage/movingaverage.py | 209 +++++++++++++++++++++++++++ scripts/backends_cpu_usage/plot.py | 81 +++++++++++ scripts/backends_cpu_usage/test_u1db_sync.py | 113 +++++++++++++++ 4 files changed, 449 insertions(+) create mode 100755 scripts/backends_cpu_usage/log_cpu_usage.py create mode 100644 scripts/backends_cpu_usage/movingaverage.py create mode 100755 scripts/backends_cpu_usage/plot.py create mode 100755 scripts/backends_cpu_usage/test_u1db_sync.py (limited to 'scripts') diff --git a/scripts/backends_cpu_usage/log_cpu_usage.py b/scripts/backends_cpu_usage/log_cpu_usage.py new file mode 100755 index 00000000..2674e1ff --- /dev/null +++ b/scripts/backends_cpu_usage/log_cpu_usage.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + + +# Get the CPU usage and print to file. + + +import psutil +import time +import argparse +import os +import threading + + +class LogCpuUsage(threading.Thread): + + def __init__(self, fname): + threading.Thread.__init__(self) + self._stopped = True + self._fname = fname + + def run(self): + self._stopped = False + with open(self._fname, 'w') as f: + start = time.time() + while self._stopped is False: + now = time.time() + f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) + time.sleep(0.01) + + def stop(self): + self._stopped = True + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('file', help='where to save output') + args = parser.parse_args() + + if os.path.isfile(args.file): + replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) + if replace.lower() != 'y': + print 'Bailing out.' + exit(1) + + log_cpu = LogCpuUsage(args.file) + log_cpu.run() diff --git a/scripts/backends_cpu_usage/movingaverage.py b/scripts/backends_cpu_usage/movingaverage.py new file mode 100644 index 00000000..bac1b3e1 --- /dev/null +++ b/scripts/backends_cpu_usage/movingaverage.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +# Sean Reifschneider, tummy.com, ltd. +# Released into the Public Domain, 2011-02-06 + +import itertools +from itertools import islice +from collections import deque + + +######################################################### +def movingaverage(data, subset_size, data_is_list = None, + avoid_fp_drift = True): + '''Return the moving averages of the data, with a window size of + `subset_size`. `subset_size` must be an integer greater than 0 and + less than the length of the input data, or a ValueError will be raised. + + `data_is_list` can be used to tune the algorithm for list or iteratable + as an input. The default value, `None` will auto-detect this. + The algorithm used if `data` is a list is almost twice as fast as if + it is an iteratable. + + `avoid_fp_drift`, if True (the default) sums every sub-set rather than + keeping a "rolling sum" (which may be subject to floating-point drift). + While more correct, it is also dramatically slower for subset sizes + much larger than 20. + + NOTE: You really should consider setting `avoid_fp_drift = False` unless + you are dealing with very small numbers (say, far smaller than 0.00001) + or require extreme accuracy at the cost of execution time. For + `subset_size` < 20, the performance difference is very small. 
+ ''' + if subset_size < 1: + raise ValueError('subset_size must be 1 or larger') + + if data_is_list is None: + data_is_list = hasattr(data, '__getslice__') + + divisor = float(subset_size) + if data_is_list: + # This only works if we can re-access old elements, but is much faster. + # In other words, it can't be just an iterable, it needs to be a list. + + if subset_size > len(data): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + for x in range(subset_size, len(data) + 1): + yield sum(data[x - subset_size:x]) / divisor + else: + cur = sum(data[0:subset_size]) + yield cur / divisor + for x in range(subset_size, len(data)): + cur += data[x] - data[x - subset_size] + yield cur / divisor + else: + # Based on the recipe at: + # http://docs.python.org/library/collections.html#deque-recipes + it = iter(data) + d = deque(islice(it, subset_size)) + + if subset_size > len(d): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + yield sum(d) / divisor + for elem in it: + d.popleft() + d.append(elem) + yield sum(d) / divisor + else: + s = sum(d) + yield s / divisor + for elem in it: + s += elem - d.popleft() + d.append(elem) + yield s / divisor + + +########################## +if __name__ == '__main__': + import unittest + + class TestMovingAverage(unittest.TestCase): + #################### + def test_List(self): + try: + list(movingaverage([1,2,3], 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True)), [40.0,42.0,45.0,43.0]) + + + ###################### + def test_XRange(self): + try: + list(movingaverage(xrange(1, 4), 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) + + + ########################### + def test_ListRolling(self): + try: + list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) + 
self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, + avoid_fp_drift = False)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, + avoid_fp_drift = False)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + + + ############################# + def test_XRangeRolling(self): + try: + list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6, + avoid_fp_drift = False)), [3.5]) + + + ###################################################################### + suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) + unittest.TextTestRunner(verbosity = 2).run(suite) + diff --git a/scripts/backends_cpu_usage/plot.py b/scripts/backends_cpu_usage/plot.py new file mode 100755 index 00000000..4e5083ad --- /dev/null +++ b/scripts/backends_cpu_usage/plot.py @@ -0,0 +1,81 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt +from movingaverage import movingaverage + + +def smooth(l): + return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) + + +files = [ + ('sqlite', 'b'), + ('sqlcipher', 'r'), + ('u1dblite', 'g'), + ('u1dbcipher', 'm'), +] + + +# config the plot +plt.xlabel('time (s)') +plt.ylabel('cpu usage (%)') +plt.title('u1db backends CPU usage') + + +for fi in files: + + backend = fi[0] + color = fi[1] + filename = '%s.txt' % backend + + x = [] + y = [] + + xmax = None + xmin = None + ymax = None + ymin = None + + # read data from file + with open(filename, 'r') as f: + line = f.readline() + while line is not None: + time, cpu = tuple(line.strip().split(' ')) + cpu = float(cpu) + x.append(float(time)) + y.append(cpu) + if ymax == None or cpu > ymax: + ymax = cpu + xmax = time + if ymin == None or cpu < ymin: + ymin = cpu + xmin = time + line = f.readline() + if line == '': + break + + kwargs = { + 'linewidth': 1.0, + 'linestyle': 
'-', + # 'marker': '.', + 'color': color, + } + plt.plot( + [n for n in smooth(x)], + [n for n in smooth(y)], + label=backend, **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) + #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.ylim(0, 100) +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/backends_cpu_usage/test_u1db_sync.py b/scripts/backends_cpu_usage/test_u1db_sync.py new file mode 100755 index 00000000..26ef8f9f --- /dev/null +++ b/scripts/backends_cpu_usage/test_u1db_sync.py @@ -0,0 +1,113 @@ +#!/usr/bin/python + + +import u1db +import tempfile +import logging +import shutil +import os +import argparse +import time +import binascii +import random + + +from leap.soledad.client.sqlcipher import open as sqlcipher_open +from log_cpu_usage import LogCpuUsage +from u1dblite import open as u1dblite_open +from u1dbcipher import open as u1dbcipher_open + + +DOCS_TO_SYNC = 1000 +SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB +BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB + + +def get_data(size): + return binascii.hexlify(os.urandom(size/2)) + + +def run_test(testname, open_fun, tempdir, docs, *args): + logger.info('Starting test \"%s\".' % testname) + + # instantiate dbs + db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) + db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) + + # get sync target and synchsonizer + target = db2.get_sync_target() + synchronizer = u1db.sync.Synchronizer(db1, target) + + + # generate lots of small documents + logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) + for content in docs: + db1.create_doc(content) + logger.info('%d documents created in source db.' % DOCS_TO_SYNC) + + # run the test + filename = testname + '.txt' + logger.info('Logging CPU usage to %s.' % filename) + log_cpu = LogCpuUsage(filename) + tstart = time.time() + + # start logging cpu + log_cpu.start() + logger.info('Sleeping for 5 seconds...') + time.sleep(5) + + # sync + logger.info('Starting sync...') + sstart = time.time() + synchronizer.sync() + send = time.time() + logger.info('Sync finished.') + + # stop logging cpu + logger.info('Sleeping for 5 seconds...') + time.sleep(5) + tend = time.time() + log_cpu.stop() + + # report + logger.info('Total sync time: %f seconds' % (send - sstart)) + logger.info('Total test time: %f seconds' % (tend - tstart)) + logger.info('Finished test \"%s\".' 
% testname) + + # close dbs + db1.close() + db2.close() + + +if __name__ == '__main__': + + # configure logger + logger = logging.getLogger(__name__) + LOG_FORMAT = '%(asctime)s %(message)s' + logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + + # get a temporary dir + tempdir = tempfile.mkdtemp() + logger.info('Using temporary directory %s' % tempdir) + + + # create a lot of documents with random sizes + docs = [] + for i in xrange(DOCS_TO_SYNC): + docs.append({ + 'index': i, + #'data': get_data( + # random.randrange( + # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) + }) + + # run tests + run_test('sqlite', u1db.open, tempdir, docs, True) + run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) + run_test('u1dblite', u1dblite_open, tempdir, docs) + run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) + + # remove temporary dir + logger.info('Removing temporary directory %s' % tempdir) + shutil.rmtree(tempdir) -- cgit v1.2.3 From 5a2e9ac138faca940e10920be008a229a7a54cca Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 14 Mar 2014 16:04:28 -0300 Subject: Improve client db access script. --- scripts/db_access/client_side_db.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py index 15980f5d..2bf4ab5e 100644 --- a/scripts/db_access/client_side_db.py +++ b/scripts/db_access/client_side_db.py @@ -13,17 +13,24 @@ import requests import json import srp._pysrp as srp import binascii +import logging from leap.common.config import get_path_prefix from leap.soledad.client import Soledad +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + safe_unhexlify = lambda x: binascii.unhexlify(x) if ( len(x) % 2 == 0) else binascii.unhexlify('0' + x) def fail(reason): - print 'Fail: ' + reason + logger.error('Fail: ' + reason) exit(2) @@ -94,6 +101,8 @@ def get_soledad_instance(username, provider, passphrase, basedir): # setup soledad info uuid, server_url, cert_file, token = \ get_soledad_info(username, provider, passphrase, basedir) + logger.info('UUID is %s' % uuid) + logger.info('Server URL is %s' % server_url) secrets_path = os.path.join( basedir, '%s.secret' % uuid) local_db_path = os.path.join( @@ -138,7 +147,7 @@ if __name__ == '__main__': basedir = args.basedir if basedir is None: basedir = tempfile.mkdtemp() - print 'Using %s as base directory.' % basedir + logger.info('Using %s as base directory.' % basedir) # get the soledad instance s = get_soledad_instance( -- cgit v1.2.3 From 4dd32df24959b3f6062f525a3bba5cd592729107 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 24 Mar 2014 12:38:44 -0300 Subject: Add script for resetting users database. --- scripts/db_access/reset_db.py | 79 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 scripts/db_access/reset_db.py (limited to 'scripts') diff --git a/scripts/db_access/reset_db.py b/scripts/db_access/reset_db.py new file mode 100644 index 00000000..80871856 --- /dev/null +++ b/scripts/db_access/reset_db.py @@ -0,0 +1,79 @@ +#!/usr/bin/python + +# This script can be run on server side to completelly reset a user database. +# +# WARNING: running this script over a database will delete all documents but +# the one with id u1db_config (which contains db metadata) and design docs +# needed for couch backend. 
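#
# A typical invocation, assuming the couch URL is configured in
# /etc/leap/soledad-server.conf as read below, would be something like:
#
#   python reset_db.py user-uuid
#
# where user-uuid selects the 'user-<uuid>' database to be reset.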
+ + +import sys +from ConfigParser import ConfigParser +import threading +import logging +from couchdb import Database as CouchDatabase + + +if len(sys.argv) != 2: + print 'Usage: %s ' % sys.argv[0] + exit(1) + +uuid = sys.argv[1] + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = cp.get('soledad-server', 'couch_url') + + +# confirm +yes = raw_input("Are you sure you want to reset the database for user %s " + "(type YES)? " % uuid) +if yes != 'YES': + print 'Bailing out...' + exit(2) + + +db = CouchDatabase('%s/user-%s' % (url, uuid)) + + +class _DeleterThread(threading.Thread): + + def __init__(self, db, doc_id, release_fun): + threading.Thread.__init__(self) + self._db = db + self._doc_id = doc_id + self._release_fun = release_fun + + def run(self): + logger.info('[%s] deleting doc...' % self._doc_id) + del self._db[self._doc_id] + logger.info('[%s] done.' % self._doc_id) + self._release_fun() + + +semaphore_pool = threading.BoundedSemaphore(value=20) + + +threads = [] +for doc_id in db: + if doc_id != 'u1db_config' and not doc_id.startswith('_design'): + semaphore_pool.acquire() + logger.info('[main] launching thread for doc: %s' % doc_id) + t = _DeleterThread(db, doc_id, semaphore_pool.release) + t.start() + threads.append(t) + + +logger.info('[main] waiting for threads.') +map(lambda thread: thread.join(), threads) + + +logger.info('[main] done.') -- cgit v1.2.3 From d028eecfcbb8a137d5f4896a4bf7268c3eefd1da Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 27 Mar 2014 10:53:20 -0300 Subject: Add test script for syncing many replicas concurrently (#5386). --- scripts/profiling/sync/sync-many.py | 125 ++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 scripts/profiling/sync/sync-many.py (limited to 'scripts') diff --git a/scripts/profiling/sync/sync-many.py b/scripts/profiling/sync/sync-many.py new file mode 100644 index 00000000..3666df2c --- /dev/null +++ b/scripts/profiling/sync/sync-many.py @@ -0,0 +1,125 @@ +#!/usr/bin/python + +# The purpose of this script is to stress a soledad server by: +# +# - Instantiating multiple clients. +# - Creating many documents in each client. +# - Syncing all at the same time with th server multiple times, until +# they've all reached an agreement on the state of the databases and +# there's nothing else to be synced. + + +import threading +import tempfile +import argparse +import logging +import re +import getpass +import time +import shutil + + +from client_side_db import get_soledad_instance + + +from leap.soledad.client import BootstrapSequenceError + + +NUMBER_OF_REPLICAS = 5 +DOCUMENTS_PER_REPLICA = 10 + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +class WorkerThread(threading.Thread): + + def __init__(self, thread_id, soledad, all_set): + threading.Thread.__init__(self) + self._id = thread_id + self._soledad = soledad + self._all_set = all_set + self._done_creating = threading.Event() + + def run(self): + # create many documents + logger.info('[replica %d] creating documents...' % self._id) + for i in xrange(DOCUMENTS_PER_REPLICA): + self._soledad.create_doc({'a_doc': i}) + # wait for others + self._done_creating.set() + logger.info('[replica %d] done creating documents.' 
% self._id) + self._all_set.wait() + # sync + successes = 0 + while True: + logger.info('[replica %d] syncing.' % self._id) + if self._id == 1: + time.sleep(5) + old_gen = self._soledad.sync() + logger.info('[replica %d] synced.' % self._id) + new_gen = self._soledad._db._get_generation() + logger.info('[replica %d] old gen %d - new gen %d.' % + (self._id, old_gen, new_gen)) + if old_gen == new_gen: + successes += 1 + logger.info('[replica %d] sync not needed.' % self._id) + if successes == 3: + break + + +def stress_test(username, provider, passphrase, basedir): + threads = [] + all_set = threading.Event() + for i in xrange(NUMBER_OF_REPLICAS): + logging.info('[main] starting thread %d.' % i) + s = get_soledad_instance( + username, + provider, + passphrase, + tempfile.mkdtemp(dir=basedir)) + t = WorkerThread(i, s, all_set) + t.start() + threads.append(t) + map(lambda t: t._done_creating.wait(), threads) + all_set.set() + map(lambda t: t.join(), threads) + logger.info('Removing dir %s' % basedir) + shutil.rmtree(basedir) + + +# main program + +if __name__ == '__main__': + + class ValidateUserHandle(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') + res = m.match(values) + if res == None: + parser.error('User handle should have the form user@provider.') + setattr(namespace, 'username', res.groups()[0]) + setattr(namespace, 'provider', res.groups()[1]) + + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + 'user@provider', action=ValidateUserHandle, help='the user handle') + parser.add_argument( + '-b', dest='basedir', required=False, default=None, help='the user handle') + args = parser.parse_args() + + # get the password + passphrase = getpass.getpass( + 'Password for %s@%s: ' % (args.username, args.provider)) + + # get the basedir + basedir = args.basedir + if basedir is None: + basedir = tempfile.mkdtemp() + logger.info('[main] using %s as base directory.' % basedir) + + stress_test(args.username, args.provider, passphrase, basedir) -- cgit v1.2.3 From c3f5e9a6b340db969844a662c27fcb2b3f7596b9 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 4 Apr 2014 11:55:35 -0300 Subject: Renew couch connection session after multipart PUT (#5448). --- scripts/profiling/sync/sync-many.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/profiling/sync/sync-many.py b/scripts/profiling/sync/sync-many.py index 3666df2c..83793b0a 100644 --- a/scripts/profiling/sync/sync-many.py +++ b/scripts/profiling/sync/sync-many.py @@ -25,7 +25,7 @@ from client_side_db import get_soledad_instance from leap.soledad.client import BootstrapSequenceError -NUMBER_OF_REPLICAS = 5 +NUMBER_OF_REPLICAS = 1 DOCUMENTS_PER_REPLICA = 10 -- cgit v1.2.3
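
The sync-many.py stress test above relies on a simple two-phase rendezvous: each worker signals when it has finished creating documents, and no worker starts syncing until the main thread has seen every worker's signal and releases them all at once. A stripped-down sketch of that coordination pattern, with the Soledad-specific work replaced by placeholders, could look like this:

    import threading

    NUM_WORKERS = 5

    class Worker(threading.Thread):

        def __init__(self, wid, all_set):
            threading.Thread.__init__(self)
            self.done_creating = threading.Event()
            self._wid = wid
            self._all_set = all_set

        def run(self):
            # phase 1: each worker prepares its own data (placeholder)
            print('[worker %d] creating documents...' % self._wid)
            self.done_creating.set()
            # phase 2: wait until every worker is ready, then hit the
            # server simultaneously (placeholder for the actual sync)
            self._all_set.wait()
            print('[worker %d] syncing...' % self._wid)

    all_set = threading.Event()
    workers = [Worker(i, all_set) for i in range(NUM_WORKERS)]
    for w in workers:
        w.start()
    for w in workers:
        w.done_creating.wait()   # main thread waits for phase 1 everywhere
    all_set.set()                # release all workers into phase 2 at once
    for w in workers:
        w.join()

In the real script each worker then keeps syncing until its local generation stops changing for a few consecutive rounds, which is how the replicas detect that they have reached agreement.
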