Diffstat (limited to 'scripts')
-rw-r--r--  scripts/README.rst                                     |  13
-rwxr-xr-x  scripts/backends_cpu_usage/log_cpu_usage.py            |  46
-rw-r--r--  scripts/backends_cpu_usage/movingaverage.py            | 209
-rwxr-xr-x  scripts/backends_cpu_usage/plot.py                     |  81
-rwxr-xr-x  scripts/backends_cpu_usage/test_u1db_sync.py           | 113
-rwxr-xr-x  scripts/build_debian_package.sh                        |  32
-rw-r--r--  scripts/client-side-db.py                              |  36
-rw-r--r--  scripts/db_access/client_side_db.py                    | 154
-rw-r--r--  scripts/db_access/reset_db.py                          |  79
-rw-r--r--  scripts/db_access/server_side_db.py (renamed from scripts/server-side-db.py) | 4
-rwxr-xr-x  scripts/doc_put_memory_usage/find_max_upload_size.py   | 169
-rwxr-xr-x  scripts/doc_put_memory_usage/get-mem.py                |  16
-rwxr-xr-x  scripts/doc_put_memory_usage/plot-mem.py               |  73
-rw-r--r--  scripts/profiling/sync/sync-many.py                    | 125
-rw-r--r--  scripts/update_design_docs.py                          | 147

15 files changed, 1248 insertions(+), 49 deletions(-)
| diff --git a/scripts/README.rst b/scripts/README.rst index fdd1d642..37cf2c0e 100644 --- a/scripts/README.rst +++ b/scripts/README.rst @@ -2,16 +2,3 @@ Soledad Scripts  ===============  The scripts in this directory are meant to be used for development purposes. - -Currently, the scripts are: - -  * server-side-db.py: Gives access to server-side soledad user database, -    based on the configuration in /etc/leap/soledad-server.conf. One should -    use it as: - -      python -i server-side-db.py <uuid> - -  * client-side-db.py: Gives access to client-side soledad user database, -    based on data stored in ~/.config/leap/soledad. One should use it as: - -      python -i client-side-db.py <uuid> <passphrase> diff --git a/scripts/backends_cpu_usage/log_cpu_usage.py b/scripts/backends_cpu_usage/log_cpu_usage.py new file mode 100755 index 00000000..2674e1ff --- /dev/null +++ b/scripts/backends_cpu_usage/log_cpu_usage.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + + +# Get the CPU usage and print to file. + + +import psutil +import time +import argparse +import os +import threading + + +class LogCpuUsage(threading.Thread): + +    def __init__(self, fname): +        threading.Thread.__init__(self) +        self._stopped = True +        self._fname = fname  + +    def run(self): +        self._stopped = False +        with open(self._fname, 'w') as f: +            start = time.time() +            while self._stopped is False: +                now = time.time() +                f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) +                time.sleep(0.01) + +    def stop(self): +        self._stopped = True + + +if __name__ == '__main__': +    parser = argparse.ArgumentParser() +    parser.add_argument('file', help='where to save output') +    args = parser.parse_args() + +    if os.path.isfile(args.file): +        replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) +        if replace.lower() != 'y': +            print 'Bailing out.' +            exit(1) +     +    log_cpu = LogCpuUsage(args.file) +    log_cpu.run() diff --git a/scripts/backends_cpu_usage/movingaverage.py b/scripts/backends_cpu_usage/movingaverage.py new file mode 100644 index 00000000..bac1b3e1 --- /dev/null +++ b/scripts/backends_cpu_usage/movingaverage.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +#  Sean Reifschneider, tummy.com, ltd.  <jafo@tummy.com> +#  Released into the Public Domain, 2011-02-06 + +import itertools +from itertools import islice +from collections import deque + + +######################################################### +def movingaverage(data, subset_size, data_is_list = None, +		avoid_fp_drift = True): +	'''Return the moving averages of the data, with a window size of +	`subset_size`.  `subset_size` must be an integer greater than 0 and +	less than the length of the input data, or a ValueError will be raised. + +	`data_is_list` can be used to tune the algorithm for list or iteratable +	as an input.  The default value, `None` will auto-detect this. +	The algorithm used if `data` is a list is almost twice as fast as if +	it is an iteratable. + +	`avoid_fp_drift`, if True (the default) sums every sub-set rather than +	keeping a "rolling sum" (which may be subject to floating-point drift). +	While more correct, it is also dramatically slower for subset sizes +	much larger than 20. 
+ +	NOTE: You really should consider setting `avoid_fp_drift = False` unless +	you are dealing with very small numbers (say, far smaller than 0.00001) +	or require extreme accuracy at the cost of execution time.  For +	`subset_size` < 20, the performance difference is very small. +	''' +	if subset_size < 1: +		raise ValueError('subset_size must be 1 or larger') + +	if data_is_list is None: +		data_is_list = hasattr(data, '__getslice__') + +	divisor = float(subset_size) +	if data_is_list: +		#  This only works if we can re-access old elements, but is much faster. +		#  In other words, it can't be just an iterable, it needs to be a list. + +		if subset_size > len(data): +			raise ValueError('subset_size must be smaller than data set size') + +		if avoid_fp_drift: +			for x in range(subset_size, len(data) + 1): +				yield sum(data[x - subset_size:x]) / divisor +		else: +			cur = sum(data[0:subset_size]) +			yield cur / divisor +			for x in range(subset_size, len(data)): +				cur += data[x] - data[x - subset_size] +				yield cur / divisor +	else: +		#  Based on the recipe at: +		#     http://docs.python.org/library/collections.html#deque-recipes +		it = iter(data) +		d = deque(islice(it, subset_size)) + +		if subset_size > len(d): +			raise ValueError('subset_size must be smaller than data set size') + +		if avoid_fp_drift: +			yield sum(d) / divisor +			for elem in it: +				d.popleft() +				d.append(elem) +				yield sum(d) / divisor +		else: +			s = sum(d) +			yield s / divisor +			for elem in it: +				s += elem - d.popleft() +				d.append(elem) +				yield s / divisor + + +########################## +if __name__ == '__main__': +	import unittest + +	class TestMovingAverage(unittest.TestCase): +		#################### +		def test_List(self): +			try: +				list(movingaverage([1,2,3], 0)) +				self.fail('Did not raise ValueError on subset_size=0') +			except ValueError: +				pass + +			try: +				list(movingaverage([1,2,3,4,5,6], 7)) +				self.fail('Did not raise ValueError on subset_size > len(data)') +			except ValueError: +				pass + +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), +					[1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), +					[1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) + +			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], +					3, False)), [40.0,42.0,45.0,43.0]) +			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], +					3, True)), [40.0,42.0,45.0,43.0]) + + +		###################### +		def test_XRange(self): +			try: +				list(movingaverage(xrange(1, 4), 0)) +				self.fail('Did not raise ValueError on subset_size=0') +			except ValueError: +				pass + +			try: +				list(movingaverage(xrange(1, 7), 7)) +				self.fail('Did not raise ValueError on subset_size > len(data)') +			except ValueError: +				pass + +			self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 2)), +					[1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), +					2)), [1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) +			
self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) + + +		########################### +		def test_ListRolling(self): +			try: +				list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) +				self.fail('Did not raise ValueError on subset_size=0') +			except ValueError: +				pass + +			try: +				list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) +				self.fail('Did not raise ValueError on subset_size > len(data)') +			except ValueError: +				pass + +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, +					avoid_fp_drift = False)), [1,2,3,4,5,6]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, +					avoid_fp_drift = False)), +					[1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2, +					avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, +					avoid_fp_drift = False)), [2,3,4,5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, +					avoid_fp_drift = False)), [2.5,3.5,4.5]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, +					avoid_fp_drift = False)), [3,4]) +			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, +					avoid_fp_drift = False)), [3.5]) + +			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], +					3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) +			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], +					3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + + +		############################# +		def test_XRangeRolling(self): +			try: +				list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) +				self.fail('Did not raise ValueError on subset_size=0') +			except ValueError: +				pass + +			try: +				list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) +				self.fail('Did not raise ValueError on subset_size > len(data)') +			except ValueError: +				pass + +			self.assertEqual(list(movingaverage(xrange(1, 7), 1, +					avoid_fp_drift = False)), [1,2,3,4,5,6]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 2, +					avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), +					2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 3, +					avoid_fp_drift = False)), [2,3,4,5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 4, +					avoid_fp_drift = False)), [2.5,3.5,4.5]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 5, +					avoid_fp_drift = False)), [3,4]) +			self.assertEqual(list(movingaverage(xrange(1, 7), 6, +					avoid_fp_drift = False)), [3.5]) + + +	###################################################################### +	suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) +	unittest.TextTestRunner(verbosity = 2).run(suite) + diff --git a/scripts/backends_cpu_usage/plot.py b/scripts/backends_cpu_usage/plot.py new file mode 100755 index 00000000..4e5083ad --- /dev/null +++ b/scripts/backends_cpu_usage/plot.py @@ -0,0 +1,81 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt +from movingaverage import movingaverage + + +def smooth(l): +    return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) + + +files = [ +    ('sqlite', 'b'), +    ('sqlcipher', 'r'), +    ('u1dblite', 'g'), +    ('u1dbcipher', 'm'), +] + + +# config the plot +plt.xlabel('time (s)') 
+plt.ylabel('cpu usage (%)') +plt.title('u1db backends CPU usage') + + +for fi in files: + +    backend = fi[0] +    color = fi[1] +    filename = '%s.txt' % backend  + +    x = [] +    y = [] + +    xmax = None +    xmin = None +    ymax = None +    ymin = None + +    # read data from file +    with open(filename, 'r') as f: +        line = f.readline() +        while line is not None: +            time, cpu = tuple(line.strip().split(' ')) +            cpu = float(cpu) +            x.append(float(time)) +            y.append(cpu) +            if ymax == None or cpu > ymax: +                ymax = cpu +                xmax = time +            if ymin == None or cpu < ymin: +                ymin = cpu +                xmin = time +            line = f.readline() +            if line == '': +                break + +    kwargs = { +        'linewidth': 1.0, +        'linestyle': '-', +    #    'marker': '.', +        'color': color, +    } +    plt.plot( +        [n for n in smooth(x)], +        [n for n in smooth(y)], +        label=backend, **kwargs) + +    #plt.axes().get_xaxis().set_ticks(x) +    #plt.axes().get_xaxis().set_ticklabels(x) + +    # annotate max and min values +    #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) +    #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.ylim(0, 100) +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/backends_cpu_usage/test_u1db_sync.py b/scripts/backends_cpu_usage/test_u1db_sync.py new file mode 100755 index 00000000..26ef8f9f --- /dev/null +++ b/scripts/backends_cpu_usage/test_u1db_sync.py @@ -0,0 +1,113 @@ +#!/usr/bin/python + + +import u1db +import tempfile +import logging +import shutil +import os +import argparse +import time +import binascii +import random + + +from leap.soledad.client.sqlcipher import open as sqlcipher_open +from log_cpu_usage import LogCpuUsage +from u1dblite import open as u1dblite_open +from u1dbcipher import open as u1dbcipher_open + + +DOCS_TO_SYNC = 1000 +SMALLEST_DOC_SIZE = 1 * 1024  # 1 KB +BIGGEST_DOC_SIZE = 100 * 1024  # 100 KB + + +def get_data(size): +    return binascii.hexlify(os.urandom(size/2)) + + +def run_test(testname, open_fun, tempdir, docs,  *args): +    logger.info('Starting test \"%s\".' % testname) + +    # instantiate dbs +    db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) +    db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) + +    # get sync target and synchsonizer +    target = db2.get_sync_target() +    synchronizer = u1db.sync.Synchronizer(db1, target) + + +    # generate lots of small documents +    logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) +    for content in docs: +        db1.create_doc(content) +    logger.info('%d documents created in source db.' % DOCS_TO_SYNC) + +    # run the test +    filename = testname + '.txt' +    logger.info('Logging CPU usage to %s.' 
% filename) +    log_cpu = LogCpuUsage(filename) +    tstart = time.time() + +    # start logging cpu +    log_cpu.start() +    logger.info('Sleeping for 5 seconds...') +    time.sleep(5) + +    # sync +    logger.info('Starting sync...') +    sstart = time.time() +    synchronizer.sync() +    send = time.time() +    logger.info('Sync finished.') + +    # stop logging cpu +    logger.info('Sleeping for 5 seconds...') +    time.sleep(5) +    tend = time.time() +    log_cpu.stop() + +    # report +    logger.info('Total sync time: %f seconds' % (send - sstart)) +    logger.info('Total test time: %f seconds' % (tend - tstart)) +    logger.info('Finished test \"%s\".' % testname) + +    # close dbs +    db1.close() +    db2.close() + + +if __name__ == '__main__': +     +    # configure logger +    logger = logging.getLogger(__name__) +    LOG_FORMAT = '%(asctime)s %(message)s' +    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +    # get a temporary dir +    tempdir = tempfile.mkdtemp() +    logger.info('Using temporary directory %s' % tempdir) + + +    # create a lot of documents with random sizes +    docs = [] +    for i in xrange(DOCS_TO_SYNC): +        docs.append({ +            'index': i, +            #'data': get_data( +            #    random.randrange( +            #        SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) +        }) + +    # run tests +    run_test('sqlite', u1db.open, tempdir, docs, True) +    run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) +    run_test('u1dblite', u1dblite_open, tempdir, docs) +    run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) + +    # remove temporary dir +    logger.info('Removing temporary directory %s' % tempdir) +    shutil.rmtree(tempdir) diff --git a/scripts/build_debian_package.sh b/scripts/build_debian_package.sh new file mode 100755 index 00000000..cc62c3ac --- /dev/null +++ b/scripts/build_debian_package.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +# This script generates Soledad Debian packages. +# +# When invoking this script, you should pass a git repository URL and the name +# of the branch that contains the code you wish to build the packages from. +# +# The script will clone the given branch from the given repo, as well as the +# main Soledad repo in github which contains the most up-to-date debian +# branch. It will then merge the desired branch into the debian branch and +# build the packages. 
+ +if [ $# -ne 2 ]; then +  echo "Usage: ${0} <url> <branch>" +  exit 1 +fi + +SOLEDAD_MAIN_REPO=git://github.com/leapcode/soledad.git + +url=$1 +branch=$2 +workdir=`mktemp -d` + +git clone -b ${branch} ${url} ${workdir}/soledad +export GIT_DIR=${workdir}/soledad/.git +export GIT_WORK_TREE=${workdir}/soledad +git remote add leapcode ${SOLEDAD_MAIN_REPO} +git fetch leapcode +git checkout debian +git merge --no-edit ${branch} +(cd ${workdir}/soledad && debuild -uc -us) +echo "Packages generated in ${workdir}" diff --git a/scripts/client-side-db.py b/scripts/client-side-db.py deleted file mode 100644 index 0c3df7a4..00000000 --- a/scripts/client-side-db.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python - -# This script gives client-side access to one Soledad user database by using -# the data stored in ~/.config/leap/soledad/ - -import sys -import os - -from leap.common.config import get_path_prefix -from leap.soledad.client import Soledad - -if len(sys.argv) != 3: -    print 'Usage: %s <uuid> <passphrase>' % sys.argv[0] -    exit(1) - -uuid = sys.argv[1] -passphrase = unicode(sys.argv[2]) - -secrets_path = os.path.join(get_path_prefix(), 'leap', 'soledad', -                            '%s.secret' % uuid) -local_db_path = os.path.join(get_path_prefix(), 'leap', 'soledad', -                             '%s.db' % uuid) -server_url = 'http://dummy-url' -cert_file = 'cert' - -sol = Soledad(uuid, passphrase, secrets_path, local_db_path, server_url, -             cert_file) -db = sol._db - -# get replica info -replica_uid = db._replica_uid -gen, docs = db.get_all_docs() -print "replica_uid: %s" % replica_uid -print "generation:  %d" % gen -gen, trans_id = db._get_generation_info() -print "transaction_id: %s" % trans_id diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py new file mode 100644 index 00000000..2bf4ab5e --- /dev/null +++ b/scripts/db_access/client_side_db.py @@ -0,0 +1,154 @@ +#!/usr/bin/python + +# This script gives client-side access to one Soledad user database. 
+ + +import sys +import os +import argparse +import re +import tempfile +import getpass +import requests +import json +import srp._pysrp as srp +import binascii +import logging + +from leap.common.config import get_path_prefix +from leap.soledad.client import Soledad + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +safe_unhexlify = lambda x: binascii.unhexlify(x) if ( +    len(x) % 2 == 0) else binascii.unhexlify('0' + x) + + +def fail(reason): +    logger.error('Fail: ' + reason) +    exit(2) + + +def get_api_info(provider): +    info = requests.get( +        'https://'+provider+'/provider.json', verify=False).json() +    return info['api_uri'], info['api_version'] + + +def login(username, passphrase, provider, api_uri, api_version): +    usr = srp.User(username, passphrase, srp.SHA256, srp.NG_1024) +    auth = None +    try: +        auth = authenticate(api_uri, api_version, usr).json() +    except requests.exceptions.ConnectionError: +        fail('Could not connect to server.') +    if 'errors' in auth: +        fail(str(auth['errors'])) +    return api_uri, api_version, auth + + +def authenticate(api_uri, api_version, usr): +    api_url = "%s/%s" % (api_uri, api_version) +    session = requests.session() +    uname, A = usr.start_authentication() +    params = {'login': uname, 'A': binascii.hexlify(A)} +    init = session.post( +        api_url + '/sessions', data=params, verify=False).json() +    if 'errors' in init: +        fail('test user not found') +    M = usr.process_challenge( +        safe_unhexlify(init['salt']), safe_unhexlify(init['B'])) +    return session.put(api_url + '/sessions/' + uname, verify=False, +                       data={'client_auth': binascii.hexlify(M)}) + + +def get_soledad_info(username, provider, passphrase, basedir): +    api_uri, api_version = get_api_info(provider) +    auth = login(username, passphrase, provider, api_uri, api_version) +    # get soledad server url +    service_url = '%s/%s/config/soledad-service.json' % \ +                  (api_uri, api_version) +    soledad_hosts = requests.get(service_url, verify=False).json()['hosts'] +    hostnames = soledad_hosts.keys() +    # allow for choosing the host +    host = hostnames[0] +    if len(hostnames) > 1: +        i = 1 +        print "There are many available hosts:" +        for h in hostnames: +            print "  (%d) %s.%s" % (i, h, provider) +            i += 1 +        choice = raw_input("Choose a host to use (default: 1): ") +        if choice != '': +            host = hostnames[int(choice) - 1] +    server_url = 'https://%s:%d/user-%s' % \ +              (soledad_hosts[host]['hostname'], soledad_hosts[host]['port'], +               auth[2]['id']) +    # get provider ca certificate +    ca_cert = requests.get('https://%s/ca.crt' % provider, verify=False).text +    cert_file = os.path.join(basedir, 'ca.crt') +    with open(cert_file, 'w') as f: +      f.write(ca_cert) +    return auth[2]['id'], server_url, cert_file, auth[2]['token'] + + +def get_soledad_instance(username, provider, passphrase, basedir): +    # setup soledad info +    uuid, server_url, cert_file, token = \ +        get_soledad_info(username, provider, passphrase, basedir) +    logger.info('UUID is %s' % uuid) +    logger.info('Server URL is %s' % server_url) +    secrets_path = os.path.join( +        basedir, '%s.secret' % uuid) +    local_db_path = os.path.join( +        basedir, '%s.db' % uuid) +   
 # instantiate soledad +    return Soledad( +        uuid, +        unicode(passphrase), +        secrets_path=secrets_path, +        local_db_path=local_db_path, +        server_url=server_url, +        cert_file=cert_file, +        auth_token=token) + + +# main program + +if __name__ == '__main__': + +    class ValidateUserHandle(argparse.Action): +        def __call__(self, parser, namespace, values, option_string=None): +            m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') +            res = m.match(values) +            if res == None: +                parser.error('User handle should have the form user@provider.') +            setattr(namespace, 'username', res.groups()[0]) +            setattr(namespace, 'provider', res.groups()[1]) + +    # parse command line +    parser = argparse.ArgumentParser() +    parser.add_argument( +        'user@provider', action=ValidateUserHandle, help='the user handle') +    parser.add_argument( +        '-b', dest='basedir', required=False, default=None, help='the user handle') +    args = parser.parse_args() + +    # get the password +    passphrase = getpass.getpass( +        'Password for %s@%s: ' % (args.username, args.provider)) + +    # get the basedir +    basedir = args.basedir +    if basedir is None: +        basedir = tempfile.mkdtemp() +    logger.info('Using %s as base directory.' % basedir) + +    # get the soledad instance +    s = get_soledad_instance( +        args.username, args.provider, passphrase, basedir) diff --git a/scripts/db_access/reset_db.py b/scripts/db_access/reset_db.py new file mode 100644 index 00000000..80871856 --- /dev/null +++ b/scripts/db_access/reset_db.py @@ -0,0 +1,79 @@ +#!/usr/bin/python + +# This script can be run on server side to completelly reset a user database. +# +# WARNING: running this script over a database will delete all documents but +# the one with id u1db_config (which contains db metadata) and design docs +# needed for couch backend. + + +import sys +from ConfigParser import ConfigParser +import threading +import logging +from couchdb import Database as CouchDatabase + + +if len(sys.argv) != 2: +    print 'Usage: %s <uuid>' % sys.argv[0] +    exit(1) + +uuid = sys.argv[1] + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = cp.get('soledad-server', 'couch_url') + + +# confirm +yes = raw_input("Are you sure you want to reset the database for user %s " +                "(type YES)? " % uuid) +if yes != 'YES': +    print 'Bailing out...' +    exit(2) + + +db = CouchDatabase('%s/user-%s' % (url, uuid)) + + +class _DeleterThread(threading.Thread): + +    def __init__(self, db, doc_id, release_fun): +        threading.Thread.__init__(self) +        self._db = db +        self._doc_id = doc_id +        self._release_fun = release_fun + +    def run(self): +        logger.info('[%s] deleting doc...' % self._doc_id) +        del self._db[self._doc_id] +        logger.info('[%s] done.' 
% self._doc_id) +        self._release_fun() + + +semaphore_pool = threading.BoundedSemaphore(value=20) + + +threads = [] +for doc_id in db: +    if doc_id != 'u1db_config' and not doc_id.startswith('_design'): +        semaphore_pool.acquire() +        logger.info('[main] launching thread for doc: %s' % doc_id) +        t = _DeleterThread(db, doc_id, semaphore_pool.release) +        t.start() +        threads.append(t) + + +logger.info('[main] waiting for threads.') +map(lambda thread: thread.join(), threads) + + +logger.info('[main] done.') diff --git a/scripts/server-side-db.py b/scripts/db_access/server_side_db.py index 01a9aaac..18641a0f 100644 --- a/scripts/server-side-db.py +++ b/scripts/db_access/server_side_db.py @@ -2,6 +2,10 @@  # This script gives server-side access to one Soledad user database by using  # the configuration stored in /etc/leap/soledad-server.conf. +# +# Use it like this: +#  +#     python -i server-side-db.py <uuid>  import sys  from ConfigParser import ConfigParser diff --git a/scripts/doc_put_memory_usage/find_max_upload_size.py b/scripts/doc_put_memory_usage/find_max_upload_size.py new file mode 100755 index 00000000..02c68015 --- /dev/null +++ b/scripts/doc_put_memory_usage/find_max_upload_size.py @@ -0,0 +1,169 @@ +#!/usr/bin/python + +# This script finds the maximum upload size for a document in the current +# server. It pulls couch URL from Soledad config file and attempts multiple +# PUTs until it finds the maximum size supported by the server. +# +# As the Soledad couch user is not an admin, you have to pass a database into +# which the test will be run. The database should already exist and be +# initialized with soledad design documents. +# +# Use it like this: +# +#     ./find_max_upload_size.py <dbname> +#     ./find_max_upload_size.py -h + +import os +import configparser +import logging +import argparse +import random +import string +import binascii +import json +import time +import uuid + + +from couchdb.client import Database +from socket import error as socket_error +from leap.soledad.common.couch import CouchDatabase + + +SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' +PREFIX = '/tmp/soledad_test' +LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' +RETRIES = 3  # number of times to retry uploading a document of a certain +             # size after a failure + + +# configure logger +logger = logging.getLogger(__name__) + + +def config_log(level): +    logging.basicConfig(format=LOG_FORMAT, level=level) + + +def log_to_file(filename): +    handler = logging.FileHandler(filename, mode='a') +    handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) +    logger.addHandler(handler) + + +# create test dir +if not os.path.exists(PREFIX): +    os.mkdir(PREFIX) + + +def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): +    config = configparser.ConfigParser() +    config.read(config_file) +    return config['soledad-server']['couch_url'] + + +# generate or load an uploadable doc with the given size in mb +def get_content(size): +    fname = os.path.join(PREFIX, 'content-%d.json' % size) +    if os.path.exists(fname): +        logger.debug('Loading content with %d MB...' % size) +        with open(fname, 'r') as f: +            return f.read() +    else: +        length = int(size * 1024 ** 2) +        logger.debug('Generating body with %d MB...' 
% size) +        content = binascii.hexlify(os.urandom(length))[:length] +        with open(fname, 'w') as f: +            f.write(content) +        return content + + +def delete_doc(db): +    doc = db.get('largedoc') +    db.delete(doc) + + +def upload(db, size, couch_db): +    # try many times to be sure that size is infeasible +    for i in range(RETRIES): +        # wait until server is up to upload +        while True: +            try: +                'largedoc' in couch_db +                break +            except socket_error: +                logger.debug('Waiting for server to come up...') +                time.sleep(1) +        # attempt to upload +        try: +            logger.debug( +                'Trying to upload %d MB document (attempt %d/%d)...' % +                (size, (i+1), RETRIES)) +            content = get_content(size) +            logger.debug('Starting upload of %d bytes.' % len(content)) +            doc = db.create_doc({'data': content}, doc_id='largedoc') +            delete_doc(couch_db) +            logger.debug('Success uploading %d MB doc.' % size) +            return True +        except Exception as e: +            logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) +    return False + + +def find_max_upload_size(db_uri): +    db = CouchDatabase.open_database(db_uri, False) +    couch_db = Database(db_uri) +    logger.debug('Database URI: %s' % db_uri) +    # delete eventual leftover from last run +    if 'largedoc' in couch_db: +        delete_doc(couch_db) +    # phase 1: increase upload size exponentially +    logger.info('Starting phase 1: increasing size exponentially.') +    size = 1 +    #import ipdb; ipdb.set_trace() +    while True: +        if upload(db, size, couch_db): +            size *= 2 +        else: +            break + +    # phase 2: binary search for maximum value +    unable = size +    able = size / 2 +    logger.info('Starting phase 2: binary search for maximum value.') +    while unable - able > 1: +        size = able + ((unable - able) / 2) +        if upload(db, size, couch_db): +            able = size +        else: +            unable = size +    return able + + +if __name__ == '__main__': +    # parse command line +    parser = argparse.ArgumentParser() +    parser.add_argument( +        '-d', action='store_true', dest='debug', +        help='print debugging information') +    parser.add_argument( +        '-l', dest='logfile', +        help='log output to file') +    parser.add_argument( +        'db_uri', help='the couch database URI to test') +    args = parser.parse_args() + +    # log to file +    if args.logfile is not None: +        log_to_file(args.logfile) + +    # set loglevel +    if args.debug is True: +        config_log(logging.DEBUG) +    else: +        config_log(logging.INFO) + +    # run test and report +    logger.info('Will test using db at %s.' % args.db_uri) +    maxsize = find_max_upload_size(args.db_uri) +    logger.info('Max upload size is %d MB.' 
% maxsize) diff --git a/scripts/doc_put_memory_usage/get-mem.py b/scripts/doc_put_memory_usage/get-mem.py new file mode 100755 index 00000000..d64875fc --- /dev/null +++ b/scripts/doc_put_memory_usage/get-mem.py @@ -0,0 +1,16 @@ +#!/usr/bin/python + + +import psutil +import time + + +delta = 50 * 60 +start = time.time() + +while True: +    now = time.time() +    print "%s %s" % (now - start, psutil.phymem_usage().used) +    time.sleep(0.1) +    if now > start + delta: +        break diff --git a/scripts/doc_put_memory_usage/plot-mem.py b/scripts/doc_put_memory_usage/plot-mem.py new file mode 100755 index 00000000..e24679a2 --- /dev/null +++ b/scripts/doc_put_memory_usage/plot-mem.py @@ -0,0 +1,73 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt + + +files = [ +    ('local', 'couchdb-json', 'b'), +    ('local', 'bigcouch-json', 'r'), +    ('local', 'couchdb-multipart', 'g'), +    ('local', 'bigcouch-multipart', 'm'), +] + + +# config the plot +plt.xlabel('time') +plt.ylabel('memory usage') +plt.title('bigcouch versus couch memory usage') + + +for fi in files: + +    machine = fi[0] +    database = fi[1] +    color = fi[2] +    filename = '%s-%s.txt' % (machine, database) + +    x = [] +    y = [] + +    xmax = None +    xmin = None +    ymax = None +    ymin = None + +    # read data from file +    with open(filename, 'r') as f: +        line = f.readline() +        while line is not None: +            time, mem = tuple(line.strip().split(' ')) +            mem = float(mem) / (10**9) +            x.append(float(time)) +            y.append(mem) +            if ymax == None or mem > ymax: +                ymax = mem +                xmax = time +            if ymin == None or mem < ymin: +                ymin = mem +                xmin = time +            line = f.readline() +            if line == '': +                break + +    kwargs = { +        'linewidth': 1.0, +        'linestyle': '-', +    #    'marker': '.', +        'color': color, +    } +    plt.plot(x, y, label=database, **kwargs) + +    #plt.axes().get_xaxis().set_ticks(x) +    #plt.axes().get_xaxis().set_ticklabels(x) + +    # annotate max and min values +    #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) +    #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/profiling/sync/sync-many.py b/scripts/profiling/sync/sync-many.py new file mode 100644 index 00000000..83793b0a --- /dev/null +++ b/scripts/profiling/sync/sync-many.py @@ -0,0 +1,125 @@ +#!/usr/bin/python + +# The purpose of this script is to stress a soledad server by: +# +#   - Instantiating multiple clients. +#   - Creating many documents in each client. +#   - Syncing all at the same time with th server multiple times, until +#     they've all reached an agreement on the state of the databases and +#     there's nothing else to be synced. 
+ + +import threading +import tempfile +import argparse +import logging +import re +import getpass +import time +import shutil + + +from client_side_db import get_soledad_instance + + +from leap.soledad.client import BootstrapSequenceError + + +NUMBER_OF_REPLICAS = 1 +DOCUMENTS_PER_REPLICA = 10 + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +class WorkerThread(threading.Thread): + +    def __init__(self, thread_id, soledad, all_set): +        threading.Thread.__init__(self) +        self._id = thread_id +        self._soledad = soledad +        self._all_set = all_set +        self._done_creating = threading.Event() + +    def run(self): +        # create many documents +        logger.info('[replica %d] creating documents...' % self._id) +        for i in xrange(DOCUMENTS_PER_REPLICA): +            self._soledad.create_doc({'a_doc': i}) +        # wait for others +        self._done_creating.set() +        logger.info('[replica %d] done creating documents.' % self._id) +        self._all_set.wait() +        # sync +        successes = 0 +        while True: +            logger.info('[replica %d] syncing.' % self._id) +            if self._id == 1: +                time.sleep(5) +            old_gen = self._soledad.sync() +            logger.info('[replica %d] synced.' % self._id) +            new_gen = self._soledad._db._get_generation() +            logger.info('[replica %d] old gen %d - new gen %d.' % +                (self._id, old_gen, new_gen)) +            if old_gen == new_gen: +                successes += 1 +                logger.info('[replica %d] sync not needed.' % self._id) +                if successes == 3: +                    break + + +def stress_test(username, provider, passphrase, basedir): +    threads = [] +    all_set = threading.Event() +    for i in xrange(NUMBER_OF_REPLICAS): +        logging.info('[main] starting thread %d.' % i) +        s = get_soledad_instance( +            username, +            provider, +            passphrase, +            tempfile.mkdtemp(dir=basedir)) +        t = WorkerThread(i, s, all_set) +        t.start() +        threads.append(t) +    map(lambda t: t._done_creating.wait(), threads) +    all_set.set() +    map(lambda t: t.join(), threads) +    logger.info('Removing dir %s' % basedir) +    shutil.rmtree(basedir) + + +# main program + +if __name__ == '__main__': + +    class ValidateUserHandle(argparse.Action): +        def __call__(self, parser, namespace, values, option_string=None): +            m = re.compile('^([^@]+)@([^@]+\.[^@]+)$') +            res = m.match(values) +            if res == None: +                parser.error('User handle should have the form user@provider.') +            setattr(namespace, 'username', res.groups()[0]) +            setattr(namespace, 'provider', res.groups()[1]) + +    # parse command line +    parser = argparse.ArgumentParser() +    parser.add_argument( +        'user@provider', action=ValidateUserHandle, help='the user handle') +    parser.add_argument( +        '-b', dest='basedir', required=False, default=None, help='the user handle') +    args = parser.parse_args() + +    # get the password +    passphrase = getpass.getpass( +        'Password for %s@%s: ' % (args.username, args.provider)) + +    # get the basedir +    basedir = args.basedir +    if basedir is None: +        basedir = tempfile.mkdtemp() +    logger.info('[main] using %s as base directory.' 
% basedir) + +    stress_test(args.username, args.provider, passphrase, basedir) diff --git a/scripts/update_design_docs.py b/scripts/update_design_docs.py new file mode 100644 index 00000000..e7b5a29c --- /dev/null +++ b/scripts/update_design_docs.py @@ -0,0 +1,147 @@ +#!/usr/bin/python + +# This script updates Soledad's design documents in the session database and +# all user databases with contents from the installed leap.soledad.common +# package. + +import json +import logging +import argparse +import re +import threading +import binascii + + +from getpass import getpass +from ConfigParser import ConfigParser +from couchdb.client import Server +from couchdb.http import Resource, Session +from datetime import datetime +from urlparse import urlparse + + +from leap.soledad.common import ddocs + + +# parse command line for the log file name +logger_fname = "/tmp/update-design-docs_%s.log" % \ +               str(datetime.now()).replace(' ', '_') +parser = argparse.ArgumentParser() +parser.add_argument('--log', action='store', default=logger_fname, type=str, +                    required=False, help='the name of the log file', nargs=1) +args = parser.parse_args() + + +# configure the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +print "Logging to %s." % args.log +logging.basicConfig( +    filename=args.log, +    format="%(asctime)-15s %(message)s") + + +# configure threads +max_threads = 20 +semaphore_pool = threading.BoundedSemaphore(value=max_threads) +threads = [] + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = urlparse(cp.get('soledad-server', 'couch_url')) + +# get admin password +netloc = re.sub('^.*@', '', url.netloc) +url = url._replace(netloc=netloc) +password = getpass("Admin password for %s: " % url.geturl()) +url = url._replace(netloc='admin:%s@%s' % (password, netloc)) + +resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10)) +server = Server(url=resource) + +hidden_url = re.sub( +    'http://(.*):.*@', +    'http://\\1:xxxxx@', +    url.geturl()) + +print """ +========== +ATTENTION! +========== + +This script will modify Soledad's shared and user databases in: + +  %s + +This script does not make a backup of the couch db data, so make sure you +have a copy or you may loose data. +""" % hidden_url +confirm = raw_input("Proceed (type uppercase YES)? ") + +if confirm != "YES": +    exit(1) + +# convert design doc content + +design_docs = { +    '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)), +    '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)), +    '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)), +} + +# +# Thread +# + +class DBWorkerThread(threading.Thread): + +    def __init__(self, server, dbname, db_idx, db_len, release_fun): +        threading.Thread.__init__(self) +        self._dbname = dbname +        self._cdb = server[self._dbname] +        self._db_idx = db_idx +        self._db_len = db_len +        self._release_fun = release_fun + +    def run(self): + +        logger.info("(%d/%d) Updating db %s." 
% (self._db_idx, self._db_len, +                    self._dbname)) + +        for doc_id in design_docs: +            doc = self._cdb[doc_id] +            for key in ['lists', 'views', 'updates']: +                if key in design_docs[doc_id]: +                    doc[key] = design_docs[doc_id][key] +            self._cdb.save(doc) + +        # release the semaphore +        self._release_fun() + + +db_idx = 0 +db_len = len(server) +for dbname in server: + +    db_idx += 1 + +    if not (dbname.startswith('user-') or dbname == 'shared') \ +            or dbname == 'user-test-db': +        logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) +        continue + + +    # get access to couch db +    cdb = Server(url.geturl())[dbname] + +    #--------------------------------------------------------------------- +    # Start DB worker thread +    #--------------------------------------------------------------------- +    semaphore_pool.acquire() +    thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) +    thread.daemon = True +    thread.start() +    threads.append(thread) + +map(lambda thread: thread.join(), threads) | 
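
For reference, the scripts added or relocated by this diff define their own command-line interfaces. Judging from the usage strings and argparse declarations shown above, they are invoked roughly as follows; this is an orientation sketch derived from the diff, not part of the commit itself:

    # server-side access to a user database, per /etc/leap/soledad-server.conf
    python -i scripts/db_access/server_side_db.py <uuid>

    # client-side access to a user database (prompts for the user's passphrase)
    python -i scripts/db_access/client_side_db.py <user@provider> [-b <basedir>]

    # reset a user database on the server, keeping only metadata and design docs
    python scripts/db_access/reset_db.py <uuid>

    # probe the maximum document upload size accepted by a server
    scripts/doc_put_memory_usage/find_max_upload_size.py [-d] [-l <logfile>] <db_uri>

    # build Debian packages from the given repository and branch
    scripts/build_debian_package.sh <url> <branch>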