path: root/scripts
author    Tomás Touceda <chiiph@leap.se>  2014-04-04 16:34:57 -0300
committer Tomás Touceda <chiiph@leap.se>  2014-04-04 16:34:57 -0300
commit    ce22976cc0e203e53799e771aa5e3717d498cc5c (patch)
tree      8c69733f1f8a83ac83e40bf7e522a5fe8eae9b50 /scripts
parent    deb78a5f3502ece98ec3e0b70f93025c4a1b3da5 (diff)
parent    a3fed4d42ab4a7be7bc7ebe86b35805ac73d62de (diff)
Merge branch 'release-0.4.5' (tags: 0.5.0, 0.4.5)
Diffstat (limited to 'scripts')
-rw-r--r--  scripts/README.rst                                     |  13
-rwxr-xr-x  scripts/backends_cpu_usage/log_cpu_usage.py            |  46
-rw-r--r--  scripts/backends_cpu_usage/movingaverage.py            | 209
-rwxr-xr-x  scripts/backends_cpu_usage/plot.py                     |  81
-rwxr-xr-x  scripts/backends_cpu_usage/test_u1db_sync.py           | 113
-rwxr-xr-x  scripts/build_debian_package.sh                        |  32
-rw-r--r--  scripts/client-side-db.py                              |  36
-rw-r--r--  scripts/db_access/client_side_db.py                    | 154
-rw-r--r--  scripts/db_access/reset_db.py                          |  79
-rw-r--r--  scripts/db_access/server_side_db.py (renamed from scripts/server-side-db.py) | 4
-rwxr-xr-x  scripts/doc_put_memory_usage/find_max_upload_size.py   | 169
-rwxr-xr-x  scripts/doc_put_memory_usage/get-mem.py                |  16
-rwxr-xr-x  scripts/doc_put_memory_usage/plot-mem.py               |  73
-rw-r--r--  scripts/profiling/sync/sync-many.py                    | 125
-rw-r--r--  scripts/update_design_docs.py                          | 147
15 files changed, 1248 insertions(+), 49 deletions(-)
diff --git a/scripts/README.rst b/scripts/README.rst
index fdd1d642..37cf2c0e 100644
--- a/scripts/README.rst
+++ b/scripts/README.rst
@@ -2,16 +2,3 @@ Soledad Scripts
===============
The scripts in this directory are meant to be used for development purposes.
-
-Currently, the scripts are:
-
- * server-side-db.py: Gives access to server-side soledad user database,
- based on the configuration in /etc/leap/soledad-server.conf. One should
- use it as:
-
- python -i server-side-db.py <uuid>
-
- * client-side-db.py: Gives access to client-side soledad user database,
- based on data stored in ~/.config/leap/soledad. One should use it as:
-
- python -i client-side-db.py <uuid> <passphrase>
diff --git a/scripts/backends_cpu_usage/log_cpu_usage.py b/scripts/backends_cpu_usage/log_cpu_usage.py
new file mode 100755
index 00000000..2674e1ff
--- /dev/null
+++ b/scripts/backends_cpu_usage/log_cpu_usage.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+
+
+# Get the CPU usage and print to file.
+
+
+import psutil
+import time
+import argparse
+import os
+import threading
+
+
+class LogCpuUsage(threading.Thread):
+
+ def __init__(self, fname):
+ threading.Thread.__init__(self)
+ self._stopped = True
+ self._fname = fname
+
+ def run(self):
+ self._stopped = False
+ with open(self._fname, 'w') as f:
+ start = time.time()
+ while self._stopped is False:
+ now = time.time()
+ f.write("%f %f\n" % ((now - start), psutil.cpu_percent()))
+ time.sleep(0.01)
+
+ def stop(self):
+ self._stopped = True
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('file', help='where to save output')
+ args = parser.parse_args()
+
+ if os.path.isfile(args.file):
+ replace = raw_input('File %s exists, replace it (y/N)? ' % args.file)
+ if replace.lower() != 'y':
+ print 'Bailing out.'
+ exit(1)
+
+ log_cpu = LogCpuUsage(args.file)
+ log_cpu.run()
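
A minimal sketch of driving this logger from another script (the output path below is illustrative). Note that the __main__ block above calls run() directly, which samples in the main thread until interrupted, while start() runs the loop in a background thread that stop() can end:

    from log_cpu_usage import LogCpuUsage
    import time

    log_cpu = LogCpuUsage('/tmp/cpu-usage.txt')  # illustrative output file
    log_cpu.start()      # sample CPU usage in a background thread
    time.sleep(10)       # ... run the workload being measured ...
    log_cpu.stop()       # end the sampling loop
    log_cpu.join()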
diff --git a/scripts/backends_cpu_usage/movingaverage.py b/scripts/backends_cpu_usage/movingaverage.py
new file mode 100644
index 00000000..bac1b3e1
--- /dev/null
+++ b/scripts/backends_cpu_usage/movingaverage.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+#
+# Sean Reifschneider, tummy.com, ltd. <jafo@tummy.com>
+# Released into the Public Domain, 2011-02-06
+
+import itertools
+from itertools import islice
+from collections import deque
+
+
+#########################################################
+def movingaverage(data, subset_size, data_is_list = None,
+ avoid_fp_drift = True):
+ '''Return the moving averages of the data, with a window size of
+ `subset_size`. `subset_size` must be an integer greater than 0 and
+ less than the length of the input data, or a ValueError will be raised.
+
+ `data_is_list` can be used to tune the algorithm for a list or an iterable
+ as input. The default value, `None`, will auto-detect this.
+ The algorithm used if `data` is a list is almost twice as fast as if
+ it is an iterable.
+
+ `avoid_fp_drift`, if True (the default) sums every sub-set rather than
+ keeping a "rolling sum" (which may be subject to floating-point drift).
+ While more correct, it is also dramatically slower for subset sizes
+ much larger than 20.
+
+ NOTE: You really should consider setting `avoid_fp_drift = False` unless
+ you are dealing with very small numbers (say, far smaller than 0.00001)
+ or require extreme accuracy at the cost of execution time. For
+ `subset_size` < 20, the performance difference is very small.
+ '''
+ if subset_size < 1:
+ raise ValueError('subset_size must be 1 or larger')
+
+ if data_is_list is None:
+ data_is_list = hasattr(data, '__getslice__')
+
+ divisor = float(subset_size)
+ if data_is_list:
+ # This only works if we can re-access old elements, but is much faster.
+ # In other words, it can't be just an iterable, it needs to be a list.
+
+ if subset_size > len(data):
+ raise ValueError('subset_size must be smaller than data set size')
+
+ if avoid_fp_drift:
+ for x in range(subset_size, len(data) + 1):
+ yield sum(data[x - subset_size:x]) / divisor
+ else:
+ cur = sum(data[0:subset_size])
+ yield cur / divisor
+ for x in range(subset_size, len(data)):
+ cur += data[x] - data[x - subset_size]
+ yield cur / divisor
+ else:
+ # Based on the recipe at:
+ # http://docs.python.org/library/collections.html#deque-recipes
+ it = iter(data)
+ d = deque(islice(it, subset_size))
+
+ if subset_size > len(d):
+ raise ValueError('subset_size must be smaller than data set size')
+
+ if avoid_fp_drift:
+ yield sum(d) / divisor
+ for elem in it:
+ d.popleft()
+ d.append(elem)
+ yield sum(d) / divisor
+ else:
+ s = sum(d)
+ yield s / divisor
+ for elem in it:
+ s += elem - d.popleft()
+ d.append(elem)
+ yield s / divisor
+
+
+##########################
+if __name__ == '__main__':
+ import unittest
+
+ class TestMovingAverage(unittest.TestCase):
+ ####################
+ def test_List(self):
+ try:
+ list(movingaverage([1,2,3], 0))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage([1,2,3,4,5,6], 7))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5])
+
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, False)), [40.0,42.0,45.0,43.0])
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, True)), [40.0,42.0,45.0,43.0])
+
+
+ ######################
+ def test_XRange(self):
+ try:
+ list(movingaverage(xrange(1, 4), 0))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage(xrange(1, 7), 7))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))),
+ 2)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5])
+
+
+ ###########################
+ def test_ListRolling(self):
+ try:
+ list(movingaverage([1,2,3], 0, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1,
+ avoid_fp_drift = False)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2,
+ avoid_fp_drift = False)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2,
+ avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3,
+ avoid_fp_drift = False)), [2,3,4,5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4,
+ avoid_fp_drift = False)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5,
+ avoid_fp_drift = False)), [3,4])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6,
+ avoid_fp_drift = False)), [3.5])
+
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0])
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0])
+
+
+ #############################
+ def test_XRangeRolling(self):
+ try:
+ list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage(xrange(1, 7), 1,
+ avoid_fp_drift = False)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 2,
+ avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))),
+ 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 3,
+ avoid_fp_drift = False)), [2,3,4,5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 4,
+ avoid_fp_drift = False)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 5,
+ avoid_fp_drift = False)), [3,4])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 6,
+ avoid_fp_drift = False)), [3.5])
+
+
+ ######################################################################
+ suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage)
+ unittest.TextTestRunner(verbosity = 2).run(suite)
+
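For a quick sanity check of the generator above, the values below follow directly from the tests included in the module:

    >>> from movingaverage import movingaverage
    >>> list(movingaverage([40, 30, 50, 46, 39, 44], 3))
    [40.0, 42.0, 45.0, 43.0]
    >>> list(movingaverage(xrange(1, 7), 2))
    [1.5, 2.5, 3.5, 4.5, 5.5]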
diff --git a/scripts/backends_cpu_usage/plot.py b/scripts/backends_cpu_usage/plot.py
new file mode 100755
index 00000000..4e5083ad
--- /dev/null
+++ b/scripts/backends_cpu_usage/plot.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python
+
+
+from matplotlib import pyplot as plt
+from movingaverage import movingaverage
+
+
+def smooth(l):
+ return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False)
+
+
+files = [
+ ('sqlite', 'b'),
+ ('sqlcipher', 'r'),
+ ('u1dblite', 'g'),
+ ('u1dbcipher', 'm'),
+]
+
+
+# config the plot
+plt.xlabel('time (s)')
+plt.ylabel('cpu usage (%)')
+plt.title('u1db backends CPU usage')
+
+
+for fi in files:
+
+ backend = fi[0]
+ color = fi[1]
+ filename = '%s.txt' % backend
+
+ x = []
+ y = []
+
+ xmax = None
+ xmin = None
+ ymax = None
+ ymin = None
+
+ # read data from file
+ with open(filename, 'r') as f:
+ line = f.readline()
+ while line is not None:
+ time, cpu = tuple(line.strip().split(' '))
+ cpu = float(cpu)
+ x.append(float(time))
+ y.append(cpu)
+ if ymax == None or cpu > ymax:
+ ymax = cpu
+ xmax = time
+ if ymin == None or cpu < ymin:
+ ymin = cpu
+ xmin = time
+ line = f.readline()
+ if line == '':
+ break
+
+ kwargs = {
+ 'linewidth': 1.0,
+ 'linestyle': '-',
+ # 'marker': '.',
+ 'color': color,
+ }
+ plt.plot(
+ [n for n in smooth(x)],
+ [n for n in smooth(y)],
+ label=backend, **kwargs)
+
+ #plt.axes().get_xaxis().set_ticks(x)
+ #plt.axes().get_xaxis().set_ticklabels(x)
+
+ # annotate max and min values
+ #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax))
+ #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin))
+
+
+plt.ylim(0, 100)
+plt.grid()
+plt.legend()
+plt.show()
+
diff --git a/scripts/backends_cpu_usage/test_u1db_sync.py b/scripts/backends_cpu_usage/test_u1db_sync.py
new file mode 100755
index 00000000..26ef8f9f
--- /dev/null
+++ b/scripts/backends_cpu_usage/test_u1db_sync.py
@@ -0,0 +1,113 @@
+#!/usr/bin/python
+
+
+import u1db
+import tempfile
+import logging
+import shutil
+import os
+import argparse
+import time
+import binascii
+import random
+
+
+from leap.soledad.client.sqlcipher import open as sqlcipher_open
+from log_cpu_usage import LogCpuUsage
+from u1dblite import open as u1dblite_open
+from u1dbcipher import open as u1dbcipher_open
+
+
+DOCS_TO_SYNC = 1000
+SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB
+BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB
+
+
+def get_data(size):
+ return binascii.hexlify(os.urandom(size/2))
+
+
+def run_test(testname, open_fun, tempdir, docs, *args):
+ logger.info('Starting test \"%s\".' % testname)
+
+ # instantiate dbs
+ db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args)
+ db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args)
+
+ # get sync target and synchronizer
+ target = db2.get_sync_target()
+ synchronizer = u1db.sync.Synchronizer(db1, target)
+
+
+ # generate lots of small documents
+ logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC)
+ for content in docs:
+ db1.create_doc(content)
+ logger.info('%d documents created in source db.' % DOCS_TO_SYNC)
+
+ # run the test
+ filename = testname + '.txt'
+ logger.info('Logging CPU usage to %s.' % filename)
+ log_cpu = LogCpuUsage(filename)
+ tstart = time.time()
+
+ # start logging cpu
+ log_cpu.start()
+ logger.info('Sleeping for 5 seconds...')
+ time.sleep(5)
+
+ # sync
+ logger.info('Starting sync...')
+ sstart = time.time()
+ synchronizer.sync()
+ send = time.time()
+ logger.info('Sync finished.')
+
+ # stop logging cpu
+ logger.info('Sleeping for 5 seconds...')
+ time.sleep(5)
+ tend = time.time()
+ log_cpu.stop()
+
+ # report
+ logger.info('Total sync time: %f seconds' % (send - sstart))
+ logger.info('Total test time: %f seconds' % (tend - tstart))
+ logger.info('Finished test \"%s\".' % testname)
+
+ # close dbs
+ db1.close()
+ db2.close()
+
+
+if __name__ == '__main__':
+
+ # configure logger
+ logger = logging.getLogger(__name__)
+ LOG_FORMAT = '%(asctime)s %(message)s'
+ logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+ # get a temporary dir
+ tempdir = tempfile.mkdtemp()
+ logger.info('Using temporary directory %s' % tempdir)
+
+
+ # create a lot of documents with random sizes
+ docs = []
+ for i in xrange(DOCS_TO_SYNC):
+ docs.append({
+ 'index': i,
+ #'data': get_data(
+ # random.randrange(
+ # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE))
+ })
+
+ # run tests
+ run_test('sqlite', u1db.open, tempdir, docs, True)
+ run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True)
+ run_test('u1dblite', u1dblite_open, tempdir, docs)
+ run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True)
+
+ # remove temporary dir
+ logger.info('Removing temporary directory %s' % tempdir)
+ shutil.rmtree(tempdir)
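Taken together, the three scripts in backends_cpu_usage form one small experiment: test_u1db_sync.py writes a <backend>.txt file of CPU samples for each backend it exercises, and plot.py then reads sqlite.txt, sqlcipher.txt, u1dblite.txt and u1dbcipher.txt from the current directory and graphs the smoothed curves. A sketch of a full run, assuming the u1dblite/u1dbcipher backends and leap.soledad.client are importable:

    ./test_u1db_sync.py   # produces sqlite.txt, sqlcipher.txt, u1dblite.txt, u1dbcipher.txt
    ./plot.py             # plots the smoothed CPU usage of the four runs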
diff --git a/scripts/build_debian_package.sh b/scripts/build_debian_package.sh
new file mode 100755
index 00000000..cc62c3ac
--- /dev/null
+++ b/scripts/build_debian_package.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+# This script generates Soledad Debian packages.
+#
+# When invoking this script, you should pass a git repository URL and the name
+# of the branch that contains the code you wish to build the packages from.
+#
+# The script will clone the given branch from the given repo, as well as the
+# main Soledad repo on GitHub, which contains the most up-to-date debian
+# branch. It will then merge the desired branch into the debian branch and
+# build the packages.
+
+if [ $# -ne 2 ]; then
+ echo "Usage: ${0} <url> <branch>"
+ exit 1
+fi
+
+SOLEDAD_MAIN_REPO=git://github.com/leapcode/soledad.git
+
+url=$1
+branch=$2
+workdir=`mktemp -d`
+
+git clone -b ${branch} ${url} ${workdir}/soledad
+export GIT_DIR=${workdir}/soledad/.git
+export GIT_WORK_TREE=${workdir}/soledad
+git remote add leapcode ${SOLEDAD_MAIN_REPO}
+git fetch leapcode
+git checkout debian
+git merge --no-edit ${branch}
+(cd ${workdir}/soledad && debuild -uc -us)
+echo "Packages generated in ${workdir}"
diff --git a/scripts/client-side-db.py b/scripts/client-side-db.py
deleted file mode 100644
index 0c3df7a4..00000000
--- a/scripts/client-side-db.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/python
-
-# This script gives client-side access to one Soledad user database by using
-# the data stored in ~/.config/leap/soledad/
-
-import sys
-import os
-
-from leap.common.config import get_path_prefix
-from leap.soledad.client import Soledad
-
-if len(sys.argv) != 3:
- print 'Usage: %s <uuid> <passphrase>' % sys.argv[0]
- exit(1)
-
-uuid = sys.argv[1]
-passphrase = unicode(sys.argv[2])
-
-secrets_path = os.path.join(get_path_prefix(), 'leap', 'soledad',
- '%s.secret' % uuid)
-local_db_path = os.path.join(get_path_prefix(), 'leap', 'soledad',
- '%s.db' % uuid)
-server_url = 'http://dummy-url'
-cert_file = 'cert'
-
-sol = Soledad(uuid, passphrase, secrets_path, local_db_path, server_url,
- cert_file)
-db = sol._db
-
-# get replica info
-replica_uid = db._replica_uid
-gen, docs = db.get_all_docs()
-print "replica_uid: %s" % replica_uid
-print "generation: %d" % gen
-gen, trans_id = db._get_generation_info()
-print "transaction_id: %s" % trans_id
diff --git a/scripts/db_access/client_side_db.py b/scripts/db_access/client_side_db.py
new file mode 100644
index 00000000..2bf4ab5e
--- /dev/null
+++ b/scripts/db_access/client_side_db.py
@@ -0,0 +1,154 @@
+#!/usr/bin/python
+
+# This script gives client-side access to one Soledad user database.
+
+
+import sys
+import os
+import argparse
+import re
+import tempfile
+import getpass
+import requests
+import json
+import srp._pysrp as srp
+import binascii
+import logging
+
+from leap.common.config import get_path_prefix
+from leap.soledad.client import Soledad
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+safe_unhexlify = lambda x: binascii.unhexlify(x) if (
+ len(x) % 2 == 0) else binascii.unhexlify('0' + x)
+
+
+def fail(reason):
+ logger.error('Fail: ' + reason)
+ exit(2)
+
+
+def get_api_info(provider):
+ info = requests.get(
+ 'https://'+provider+'/provider.json', verify=False).json()
+ return info['api_uri'], info['api_version']
+
+
+def login(username, passphrase, provider, api_uri, api_version):
+ usr = srp.User(username, passphrase, srp.SHA256, srp.NG_1024)
+ auth = None
+ try:
+ auth = authenticate(api_uri, api_version, usr).json()
+ except requests.exceptions.ConnectionError:
+ fail('Could not connect to server.')
+ if 'errors' in auth:
+ fail(str(auth['errors']))
+ return api_uri, api_version, auth
+
+
+def authenticate(api_uri, api_version, usr):
+ api_url = "%s/%s" % (api_uri, api_version)
+ session = requests.session()
+ uname, A = usr.start_authentication()
+ params = {'login': uname, 'A': binascii.hexlify(A)}
+ init = session.post(
+ api_url + '/sessions', data=params, verify=False).json()
+ if 'errors' in init:
+ fail('test user not found')
+ M = usr.process_challenge(
+ safe_unhexlify(init['salt']), safe_unhexlify(init['B']))
+ return session.put(api_url + '/sessions/' + uname, verify=False,
+ data={'client_auth': binascii.hexlify(M)})
+
+
+def get_soledad_info(username, provider, passphrase, basedir):
+ api_uri, api_version = get_api_info(provider)
+ auth = login(username, passphrase, provider, api_uri, api_version)
+ # get soledad server url
+ service_url = '%s/%s/config/soledad-service.json' % \
+ (api_uri, api_version)
+ soledad_hosts = requests.get(service_url, verify=False).json()['hosts']
+ hostnames = soledad_hosts.keys()
+ # allow for choosing the host
+ host = hostnames[0]
+ if len(hostnames) > 1:
+ i = 1
+ print "There are many available hosts:"
+ for h in hostnames:
+ print " (%d) %s.%s" % (i, h, provider)
+ i += 1
+ choice = raw_input("Choose a host to use (default: 1): ")
+ if choice != '':
+ host = hostnames[int(choice) - 1]
+ server_url = 'https://%s:%d/user-%s' % \
+ (soledad_hosts[host]['hostname'], soledad_hosts[host]['port'],
+ auth[2]['id'])
+ # get provider ca certificate
+ ca_cert = requests.get('https://%s/ca.crt' % provider, verify=False).text
+ cert_file = os.path.join(basedir, 'ca.crt')
+ with open(cert_file, 'w') as f:
+ f.write(ca_cert)
+ return auth[2]['id'], server_url, cert_file, auth[2]['token']
+
+
+def get_soledad_instance(username, provider, passphrase, basedir):
+ # setup soledad info
+ uuid, server_url, cert_file, token = \
+ get_soledad_info(username, provider, passphrase, basedir)
+ logger.info('UUID is %s' % uuid)
+ logger.info('Server URL is %s' % server_url)
+ secrets_path = os.path.join(
+ basedir, '%s.secret' % uuid)
+ local_db_path = os.path.join(
+ basedir, '%s.db' % uuid)
+ # instantiate soledad
+ return Soledad(
+ uuid,
+ unicode(passphrase),
+ secrets_path=secrets_path,
+ local_db_path=local_db_path,
+ server_url=server_url,
+ cert_file=cert_file,
+ auth_token=token)
+
+
+# main program
+
+if __name__ == '__main__':
+
+ class ValidateUserHandle(argparse.Action):
+ def __call__(self, parser, namespace, values, option_string=None):
+ m = re.compile('^([^@]+)@([^@]+\.[^@]+)$')
+ res = m.match(values)
+ if res == None:
+ parser.error('User handle should have the form user@provider.')
+ setattr(namespace, 'username', res.groups()[0])
+ setattr(namespace, 'provider', res.groups()[1])
+
+ # parse command line
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ 'user@provider', action=ValidateUserHandle, help='the user handle')
+ parser.add_argument(
+ '-b', dest='basedir', required=False, default=None, help='the base directory to use')
+ args = parser.parse_args()
+
+ # get the password
+ passphrase = getpass.getpass(
+ 'Password for %s@%s: ' % (args.username, args.provider))
+
+ # get the basedir
+ basedir = args.basedir
+ if basedir is None:
+ basedir = tempfile.mkdtemp()
+ logger.info('Using %s as base directory.' % basedir)
+
+ # get the soledad instance
+ s = get_soledad_instance(
+ args.username, args.provider, passphrase, basedir)
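As with the old client-side-db.py it replaces, this script is most useful when run interactively; a hedged sketch, with a placeholder user handle:

    python -i client_side_db.py user@example.org
    ...
    >>> s.create_doc({'some': 'content'})
    >>> s.sync()

create_doc() and sync() are the same u1db-style calls used by the profiling script further below.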
diff --git a/scripts/db_access/reset_db.py b/scripts/db_access/reset_db.py
new file mode 100644
index 00000000..80871856
--- /dev/null
+++ b/scripts/db_access/reset_db.py
@@ -0,0 +1,79 @@
+#!/usr/bin/python
+
+# This script can be run on the server side to completely reset a user database.
+#
+# WARNING: running this script over a database will delete all documents except
+# the one with id u1db_config (which contains db metadata) and the design docs
+# needed by the couch backend.
+
+
+import sys
+from ConfigParser import ConfigParser
+import threading
+import logging
+from couchdb import Database as CouchDatabase
+
+
+if len(sys.argv) != 2:
+ print 'Usage: %s <uuid>' % sys.argv[0]
+ exit(1)
+
+uuid = sys.argv[1]
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+# get couch url
+cp = ConfigParser()
+cp.read('/etc/leap/soledad-server.conf')
+url = cp.get('soledad-server', 'couch_url')
+
+
+# confirm
+yes = raw_input("Are you sure you want to reset the database for user %s "
+ "(type YES)? " % uuid)
+if yes != 'YES':
+ print 'Bailing out...'
+ exit(2)
+
+
+db = CouchDatabase('%s/user-%s' % (url, uuid))
+
+
+class _DeleterThread(threading.Thread):
+
+ def __init__(self, db, doc_id, release_fun):
+ threading.Thread.__init__(self)
+ self._db = db
+ self._doc_id = doc_id
+ self._release_fun = release_fun
+
+ def run(self):
+ logger.info('[%s] deleting doc...' % self._doc_id)
+ del self._db[self._doc_id]
+ logger.info('[%s] done.' % self._doc_id)
+ self._release_fun()
+
+
+semaphore_pool = threading.BoundedSemaphore(value=20)
+
+
+threads = []
+for doc_id in db:
+ if doc_id != 'u1db_config' and not doc_id.startswith('_design'):
+ semaphore_pool.acquire()
+ logger.info('[main] launching thread for doc: %s' % doc_id)
+ t = _DeleterThread(db, doc_id, semaphore_pool.release)
+ t.start()
+ threads.append(t)
+
+
+logger.info('[main] waiting for threads.')
+map(lambda thread: thread.join(), threads)
+
+
+logger.info('[main] done.')
diff --git a/scripts/server-side-db.py b/scripts/db_access/server_side_db.py
index 01a9aaac..18641a0f 100644
--- a/scripts/server-side-db.py
+++ b/scripts/db_access/server_side_db.py
@@ -2,6 +2,10 @@
# This script gives server-side access to one Soledad user database by using
# the configuration stored in /etc/leap/soledad-server.conf.
+#
+# Use it like this:
+#
+# python -i server_side_db.py <uuid>
import sys
from ConfigParser import ConfigParser
diff --git a/scripts/doc_put_memory_usage/find_max_upload_size.py b/scripts/doc_put_memory_usage/find_max_upload_size.py
new file mode 100755
index 00000000..02c68015
--- /dev/null
+++ b/scripts/doc_put_memory_usage/find_max_upload_size.py
@@ -0,0 +1,169 @@
+#!/usr/bin/python
+
+# This script finds the maximum upload size for a document in the current
+# server. It pulls the couch URL from the Soledad config file and attempts multiple
+# PUTs until it finds the maximum size supported by the server.
+#
+# As the Soledad couch user is not an admin, you have to pass the name of a
+# database in which the test will run. The database should already exist and
+# be initialized with the soledad design documents.
+#
+# Use it like this:
+#
+# ./find_max_upload_size.py <dbname>
+# ./find_max_upload_size.py -h
+
+import os
+import configparser
+import logging
+import argparse
+import random
+import string
+import binascii
+import json
+import time
+import uuid
+
+
+from couchdb.client import Database
+from socket import error as socket_error
+from leap.soledad.common.couch import CouchDatabase
+
+
+SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf'
+PREFIX = '/tmp/soledad_test'
+LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s'
+RETRIES = 3 # number of times to retry uploading a document of a certain
+ # size after a failure
+
+
+# configure logger
+logger = logging.getLogger(__name__)
+
+
+def config_log(level):
+ logging.basicConfig(format=LOG_FORMAT, level=level)
+
+
+def log_to_file(filename):
+ handler = logging.FileHandler(filename, mode='a')
+ handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT))
+ logger.addHandler(handler)
+
+
+# create test dir
+if not os.path.exists(PREFIX):
+ os.mkdir(PREFIX)
+
+
+def get_couch_url(config_file=SOLEDAD_CONFIG_FILE):
+ config = configparser.ConfigParser()
+ config.read(config_file)
+ return config['soledad-server']['couch_url']
+
+
+# generate or load an uploadable doc with the given size in mb
+def get_content(size):
+ fname = os.path.join(PREFIX, 'content-%d.json' % size)
+ if os.path.exists(fname):
+ logger.debug('Loading content with %d MB...' % size)
+ with open(fname, 'r') as f:
+ return f.read()
+ else:
+ length = int(size * 1024 ** 2)
+ logger.debug('Generating body with %d MB...' % size)
+ content = binascii.hexlify(os.urandom(length))[:length]
+ with open(fname, 'w') as f:
+ f.write(content)
+ return content
+
+
+def delete_doc(db):
+ doc = db.get('largedoc')
+ db.delete(doc)
+
+
+def upload(db, size, couch_db):
+ # try many times to be sure that size is infeasible
+ for i in range(RETRIES):
+ # wait until server is up to upload
+ while True:
+ try:
+ 'largedoc' in couch_db
+ break
+ except socket_error:
+ logger.debug('Waiting for server to come up...')
+ time.sleep(1)
+ # attempt to upload
+ try:
+ logger.debug(
+ 'Trying to upload %d MB document (attempt %d/%d)...' %
+ (size, (i+1), RETRIES))
+ content = get_content(size)
+ logger.debug('Starting upload of %d bytes.' % len(content))
+ doc = db.create_doc({'data': content}, doc_id='largedoc')
+ delete_doc(couch_db)
+ logger.debug('Success uploading %d MB doc.' % size)
+ return True
+ except Exception as e:
+ logger.debug('Failed to upload %d MB doc: %s' % (size, str(e)))
+ return False
+
+
+def find_max_upload_size(db_uri):
+ db = CouchDatabase.open_database(db_uri, False)
+ couch_db = Database(db_uri)
+ logger.debug('Database URI: %s' % db_uri)
+ # delete eventual leftover from last run
+ if 'largedoc' in couch_db:
+ delete_doc(couch_db)
+ # phase 1: increase upload size exponentially
+ logger.info('Starting phase 1: increasing size exponentially.')
+ size = 1
+ #import ipdb; ipdb.set_trace()
+ while True:
+ if upload(db, size, couch_db):
+ size *= 2
+ else:
+ break
+
+ # phase 2: binary search for maximum value
+ unable = size
+ able = size / 2
+ logger.info('Starting phase 2: binary search for maximum value.')
+ while unable - able > 1:
+ size = able + ((unable - able) / 2)
+ if upload(db, size, couch_db):
+ able = size
+ else:
+ unable = size
+ return able
+
+
+if __name__ == '__main__':
+ # parse command line
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '-d', action='store_true', dest='debug',
+ help='print debugging information')
+ parser.add_argument(
+ '-l', dest='logfile',
+ help='log output to file')
+ parser.add_argument(
+ 'db_uri', help='the couch database URI to test')
+ args = parser.parse_args()
+
+ # log to file
+ if args.logfile is not None:
+ log_to_file(args.logfile)
+
+ # set loglevel
+ if args.debug is True:
+ config_log(logging.DEBUG)
+ else:
+ config_log(logging.INFO)
+
+ # run test and report
+ logger.info('Will test using db at %s.' % args.db_uri)
+ maxsize = find_max_upload_size(args.db_uri)
+ logger.info('Max upload size is %d MB.' % maxsize)
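As a concrete illustration of the two phases (the numbers are hypothetical): if uploads of 1, 2, 4, 8, 16 and 32 MB succeed and 64 MB fails, phase 1 stops with able = 32 and unable = 64; phase 2 then probes 48 and keeps halving the interval (56, 60, 62, 63 if every probe succeeds) until unable - able == 1, reporting 63 MB as the maximum.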
diff --git a/scripts/doc_put_memory_usage/get-mem.py b/scripts/doc_put_memory_usage/get-mem.py
new file mode 100755
index 00000000..d64875fc
--- /dev/null
+++ b/scripts/doc_put_memory_usage/get-mem.py
@@ -0,0 +1,16 @@
+#!/usr/bin/python
+
+
+import psutil
+import time
+
+
+delta = 50 * 60
+start = time.time()
+
+while True:
+ now = time.time()
+ print "%s %s" % (now - start, psutil.phymem_usage().used)
+ time.sleep(0.1)
+ if now > start + delta:
+ break
diff --git a/scripts/doc_put_memory_usage/plot-mem.py b/scripts/doc_put_memory_usage/plot-mem.py
new file mode 100755
index 00000000..e24679a2
--- /dev/null
+++ b/scripts/doc_put_memory_usage/plot-mem.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+
+
+from matplotlib import pyplot as plt
+
+
+files = [
+ ('local', 'couchdb-json', 'b'),
+ ('local', 'bigcouch-json', 'r'),
+ ('local', 'couchdb-multipart', 'g'),
+ ('local', 'bigcouch-multipart', 'm'),
+]
+
+
+# config the plot
+plt.xlabel('time')
+plt.ylabel('memory usage')
+plt.title('bigcouch versus couch memory usage')
+
+
+for fi in files:
+
+ machine = fi[0]
+ database = fi[1]
+ color = fi[2]
+ filename = '%s-%s.txt' % (machine, database)
+
+ x = []
+ y = []
+
+ xmax = None
+ xmin = None
+ ymax = None
+ ymin = None
+
+ # read data from file
+ with open(filename, 'r') as f:
+ line = f.readline()
+ while line is not None:
+ time, mem = tuple(line.strip().split(' '))
+ mem = float(mem) / (10**9)
+ x.append(float(time))
+ y.append(mem)
+ if ymax == None or mem > ymax:
+ ymax = mem
+ xmax = time
+ if ymin == None or mem < ymin:
+ ymin = mem
+ xmin = time
+ line = f.readline()
+ if line == '':
+ break
+
+ kwargs = {
+ 'linewidth': 1.0,
+ 'linestyle': '-',
+ # 'marker': '.',
+ 'color': color,
+ }
+ plt.plot(x, y, label=database, **kwargs)
+
+ #plt.axes().get_xaxis().set_ticks(x)
+ #plt.axes().get_xaxis().set_ticklabels(x)
+
+ # annotate max and min values
+ #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax))
+ #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin))
+
+
+plt.grid()
+plt.legend()
+plt.show()
+
diff --git a/scripts/profiling/sync/sync-many.py b/scripts/profiling/sync/sync-many.py
new file mode 100644
index 00000000..83793b0a
--- /dev/null
+++ b/scripts/profiling/sync/sync-many.py
@@ -0,0 +1,125 @@
+#!/usr/bin/python
+
+# The purpose of this script is to stress a soledad server by:
+#
+# - Instantiating multiple clients.
+# - Creating many documents in each client.
+# - Syncing them all with the server at the same time, multiple times, until
+# they've all reached an agreement on the state of the databases and
+# there's nothing else to be synced.
+
+
+import threading
+import tempfile
+import argparse
+import logging
+import re
+import getpass
+import time
+import shutil
+
+
+from client_side_db import get_soledad_instance
+
+
+from leap.soledad.client import BootstrapSequenceError
+
+
+NUMBER_OF_REPLICAS = 1
+DOCUMENTS_PER_REPLICA = 10
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+class WorkerThread(threading.Thread):
+
+ def __init__(self, thread_id, soledad, all_set):
+ threading.Thread.__init__(self)
+ self._id = thread_id
+ self._soledad = soledad
+ self._all_set = all_set
+ self._done_creating = threading.Event()
+
+ def run(self):
+ # create many documents
+ logger.info('[replica %d] creating documents...' % self._id)
+ for i in xrange(DOCUMENTS_PER_REPLICA):
+ self._soledad.create_doc({'a_doc': i})
+ # wait for others
+ self._done_creating.set()
+ logger.info('[replica %d] done creating documents.' % self._id)
+ self._all_set.wait()
+ # sync
+ successes = 0
+ while True:
+ logger.info('[replica %d] syncing.' % self._id)
+ if self._id == 1:
+ time.sleep(5)
+ old_gen = self._soledad.sync()
+ logger.info('[replica %d] synced.' % self._id)
+ new_gen = self._soledad._db._get_generation()
+ logger.info('[replica %d] old gen %d - new gen %d.' %
+ (self._id, old_gen, new_gen))
+ if old_gen == new_gen:
+ successes += 1
+ logger.info('[replica %d] sync not needed.' % self._id)
+ if successes == 3:
+ break
+
+
+def stress_test(username, provider, passphrase, basedir):
+ threads = []
+ all_set = threading.Event()
+ for i in xrange(NUMBER_OF_REPLICAS):
+ logging.info('[main] starting thread %d.' % i)
+ s = get_soledad_instance(
+ username,
+ provider,
+ passphrase,
+ tempfile.mkdtemp(dir=basedir))
+ t = WorkerThread(i, s, all_set)
+ t.start()
+ threads.append(t)
+ map(lambda t: t._done_creating.wait(), threads)
+ all_set.set()
+ map(lambda t: t.join(), threads)
+ logger.info('Removing dir %s' % basedir)
+ shutil.rmtree(basedir)
+
+
+# main program
+
+if __name__ == '__main__':
+
+ class ValidateUserHandle(argparse.Action):
+ def __call__(self, parser, namespace, values, option_string=None):
+ m = re.compile('^([^@]+)@([^@]+\.[^@]+)$')
+ res = m.match(values)
+ if res == None:
+ parser.error('User handle should have the form user@provider.')
+ setattr(namespace, 'username', res.groups()[0])
+ setattr(namespace, 'provider', res.groups()[1])
+
+ # parse command line
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ 'user@provider', action=ValidateUserHandle, help='the user handle')
+ parser.add_argument(
+ '-b', dest='basedir', required=False, default=None, help='the base directory to use')
+ args = parser.parse_args()
+
+ # get the password
+ passphrase = getpass.getpass(
+ 'Password for %s@%s: ' % (args.username, args.provider))
+
+ # get the basedir
+ basedir = args.basedir
+ if basedir is None:
+ basedir = tempfile.mkdtemp()
+ logger.info('[main] using %s as base directory.' % basedir)
+
+ stress_test(args.username, args.provider, passphrase, basedir)
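A hedged invocation sketch, assuming client_side_db.py from scripts/db_access is importable (for example, copied next to this script), with a placeholder handle and base directory:

    python sync-many.py user@example.org -b /tmp/sync-stress

Each of the NUMBER_OF_REPLICAS worker threads gets its own Soledad instance under the base directory, creates DOCUMENTS_PER_REPLICA documents, and then syncs repeatedly until three of its syncs have completed without changing the local generation.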
diff --git a/scripts/update_design_docs.py b/scripts/update_design_docs.py
new file mode 100644
index 00000000..e7b5a29c
--- /dev/null
+++ b/scripts/update_design_docs.py
@@ -0,0 +1,147 @@
+#!/usr/bin/python
+
+# This script updates Soledad's design documents in the session database and
+# all user databases with contents from the installed leap.soledad.common
+# package.
+
+import json
+import logging
+import argparse
+import re
+import threading
+import binascii
+
+
+from getpass import getpass
+from ConfigParser import ConfigParser
+from couchdb.client import Server
+from couchdb.http import Resource, Session
+from datetime import datetime
+from urlparse import urlparse
+
+
+from leap.soledad.common import ddocs
+
+
+# parse command line for the log file name
+logger_fname = "/tmp/update-design-docs_%s.log" % \
+ str(datetime.now()).replace(' ', '_')
+parser = argparse.ArgumentParser()
+parser.add_argument('--log', action='store', default=logger_fname, type=str,
+ required=False, help='the name of the log file')
+args = parser.parse_args()
+
+
+# configure the logger
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+print "Logging to %s." % args.log
+logging.basicConfig(
+ filename=args.log,
+ format="%(asctime)-15s %(message)s")
+
+
+# configure threads
+max_threads = 20
+semaphore_pool = threading.BoundedSemaphore(value=max_threads)
+threads = []
+
+# get couch url
+cp = ConfigParser()
+cp.read('/etc/leap/soledad-server.conf')
+url = urlparse(cp.get('soledad-server', 'couch_url'))
+
+# get admin password
+netloc = re.sub('^.*@', '', url.netloc)
+url = url._replace(netloc=netloc)
+password = getpass("Admin password for %s: " % url.geturl())
+url = url._replace(netloc='admin:%s@%s' % (password, netloc))
+
+resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10))
+server = Server(url=resource)
+
+hidden_url = re.sub(
+ 'http://(.*):.*@',
+ 'http://\\1:xxxxx@',
+ url.geturl())
+
+print """
+==========
+ATTENTION!
+==========
+
+This script will modify Soledad's shared and user databases in:
+
+ %s
+
+This script does not make a backup of the couch db data, so make sure you
+have a copy or you may lose data.
+""" % hidden_url
+confirm = raw_input("Proceed (type uppercase YES)? ")
+
+if confirm != "YES":
+ exit(1)
+
+# convert design doc content
+
+design_docs = {
+ '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)),
+ '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)),
+ '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)),
+}
+
+#
+# Thread
+#
+
+class DBWorkerThread(threading.Thread):
+
+ def __init__(self, server, dbname, db_idx, db_len, release_fun):
+ threading.Thread.__init__(self)
+ self._dbname = dbname
+ self._cdb = server[self._dbname]
+ self._db_idx = db_idx
+ self._db_len = db_len
+ self._release_fun = release_fun
+
+ def run(self):
+
+ logger.info("(%d/%d) Updating db %s." % (self._db_idx, self._db_len,
+ self._dbname))
+
+ for doc_id in design_docs:
+ doc = self._cdb[doc_id]
+ for key in ['lists', 'views', 'updates']:
+ if key in design_docs[doc_id]:
+ doc[key] = design_docs[doc_id][key]
+ self._cdb.save(doc)
+
+ # release the semaphore
+ self._release_fun()
+
+
+db_idx = 0
+db_len = len(server)
+for dbname in server:
+
+ db_idx += 1
+
+ if not (dbname.startswith('user-') or dbname == 'shared') \
+ or dbname == 'user-test-db':
+ logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname))
+ continue
+
+
+ # get access to couch db
+ cdb = Server(url.geturl())[dbname]
+
+ #---------------------------------------------------------------------
+ # Start DB worker thread
+ #---------------------------------------------------------------------
+ semaphore_pool.acquire()
+ thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release)
+ thread.daemon = True
+ thread.start()
+ threads.append(thread)
+
+map(lambda thread: thread.join(), threads)
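
A hedged usage sketch: the script needs read access to /etc/leap/soledad-server.conf and will prompt for the couch admin password; the log path below simply makes the default pattern explicit:

    python update_design_docs.py --log /tmp/update-design-docs.log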