From 3cfe76a694bb19d5da21192e4a9a467a9b4c59e1 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 7 Apr 2014 11:40:40 -0300 Subject: Add script to compile design docs (#5315) --- scripts/compile_design_docs.py | 111 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 scripts/compile_design_docs.py (limited to 'scripts') diff --git a/scripts/compile_design_docs.py b/scripts/compile_design_docs.py new file mode 100644 index 00000000..7ffebb10 --- /dev/null +++ b/scripts/compile_design_docs.py @@ -0,0 +1,111 @@ +#!/usr/bin/python + + +# This script builds files for the design documents represented in the +# ../common/src/soledad/common/ddocs directory structure (relative to the +# current location of the script) into a target directory. + + +import argparse +from os import listdir +from os.path import realpath, dirname, isdir, join, isfile, basename +import json + +DDOCS_REL_PATH = ('..', 'common', 'src', 'leap', 'soledad', 'common', 'ddocs') + + +def build_ddocs(): + """ + Build design documents. + + For ease of development, couch backend design documents are stored as + `.js` files in subdirectories of + `../common/src/leap/soledad/common/ddocs`. This function scans that + directory for javascript files, and builds the design documents structure. + + This funciton uses the following conventions to generate design documents: + + - Design documents are represented by directories in the form + `/`, there prefix is the `src/leap/soledad/common/ddocs` + directory. + - Design document directories might contain `views`, `lists` and + `updates` subdirectories. + - Views subdirectories must contain a `map.js` file and may contain a + `reduce.js` file. + - List and updates subdirectories may contain any number of javascript + files (i.e. ending in `.js`) whose names will be mapped to the + corresponding list or update function name. + """ + ddocs = {} + + # design docs are represented by subdirectories of `DDOCS_REL_PATH` + cur_pwd = dirname(realpath(__file__)) + ddocs_path = join(cur_pwd, *DDOCS_REL_PATH) + for ddoc in [f for f in listdir(ddocs_path) + if isdir(join(ddocs_path, f))]: + + ddocs[ddoc] = {'_id': '_design/%s' % ddoc} + + for t in ['views', 'lists', 'updates']: + tdir = join(ddocs_path, ddoc, t) + if isdir(tdir): + + ddocs[ddoc][t] = {} + + if t == 'views': # handle views (with map/reduce functions) + for view in [f for f in listdir(tdir) + if isdir(join(tdir, f))]: + # look for map.js and reduce.js + mapfile = join(tdir, view, 'map.js') + reducefile = join(tdir, view, 'reduce.js') + mapfun = None + reducefun = None + try: + with open(mapfile) as f: + mapfun = f.read() + except IOError: + pass + try: + with open(reducefile) as f: + reducefun = f.read() + except IOError: + pass + ddocs[ddoc]['views'][view] = {} + + if mapfun is not None: + ddocs[ddoc]['views'][view]['map'] = mapfun + if reducefun is not None: + ddocs[ddoc]['views'][view]['reduce'] = reducefun + + else: # handle lists, updates, etc + for fun in [f for f in listdir(tdir) + if isfile(join(tdir, f))]: + funfile = join(tdir, fun) + funname = basename(funfile).replace('.js', '') + try: + with open(funfile) as f: + ddocs[ddoc][t][funname] = f.read() + except IOError: + pass + return ddocs + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + 'target', type=str, + help='the target dir where to store design documents') + args = parser.parse_args() + + # check if given target is a directory + if not isdir(args.target): + print 'Error: %s is not a directory.' % args.target + exit(1) + + # write desifgn docs files + ddocs = build_ddocs() + for ddoc in ddocs: + ddoc_filename = "%s.json" % ddoc + with open(join(args.target, ddoc_filename), 'w') as f: + f.write("%s" % json.dumps(ddocs[ddoc], indent=3)) + print "Wrote _design/%s content in %s" % (ddoc, join(args.target, ddoc_filename,)) -- cgit v1.2.3 From 573909b10d77ef3d889d33cfaeb3fdadd0135daf Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 7 Apr 2014 12:40:45 -0300 Subject: Reorganize scripts directory. --- scripts/backends_cpu_usage/log_cpu_usage.py | 46 ----- scripts/backends_cpu_usage/movingaverage.py | 209 --------------------- scripts/backends_cpu_usage/plot.py | 81 -------- scripts/backends_cpu_usage/test_u1db_sync.py | 113 ----------- scripts/ddocs/update_design_docs.py | 147 +++++++++++++++ .../doc_put_memory_usage/find_max_upload_size.py | 169 ----------------- scripts/doc_put_memory_usage/get-mem.py | 16 -- scripts/doc_put_memory_usage/plot-mem.py | 73 ------- .../profiling/backends_cpu_usage/log_cpu_usage.py | 46 +++++ .../profiling/backends_cpu_usage/movingaverage.py | 209 +++++++++++++++++++++ scripts/profiling/backends_cpu_usage/plot.py | 81 ++++++++ .../profiling/backends_cpu_usage/test_u1db_sync.py | 113 +++++++++++ .../doc_put_memory_usage/find_max_upload_size.py | 169 +++++++++++++++++ scripts/profiling/doc_put_memory_usage/get-mem.py | 16 ++ scripts/profiling/doc_put_memory_usage/plot-mem.py | 73 +++++++ scripts/update_design_docs.py | 147 --------------- 16 files changed, 854 insertions(+), 854 deletions(-) delete mode 100755 scripts/backends_cpu_usage/log_cpu_usage.py delete mode 100644 scripts/backends_cpu_usage/movingaverage.py delete mode 100755 scripts/backends_cpu_usage/plot.py delete mode 100755 scripts/backends_cpu_usage/test_u1db_sync.py create mode 100644 scripts/ddocs/update_design_docs.py delete mode 100755 scripts/doc_put_memory_usage/find_max_upload_size.py delete mode 100755 scripts/doc_put_memory_usage/get-mem.py delete mode 100755 scripts/doc_put_memory_usage/plot-mem.py create mode 100755 scripts/profiling/backends_cpu_usage/log_cpu_usage.py create mode 100644 scripts/profiling/backends_cpu_usage/movingaverage.py create mode 100755 scripts/profiling/backends_cpu_usage/plot.py create mode 100755 scripts/profiling/backends_cpu_usage/test_u1db_sync.py create mode 100755 scripts/profiling/doc_put_memory_usage/find_max_upload_size.py create mode 100755 scripts/profiling/doc_put_memory_usage/get-mem.py create mode 100755 scripts/profiling/doc_put_memory_usage/plot-mem.py delete mode 100644 scripts/update_design_docs.py (limited to 'scripts') diff --git a/scripts/backends_cpu_usage/log_cpu_usage.py b/scripts/backends_cpu_usage/log_cpu_usage.py deleted file mode 100755 index 2674e1ff..00000000 --- a/scripts/backends_cpu_usage/log_cpu_usage.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/python - - -# Get the CPU usage and print to file. - - -import psutil -import time -import argparse -import os -import threading - - -class LogCpuUsage(threading.Thread): - - def __init__(self, fname): - threading.Thread.__init__(self) - self._stopped = True - self._fname = fname - - def run(self): - self._stopped = False - with open(self._fname, 'w') as f: - start = time.time() - while self._stopped is False: - now = time.time() - f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) - time.sleep(0.01) - - def stop(self): - self._stopped = True - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('file', help='where to save output') - args = parser.parse_args() - - if os.path.isfile(args.file): - replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) - if replace.lower() != 'y': - print 'Bailing out.' - exit(1) - - log_cpu = LogCpuUsage(args.file) - log_cpu.run() diff --git a/scripts/backends_cpu_usage/movingaverage.py b/scripts/backends_cpu_usage/movingaverage.py deleted file mode 100644 index bac1b3e1..00000000 --- a/scripts/backends_cpu_usage/movingaverage.py +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env python -# -# Sean Reifschneider, tummy.com, ltd. -# Released into the Public Domain, 2011-02-06 - -import itertools -from itertools import islice -from collections import deque - - -######################################################### -def movingaverage(data, subset_size, data_is_list = None, - avoid_fp_drift = True): - '''Return the moving averages of the data, with a window size of - `subset_size`. `subset_size` must be an integer greater than 0 and - less than the length of the input data, or a ValueError will be raised. - - `data_is_list` can be used to tune the algorithm for list or iteratable - as an input. The default value, `None` will auto-detect this. - The algorithm used if `data` is a list is almost twice as fast as if - it is an iteratable. - - `avoid_fp_drift`, if True (the default) sums every sub-set rather than - keeping a "rolling sum" (which may be subject to floating-point drift). - While more correct, it is also dramatically slower for subset sizes - much larger than 20. - - NOTE: You really should consider setting `avoid_fp_drift = False` unless - you are dealing with very small numbers (say, far smaller than 0.00001) - or require extreme accuracy at the cost of execution time. For - `subset_size` < 20, the performance difference is very small. - ''' - if subset_size < 1: - raise ValueError('subset_size must be 1 or larger') - - if data_is_list is None: - data_is_list = hasattr(data, '__getslice__') - - divisor = float(subset_size) - if data_is_list: - # This only works if we can re-access old elements, but is much faster. - # In other words, it can't be just an iterable, it needs to be a list. - - if subset_size > len(data): - raise ValueError('subset_size must be smaller than data set size') - - if avoid_fp_drift: - for x in range(subset_size, len(data) + 1): - yield sum(data[x - subset_size:x]) / divisor - else: - cur = sum(data[0:subset_size]) - yield cur / divisor - for x in range(subset_size, len(data)): - cur += data[x] - data[x - subset_size] - yield cur / divisor - else: - # Based on the recipe at: - # http://docs.python.org/library/collections.html#deque-recipes - it = iter(data) - d = deque(islice(it, subset_size)) - - if subset_size > len(d): - raise ValueError('subset_size must be smaller than data set size') - - if avoid_fp_drift: - yield sum(d) / divisor - for elem in it: - d.popleft() - d.append(elem) - yield sum(d) / divisor - else: - s = sum(d) - yield s / divisor - for elem in it: - s += elem - d.popleft() - d.append(elem) - yield s / divisor - - -########################## -if __name__ == '__main__': - import unittest - - class TestMovingAverage(unittest.TestCase): - #################### - def test_List(self): - try: - list(movingaverage([1,2,3], 0)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage([1,2,3,4,5,6], 7)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) - - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, False)), [40.0,42.0,45.0,43.0]) - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, True)), [40.0,42.0,45.0,43.0]) - - - ###################### - def test_XRange(self): - try: - list(movingaverage(xrange(1, 4), 0)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage(xrange(1, 7), 7)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage(xrange(1, 7), 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), - 2)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) - self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) - - - ########################### - def test_ListRolling(self): - try: - list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, - avoid_fp_drift = False)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, - avoid_fp_drift = False)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2, - avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, - avoid_fp_drift = False)), [2,3,4,5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, - avoid_fp_drift = False)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, - avoid_fp_drift = False)), [3,4]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, - avoid_fp_drift = False)), [3.5]) - - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) - - - ############################# - def test_XRangeRolling(self): - try: - list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage(xrange(1, 7), 1, - avoid_fp_drift = False)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage(xrange(1, 7), 2, - avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), - 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 3, - avoid_fp_drift = False)), [2,3,4,5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 4, - avoid_fp_drift = False)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 5, - avoid_fp_drift = False)), [3,4]) - self.assertEqual(list(movingaverage(xrange(1, 7), 6, - avoid_fp_drift = False)), [3.5]) - - - ###################################################################### - suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) - unittest.TextTestRunner(verbosity = 2).run(suite) - diff --git a/scripts/backends_cpu_usage/plot.py b/scripts/backends_cpu_usage/plot.py deleted file mode 100755 index 4e5083ad..00000000 --- a/scripts/backends_cpu_usage/plot.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/python - - -from matplotlib import pyplot as plt -from movingaverage import movingaverage - - -def smooth(l): - return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) - - -files = [ - ('sqlite', 'b'), - ('sqlcipher', 'r'), - ('u1dblite', 'g'), - ('u1dbcipher', 'm'), -] - - -# config the plot -plt.xlabel('time (s)') -plt.ylabel('cpu usage (%)') -plt.title('u1db backends CPU usage') - - -for fi in files: - - backend = fi[0] - color = fi[1] - filename = '%s.txt' % backend - - x = [] - y = [] - - xmax = None - xmin = None - ymax = None - ymin = None - - # read data from file - with open(filename, 'r') as f: - line = f.readline() - while line is not None: - time, cpu = tuple(line.strip().split(' ')) - cpu = float(cpu) - x.append(float(time)) - y.append(cpu) - if ymax == None or cpu > ymax: - ymax = cpu - xmax = time - if ymin == None or cpu < ymin: - ymin = cpu - xmin = time - line = f.readline() - if line == '': - break - - kwargs = { - 'linewidth': 1.0, - 'linestyle': '-', - # 'marker': '.', - 'color': color, - } - plt.plot( - [n for n in smooth(x)], - [n for n in smooth(y)], - label=backend, **kwargs) - - #plt.axes().get_xaxis().set_ticks(x) - #plt.axes().get_xaxis().set_ticklabels(x) - - # annotate max and min values - #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) - #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) - - -plt.ylim(0, 100) -plt.grid() -plt.legend() -plt.show() - diff --git a/scripts/backends_cpu_usage/test_u1db_sync.py b/scripts/backends_cpu_usage/test_u1db_sync.py deleted file mode 100755 index 26ef8f9f..00000000 --- a/scripts/backends_cpu_usage/test_u1db_sync.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/python - - -import u1db -import tempfile -import logging -import shutil -import os -import argparse -import time -import binascii -import random - - -from leap.soledad.client.sqlcipher import open as sqlcipher_open -from log_cpu_usage import LogCpuUsage -from u1dblite import open as u1dblite_open -from u1dbcipher import open as u1dbcipher_open - - -DOCS_TO_SYNC = 1000 -SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB -BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB - - -def get_data(size): - return binascii.hexlify(os.urandom(size/2)) - - -def run_test(testname, open_fun, tempdir, docs, *args): - logger.info('Starting test \"%s\".' % testname) - - # instantiate dbs - db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) - db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) - - # get sync target and synchsonizer - target = db2.get_sync_target() - synchronizer = u1db.sync.Synchronizer(db1, target) - - - # generate lots of small documents - logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) - for content in docs: - db1.create_doc(content) - logger.info('%d documents created in source db.' % DOCS_TO_SYNC) - - # run the test - filename = testname + '.txt' - logger.info('Logging CPU usage to %s.' % filename) - log_cpu = LogCpuUsage(filename) - tstart = time.time() - - # start logging cpu - log_cpu.start() - logger.info('Sleeping for 5 seconds...') - time.sleep(5) - - # sync - logger.info('Starting sync...') - sstart = time.time() - synchronizer.sync() - send = time.time() - logger.info('Sync finished.') - - # stop logging cpu - logger.info('Sleeping for 5 seconds...') - time.sleep(5) - tend = time.time() - log_cpu.stop() - - # report - logger.info('Total sync time: %f seconds' % (send - sstart)) - logger.info('Total test time: %f seconds' % (tend - tstart)) - logger.info('Finished test \"%s\".' % testname) - - # close dbs - db1.close() - db2.close() - - -if __name__ == '__main__': - - # configure logger - logger = logging.getLogger(__name__) - LOG_FORMAT = '%(asctime)s %(message)s' - logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - - - # get a temporary dir - tempdir = tempfile.mkdtemp() - logger.info('Using temporary directory %s' % tempdir) - - - # create a lot of documents with random sizes - docs = [] - for i in xrange(DOCS_TO_SYNC): - docs.append({ - 'index': i, - #'data': get_data( - # random.randrange( - # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) - }) - - # run tests - run_test('sqlite', u1db.open, tempdir, docs, True) - run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) - run_test('u1dblite', u1dblite_open, tempdir, docs) - run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) - - # remove temporary dir - logger.info('Removing temporary directory %s' % tempdir) - shutil.rmtree(tempdir) diff --git a/scripts/ddocs/update_design_docs.py b/scripts/ddocs/update_design_docs.py new file mode 100644 index 00000000..e7b5a29c --- /dev/null +++ b/scripts/ddocs/update_design_docs.py @@ -0,0 +1,147 @@ +#!/usr/bin/python + +# This script updates Soledad's design documents in the session database and +# all user databases with contents from the installed leap.soledad.common +# package. + +import json +import logging +import argparse +import re +import threading +import binascii + + +from getpass import getpass +from ConfigParser import ConfigParser +from couchdb.client import Server +from couchdb.http import Resource, Session +from datetime import datetime +from urlparse import urlparse + + +from leap.soledad.common import ddocs + + +# parse command line for the log file name +logger_fname = "/tmp/update-design-docs_%s.log" % \ + str(datetime.now()).replace(' ', '_') +parser = argparse.ArgumentParser() +parser.add_argument('--log', action='store', default=logger_fname, type=str, + required=False, help='the name of the log file', nargs=1) +args = parser.parse_args() + + +# configure the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +print "Logging to %s." % args.log +logging.basicConfig( + filename=args.log, + format="%(asctime)-15s %(message)s") + + +# configure threads +max_threads = 20 +semaphore_pool = threading.BoundedSemaphore(value=max_threads) +threads = [] + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = urlparse(cp.get('soledad-server', 'couch_url')) + +# get admin password +netloc = re.sub('^.*@', '', url.netloc) +url = url._replace(netloc=netloc) +password = getpass("Admin password for %s: " % url.geturl()) +url = url._replace(netloc='admin:%s@%s' % (password, netloc)) + +resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10)) +server = Server(url=resource) + +hidden_url = re.sub( + 'http://(.*):.*@', + 'http://\\1:xxxxx@', + url.geturl()) + +print """ +========== +ATTENTION! +========== + +This script will modify Soledad's shared and user databases in: + + %s + +This script does not make a backup of the couch db data, so make sure you +have a copy or you may loose data. +""" % hidden_url +confirm = raw_input("Proceed (type uppercase YES)? ") + +if confirm != "YES": + exit(1) + +# convert design doc content + +design_docs = { + '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)), + '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)), + '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)), +} + +# +# Thread +# + +class DBWorkerThread(threading.Thread): + + def __init__(self, server, dbname, db_idx, db_len, release_fun): + threading.Thread.__init__(self) + self._dbname = dbname + self._cdb = server[self._dbname] + self._db_idx = db_idx + self._db_len = db_len + self._release_fun = release_fun + + def run(self): + + logger.info("(%d/%d) Updating db %s." % (self._db_idx, self._db_len, + self._dbname)) + + for doc_id in design_docs: + doc = self._cdb[doc_id] + for key in ['lists', 'views', 'updates']: + if key in design_docs[doc_id]: + doc[key] = design_docs[doc_id][key] + self._cdb.save(doc) + + # release the semaphore + self._release_fun() + + +db_idx = 0 +db_len = len(server) +for dbname in server: + + db_idx += 1 + + if not (dbname.startswith('user-') or dbname == 'shared') \ + or dbname == 'user-test-db': + logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) + continue + + + # get access to couch db + cdb = Server(url.geturl())[dbname] + + #--------------------------------------------------------------------- + # Start DB worker thread + #--------------------------------------------------------------------- + semaphore_pool.acquire() + thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) + thread.daemon = True + thread.start() + threads.append(thread) + +map(lambda thread: thread.join(), threads) diff --git a/scripts/doc_put_memory_usage/find_max_upload_size.py b/scripts/doc_put_memory_usage/find_max_upload_size.py deleted file mode 100755 index 02c68015..00000000 --- a/scripts/doc_put_memory_usage/find_max_upload_size.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/python - -# This script finds the maximum upload size for a document in the current -# server. It pulls couch URL from Soledad config file and attempts multiple -# PUTs until it finds the maximum size supported by the server. -# -# As the Soledad couch user is not an admin, you have to pass a database into -# which the test will be run. The database should already exist and be -# initialized with soledad design documents. -# -# Use it like this: -# -# ./find_max_upload_size.py -# ./find_max_upload_size.py -h - -import os -import configparser -import logging -import argparse -import random -import string -import binascii -import json -import time -import uuid - - -from couchdb.client import Database -from socket import error as socket_error -from leap.soledad.common.couch import CouchDatabase - - -SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' -PREFIX = '/tmp/soledad_test' -LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' -RETRIES = 3 # number of times to retry uploading a document of a certain - # size after a failure - - -# configure logger -logger = logging.getLogger(__name__) - - -def config_log(level): - logging.basicConfig(format=LOG_FORMAT, level=level) - - -def log_to_file(filename): - handler = logging.FileHandler(filename, mode='a') - handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) - logger.addHandler(handler) - - -# create test dir -if not os.path.exists(PREFIX): - os.mkdir(PREFIX) - - -def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): - config = configparser.ConfigParser() - config.read(config_file) - return config['soledad-server']['couch_url'] - - -# generate or load an uploadable doc with the given size in mb -def get_content(size): - fname = os.path.join(PREFIX, 'content-%d.json' % size) - if os.path.exists(fname): - logger.debug('Loading content with %d MB...' % size) - with open(fname, 'r') as f: - return f.read() - else: - length = int(size * 1024 ** 2) - logger.debug('Generating body with %d MB...' % size) - content = binascii.hexlify(os.urandom(length))[:length] - with open(fname, 'w') as f: - f.write(content) - return content - - -def delete_doc(db): - doc = db.get('largedoc') - db.delete(doc) - - -def upload(db, size, couch_db): - # try many times to be sure that size is infeasible - for i in range(RETRIES): - # wait until server is up to upload - while True: - try: - 'largedoc' in couch_db - break - except socket_error: - logger.debug('Waiting for server to come up...') - time.sleep(1) - # attempt to upload - try: - logger.debug( - 'Trying to upload %d MB document (attempt %d/%d)...' % - (size, (i+1), RETRIES)) - content = get_content(size) - logger.debug('Starting upload of %d bytes.' % len(content)) - doc = db.create_doc({'data': content}, doc_id='largedoc') - delete_doc(couch_db) - logger.debug('Success uploading %d MB doc.' % size) - return True - except Exception as e: - logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) - return False - - -def find_max_upload_size(db_uri): - db = CouchDatabase.open_database(db_uri, False) - couch_db = Database(db_uri) - logger.debug('Database URI: %s' % db_uri) - # delete eventual leftover from last run - if 'largedoc' in couch_db: - delete_doc(couch_db) - # phase 1: increase upload size exponentially - logger.info('Starting phase 1: increasing size exponentially.') - size = 1 - #import ipdb; ipdb.set_trace() - while True: - if upload(db, size, couch_db): - size *= 2 - else: - break - - # phase 2: binary search for maximum value - unable = size - able = size / 2 - logger.info('Starting phase 2: binary search for maximum value.') - while unable - able > 1: - size = able + ((unable - able) / 2) - if upload(db, size, couch_db): - able = size - else: - unable = size - return able - - -if __name__ == '__main__': - # parse command line - parser = argparse.ArgumentParser() - parser.add_argument( - '-d', action='store_true', dest='debug', - help='print debugging information') - parser.add_argument( - '-l', dest='logfile', - help='log output to file') - parser.add_argument( - 'db_uri', help='the couch database URI to test') - args = parser.parse_args() - - # log to file - if args.logfile is not None: - log_to_file(args.logfile) - - # set loglevel - if args.debug is True: - config_log(logging.DEBUG) - else: - config_log(logging.INFO) - - # run test and report - logger.info('Will test using db at %s.' % args.db_uri) - maxsize = find_max_upload_size(args.db_uri) - logger.info('Max upload size is %d MB.' % maxsize) diff --git a/scripts/doc_put_memory_usage/get-mem.py b/scripts/doc_put_memory_usage/get-mem.py deleted file mode 100755 index d64875fc..00000000 --- a/scripts/doc_put_memory_usage/get-mem.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/python - - -import psutil -import time - - -delta = 50 * 60 -start = time.time() - -while True: - now = time.time() - print "%s %s" % (now - start, psutil.phymem_usage().used) - time.sleep(0.1) - if now > start + delta: - break diff --git a/scripts/doc_put_memory_usage/plot-mem.py b/scripts/doc_put_memory_usage/plot-mem.py deleted file mode 100755 index e24679a2..00000000 --- a/scripts/doc_put_memory_usage/plot-mem.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/python - - -from matplotlib import pyplot as plt - - -files = [ - ('local', 'couchdb-json', 'b'), - ('local', 'bigcouch-json', 'r'), - ('local', 'couchdb-multipart', 'g'), - ('local', 'bigcouch-multipart', 'm'), -] - - -# config the plot -plt.xlabel('time') -plt.ylabel('memory usage') -plt.title('bigcouch versus couch memory usage') - - -for fi in files: - - machine = fi[0] - database = fi[1] - color = fi[2] - filename = '%s-%s.txt' % (machine, database) - - x = [] - y = [] - - xmax = None - xmin = None - ymax = None - ymin = None - - # read data from file - with open(filename, 'r') as f: - line = f.readline() - while line is not None: - time, mem = tuple(line.strip().split(' ')) - mem = float(mem) / (10**9) - x.append(float(time)) - y.append(mem) - if ymax == None or mem > ymax: - ymax = mem - xmax = time - if ymin == None or mem < ymin: - ymin = mem - xmin = time - line = f.readline() - if line == '': - break - - kwargs = { - 'linewidth': 1.0, - 'linestyle': '-', - # 'marker': '.', - 'color': color, - } - plt.plot(x, y, label=database, **kwargs) - - #plt.axes().get_xaxis().set_ticks(x) - #plt.axes().get_xaxis().set_ticklabels(x) - - # annotate max and min values - #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) - #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) - - -plt.grid() -plt.legend() -plt.show() - diff --git a/scripts/profiling/backends_cpu_usage/log_cpu_usage.py b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py new file mode 100755 index 00000000..2674e1ff --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + + +# Get the CPU usage and print to file. + + +import psutil +import time +import argparse +import os +import threading + + +class LogCpuUsage(threading.Thread): + + def __init__(self, fname): + threading.Thread.__init__(self) + self._stopped = True + self._fname = fname + + def run(self): + self._stopped = False + with open(self._fname, 'w') as f: + start = time.time() + while self._stopped is False: + now = time.time() + f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) + time.sleep(0.01) + + def stop(self): + self._stopped = True + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('file', help='where to save output') + args = parser.parse_args() + + if os.path.isfile(args.file): + replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) + if replace.lower() != 'y': + print 'Bailing out.' + exit(1) + + log_cpu = LogCpuUsage(args.file) + log_cpu.run() diff --git a/scripts/profiling/backends_cpu_usage/movingaverage.py b/scripts/profiling/backends_cpu_usage/movingaverage.py new file mode 100644 index 00000000..bac1b3e1 --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/movingaverage.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +# Sean Reifschneider, tummy.com, ltd. +# Released into the Public Domain, 2011-02-06 + +import itertools +from itertools import islice +from collections import deque + + +######################################################### +def movingaverage(data, subset_size, data_is_list = None, + avoid_fp_drift = True): + '''Return the moving averages of the data, with a window size of + `subset_size`. `subset_size` must be an integer greater than 0 and + less than the length of the input data, or a ValueError will be raised. + + `data_is_list` can be used to tune the algorithm for list or iteratable + as an input. The default value, `None` will auto-detect this. + The algorithm used if `data` is a list is almost twice as fast as if + it is an iteratable. + + `avoid_fp_drift`, if True (the default) sums every sub-set rather than + keeping a "rolling sum" (which may be subject to floating-point drift). + While more correct, it is also dramatically slower for subset sizes + much larger than 20. + + NOTE: You really should consider setting `avoid_fp_drift = False` unless + you are dealing with very small numbers (say, far smaller than 0.00001) + or require extreme accuracy at the cost of execution time. For + `subset_size` < 20, the performance difference is very small. + ''' + if subset_size < 1: + raise ValueError('subset_size must be 1 or larger') + + if data_is_list is None: + data_is_list = hasattr(data, '__getslice__') + + divisor = float(subset_size) + if data_is_list: + # This only works if we can re-access old elements, but is much faster. + # In other words, it can't be just an iterable, it needs to be a list. + + if subset_size > len(data): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + for x in range(subset_size, len(data) + 1): + yield sum(data[x - subset_size:x]) / divisor + else: + cur = sum(data[0:subset_size]) + yield cur / divisor + for x in range(subset_size, len(data)): + cur += data[x] - data[x - subset_size] + yield cur / divisor + else: + # Based on the recipe at: + # http://docs.python.org/library/collections.html#deque-recipes + it = iter(data) + d = deque(islice(it, subset_size)) + + if subset_size > len(d): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + yield sum(d) / divisor + for elem in it: + d.popleft() + d.append(elem) + yield sum(d) / divisor + else: + s = sum(d) + yield s / divisor + for elem in it: + s += elem - d.popleft() + d.append(elem) + yield s / divisor + + +########################## +if __name__ == '__main__': + import unittest + + class TestMovingAverage(unittest.TestCase): + #################### + def test_List(self): + try: + list(movingaverage([1,2,3], 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True)), [40.0,42.0,45.0,43.0]) + + + ###################### + def test_XRange(self): + try: + list(movingaverage(xrange(1, 4), 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) + + + ########################### + def test_ListRolling(self): + try: + list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, + avoid_fp_drift = False)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, + avoid_fp_drift = False)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + + + ############################# + def test_XRangeRolling(self): + try: + list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6, + avoid_fp_drift = False)), [3.5]) + + + ###################################################################### + suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) + unittest.TextTestRunner(verbosity = 2).run(suite) + diff --git a/scripts/profiling/backends_cpu_usage/plot.py b/scripts/profiling/backends_cpu_usage/plot.py new file mode 100755 index 00000000..4e5083ad --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/plot.py @@ -0,0 +1,81 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt +from movingaverage import movingaverage + + +def smooth(l): + return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) + + +files = [ + ('sqlite', 'b'), + ('sqlcipher', 'r'), + ('u1dblite', 'g'), + ('u1dbcipher', 'm'), +] + + +# config the plot +plt.xlabel('time (s)') +plt.ylabel('cpu usage (%)') +plt.title('u1db backends CPU usage') + + +for fi in files: + + backend = fi[0] + color = fi[1] + filename = '%s.txt' % backend + + x = [] + y = [] + + xmax = None + xmin = None + ymax = None + ymin = None + + # read data from file + with open(filename, 'r') as f: + line = f.readline() + while line is not None: + time, cpu = tuple(line.strip().split(' ')) + cpu = float(cpu) + x.append(float(time)) + y.append(cpu) + if ymax == None or cpu > ymax: + ymax = cpu + xmax = time + if ymin == None or cpu < ymin: + ymin = cpu + xmin = time + line = f.readline() + if line == '': + break + + kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', + # 'marker': '.', + 'color': color, + } + plt.plot( + [n for n in smooth(x)], + [n for n in smooth(y)], + label=backend, **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) + #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.ylim(0, 100) +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/profiling/backends_cpu_usage/test_u1db_sync.py b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py new file mode 100755 index 00000000..26ef8f9f --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py @@ -0,0 +1,113 @@ +#!/usr/bin/python + + +import u1db +import tempfile +import logging +import shutil +import os +import argparse +import time +import binascii +import random + + +from leap.soledad.client.sqlcipher import open as sqlcipher_open +from log_cpu_usage import LogCpuUsage +from u1dblite import open as u1dblite_open +from u1dbcipher import open as u1dbcipher_open + + +DOCS_TO_SYNC = 1000 +SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB +BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB + + +def get_data(size): + return binascii.hexlify(os.urandom(size/2)) + + +def run_test(testname, open_fun, tempdir, docs, *args): + logger.info('Starting test \"%s\".' % testname) + + # instantiate dbs + db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) + db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) + + # get sync target and synchsonizer + target = db2.get_sync_target() + synchronizer = u1db.sync.Synchronizer(db1, target) + + + # generate lots of small documents + logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) + for content in docs: + db1.create_doc(content) + logger.info('%d documents created in source db.' % DOCS_TO_SYNC) + + # run the test + filename = testname + '.txt' + logger.info('Logging CPU usage to %s.' % filename) + log_cpu = LogCpuUsage(filename) + tstart = time.time() + + # start logging cpu + log_cpu.start() + logger.info('Sleeping for 5 seconds...') + time.sleep(5) + + # sync + logger.info('Starting sync...') + sstart = time.time() + synchronizer.sync() + send = time.time() + logger.info('Sync finished.') + + # stop logging cpu + logger.info('Sleeping for 5 seconds...') + time.sleep(5) + tend = time.time() + log_cpu.stop() + + # report + logger.info('Total sync time: %f seconds' % (send - sstart)) + logger.info('Total test time: %f seconds' % (tend - tstart)) + logger.info('Finished test \"%s\".' % testname) + + # close dbs + db1.close() + db2.close() + + +if __name__ == '__main__': + + # configure logger + logger = logging.getLogger(__name__) + LOG_FORMAT = '%(asctime)s %(message)s' + logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + + # get a temporary dir + tempdir = tempfile.mkdtemp() + logger.info('Using temporary directory %s' % tempdir) + + + # create a lot of documents with random sizes + docs = [] + for i in xrange(DOCS_TO_SYNC): + docs.append({ + 'index': i, + #'data': get_data( + # random.randrange( + # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) + }) + + # run tests + run_test('sqlite', u1db.open, tempdir, docs, True) + run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) + run_test('u1dblite', u1dblite_open, tempdir, docs) + run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) + + # remove temporary dir + logger.info('Removing temporary directory %s' % tempdir) + shutil.rmtree(tempdir) diff --git a/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py b/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py new file mode 100755 index 00000000..02c68015 --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py @@ -0,0 +1,169 @@ +#!/usr/bin/python + +# This script finds the maximum upload size for a document in the current +# server. It pulls couch URL from Soledad config file and attempts multiple +# PUTs until it finds the maximum size supported by the server. +# +# As the Soledad couch user is not an admin, you have to pass a database into +# which the test will be run. The database should already exist and be +# initialized with soledad design documents. +# +# Use it like this: +# +# ./find_max_upload_size.py +# ./find_max_upload_size.py -h + +import os +import configparser +import logging +import argparse +import random +import string +import binascii +import json +import time +import uuid + + +from couchdb.client import Database +from socket import error as socket_error +from leap.soledad.common.couch import CouchDatabase + + +SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' +PREFIX = '/tmp/soledad_test' +LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' +RETRIES = 3 # number of times to retry uploading a document of a certain + # size after a failure + + +# configure logger +logger = logging.getLogger(__name__) + + +def config_log(level): + logging.basicConfig(format=LOG_FORMAT, level=level) + + +def log_to_file(filename): + handler = logging.FileHandler(filename, mode='a') + handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) + logger.addHandler(handler) + + +# create test dir +if not os.path.exists(PREFIX): + os.mkdir(PREFIX) + + +def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): + config = configparser.ConfigParser() + config.read(config_file) + return config['soledad-server']['couch_url'] + + +# generate or load an uploadable doc with the given size in mb +def get_content(size): + fname = os.path.join(PREFIX, 'content-%d.json' % size) + if os.path.exists(fname): + logger.debug('Loading content with %d MB...' % size) + with open(fname, 'r') as f: + return f.read() + else: + length = int(size * 1024 ** 2) + logger.debug('Generating body with %d MB...' % size) + content = binascii.hexlify(os.urandom(length))[:length] + with open(fname, 'w') as f: + f.write(content) + return content + + +def delete_doc(db): + doc = db.get('largedoc') + db.delete(doc) + + +def upload(db, size, couch_db): + # try many times to be sure that size is infeasible + for i in range(RETRIES): + # wait until server is up to upload + while True: + try: + 'largedoc' in couch_db + break + except socket_error: + logger.debug('Waiting for server to come up...') + time.sleep(1) + # attempt to upload + try: + logger.debug( + 'Trying to upload %d MB document (attempt %d/%d)...' % + (size, (i+1), RETRIES)) + content = get_content(size) + logger.debug('Starting upload of %d bytes.' % len(content)) + doc = db.create_doc({'data': content}, doc_id='largedoc') + delete_doc(couch_db) + logger.debug('Success uploading %d MB doc.' % size) + return True + except Exception as e: + logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) + return False + + +def find_max_upload_size(db_uri): + db = CouchDatabase.open_database(db_uri, False) + couch_db = Database(db_uri) + logger.debug('Database URI: %s' % db_uri) + # delete eventual leftover from last run + if 'largedoc' in couch_db: + delete_doc(couch_db) + # phase 1: increase upload size exponentially + logger.info('Starting phase 1: increasing size exponentially.') + size = 1 + #import ipdb; ipdb.set_trace() + while True: + if upload(db, size, couch_db): + size *= 2 + else: + break + + # phase 2: binary search for maximum value + unable = size + able = size / 2 + logger.info('Starting phase 2: binary search for maximum value.') + while unable - able > 1: + size = able + ((unable - able) / 2) + if upload(db, size, couch_db): + able = size + else: + unable = size + return able + + +if __name__ == '__main__': + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + '-d', action='store_true', dest='debug', + help='print debugging information') + parser.add_argument( + '-l', dest='logfile', + help='log output to file') + parser.add_argument( + 'db_uri', help='the couch database URI to test') + args = parser.parse_args() + + # log to file + if args.logfile is not None: + log_to_file(args.logfile) + + # set loglevel + if args.debug is True: + config_log(logging.DEBUG) + else: + config_log(logging.INFO) + + # run test and report + logger.info('Will test using db at %s.' % args.db_uri) + maxsize = find_max_upload_size(args.db_uri) + logger.info('Max upload size is %d MB.' % maxsize) diff --git a/scripts/profiling/doc_put_memory_usage/get-mem.py b/scripts/profiling/doc_put_memory_usage/get-mem.py new file mode 100755 index 00000000..d64875fc --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/get-mem.py @@ -0,0 +1,16 @@ +#!/usr/bin/python + + +import psutil +import time + + +delta = 50 * 60 +start = time.time() + +while True: + now = time.time() + print "%s %s" % (now - start, psutil.phymem_usage().used) + time.sleep(0.1) + if now > start + delta: + break diff --git a/scripts/profiling/doc_put_memory_usage/plot-mem.py b/scripts/profiling/doc_put_memory_usage/plot-mem.py new file mode 100755 index 00000000..e24679a2 --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/plot-mem.py @@ -0,0 +1,73 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt + + +files = [ + ('local', 'couchdb-json', 'b'), + ('local', 'bigcouch-json', 'r'), + ('local', 'couchdb-multipart', 'g'), + ('local', 'bigcouch-multipart', 'm'), +] + + +# config the plot +plt.xlabel('time') +plt.ylabel('memory usage') +plt.title('bigcouch versus couch memory usage') + + +for fi in files: + + machine = fi[0] + database = fi[1] + color = fi[2] + filename = '%s-%s.txt' % (machine, database) + + x = [] + y = [] + + xmax = None + xmin = None + ymax = None + ymin = None + + # read data from file + with open(filename, 'r') as f: + line = f.readline() + while line is not None: + time, mem = tuple(line.strip().split(' ')) + mem = float(mem) / (10**9) + x.append(float(time)) + y.append(mem) + if ymax == None or mem > ymax: + ymax = mem + xmax = time + if ymin == None or mem < ymin: + ymin = mem + xmin = time + line = f.readline() + if line == '': + break + + kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', + # 'marker': '.', + 'color': color, + } + plt.plot(x, y, label=database, **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) + #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/update_design_docs.py b/scripts/update_design_docs.py deleted file mode 100644 index e7b5a29c..00000000 --- a/scripts/update_design_docs.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/python - -# This script updates Soledad's design documents in the session database and -# all user databases with contents from the installed leap.soledad.common -# package. - -import json -import logging -import argparse -import re -import threading -import binascii - - -from getpass import getpass -from ConfigParser import ConfigParser -from couchdb.client import Server -from couchdb.http import Resource, Session -from datetime import datetime -from urlparse import urlparse - - -from leap.soledad.common import ddocs - - -# parse command line for the log file name -logger_fname = "/tmp/update-design-docs_%s.log" % \ - str(datetime.now()).replace(' ', '_') -parser = argparse.ArgumentParser() -parser.add_argument('--log', action='store', default=logger_fname, type=str, - required=False, help='the name of the log file', nargs=1) -args = parser.parse_args() - - -# configure the logger -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -print "Logging to %s." % args.log -logging.basicConfig( - filename=args.log, - format="%(asctime)-15s %(message)s") - - -# configure threads -max_threads = 20 -semaphore_pool = threading.BoundedSemaphore(value=max_threads) -threads = [] - -# get couch url -cp = ConfigParser() -cp.read('/etc/leap/soledad-server.conf') -url = urlparse(cp.get('soledad-server', 'couch_url')) - -# get admin password -netloc = re.sub('^.*@', '', url.netloc) -url = url._replace(netloc=netloc) -password = getpass("Admin password for %s: " % url.geturl()) -url = url._replace(netloc='admin:%s@%s' % (password, netloc)) - -resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10)) -server = Server(url=resource) - -hidden_url = re.sub( - 'http://(.*):.*@', - 'http://\\1:xxxxx@', - url.geturl()) - -print """ -========== -ATTENTION! -========== - -This script will modify Soledad's shared and user databases in: - - %s - -This script does not make a backup of the couch db data, so make sure you -have a copy or you may loose data. -""" % hidden_url -confirm = raw_input("Proceed (type uppercase YES)? ") - -if confirm != "YES": - exit(1) - -# convert design doc content - -design_docs = { - '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)), - '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)), - '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)), -} - -# -# Thread -# - -class DBWorkerThread(threading.Thread): - - def __init__(self, server, dbname, db_idx, db_len, release_fun): - threading.Thread.__init__(self) - self._dbname = dbname - self._cdb = server[self._dbname] - self._db_idx = db_idx - self._db_len = db_len - self._release_fun = release_fun - - def run(self): - - logger.info("(%d/%d) Updating db %s." % (self._db_idx, self._db_len, - self._dbname)) - - for doc_id in design_docs: - doc = self._cdb[doc_id] - for key in ['lists', 'views', 'updates']: - if key in design_docs[doc_id]: - doc[key] = design_docs[doc_id][key] - self._cdb.save(doc) - - # release the semaphore - self._release_fun() - - -db_idx = 0 -db_len = len(server) -for dbname in server: - - db_idx += 1 - - if not (dbname.startswith('user-') or dbname == 'shared') \ - or dbname == 'user-test-db': - logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) - continue - - - # get access to couch db - cdb = Server(url.geturl())[dbname] - - #--------------------------------------------------------------------- - # Start DB worker thread - #--------------------------------------------------------------------- - semaphore_pool.acquire() - thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) - thread.daemon = True - thread.start() - threads.append(thread) - -map(lambda thread: thread.join(), threads) -- cgit v1.2.3