From 573909b10d77ef3d889d33cfaeb3fdadd0135daf Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 7 Apr 2014 12:40:45 -0300 Subject: Reorganize scripts directory. --- scripts/backends_cpu_usage/log_cpu_usage.py | 46 ----- scripts/backends_cpu_usage/movingaverage.py | 209 --------------------- scripts/backends_cpu_usage/plot.py | 81 -------- scripts/backends_cpu_usage/test_u1db_sync.py | 113 ----------- scripts/ddocs/update_design_docs.py | 147 +++++++++++++++ .../doc_put_memory_usage/find_max_upload_size.py | 169 ----------------- scripts/doc_put_memory_usage/get-mem.py | 16 -- scripts/doc_put_memory_usage/plot-mem.py | 73 ------- .../profiling/backends_cpu_usage/log_cpu_usage.py | 46 +++++ .../profiling/backends_cpu_usage/movingaverage.py | 209 +++++++++++++++++++++ scripts/profiling/backends_cpu_usage/plot.py | 81 ++++++++ .../profiling/backends_cpu_usage/test_u1db_sync.py | 113 +++++++++++ .../doc_put_memory_usage/find_max_upload_size.py | 169 +++++++++++++++++ scripts/profiling/doc_put_memory_usage/get-mem.py | 16 ++ scripts/profiling/doc_put_memory_usage/plot-mem.py | 73 +++++++ scripts/update_design_docs.py | 147 --------------- 16 files changed, 854 insertions(+), 854 deletions(-) delete mode 100755 scripts/backends_cpu_usage/log_cpu_usage.py delete mode 100644 scripts/backends_cpu_usage/movingaverage.py delete mode 100755 scripts/backends_cpu_usage/plot.py delete mode 100755 scripts/backends_cpu_usage/test_u1db_sync.py create mode 100644 scripts/ddocs/update_design_docs.py delete mode 100755 scripts/doc_put_memory_usage/find_max_upload_size.py delete mode 100755 scripts/doc_put_memory_usage/get-mem.py delete mode 100755 scripts/doc_put_memory_usage/plot-mem.py create mode 100755 scripts/profiling/backends_cpu_usage/log_cpu_usage.py create mode 100644 scripts/profiling/backends_cpu_usage/movingaverage.py create mode 100755 scripts/profiling/backends_cpu_usage/plot.py create mode 100755 scripts/profiling/backends_cpu_usage/test_u1db_sync.py create mode 100755 scripts/profiling/doc_put_memory_usage/find_max_upload_size.py create mode 100755 scripts/profiling/doc_put_memory_usage/get-mem.py create mode 100755 scripts/profiling/doc_put_memory_usage/plot-mem.py delete mode 100644 scripts/update_design_docs.py diff --git a/scripts/backends_cpu_usage/log_cpu_usage.py b/scripts/backends_cpu_usage/log_cpu_usage.py deleted file mode 100755 index 2674e1ff..00000000 --- a/scripts/backends_cpu_usage/log_cpu_usage.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/python - - -# Get the CPU usage and print to file. - - -import psutil -import time -import argparse -import os -import threading - - -class LogCpuUsage(threading.Thread): - - def __init__(self, fname): - threading.Thread.__init__(self) - self._stopped = True - self._fname = fname - - def run(self): - self._stopped = False - with open(self._fname, 'w') as f: - start = time.time() - while self._stopped is False: - now = time.time() - f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) - time.sleep(0.01) - - def stop(self): - self._stopped = True - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('file', help='where to save output') - args = parser.parse_args() - - if os.path.isfile(args.file): - replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) - if replace.lower() != 'y': - print 'Bailing out.' - exit(1) - - log_cpu = LogCpuUsage(args.file) - log_cpu.run() diff --git a/scripts/backends_cpu_usage/movingaverage.py b/scripts/backends_cpu_usage/movingaverage.py deleted file mode 100644 index bac1b3e1..00000000 --- a/scripts/backends_cpu_usage/movingaverage.py +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env python -# -# Sean Reifschneider, tummy.com, ltd. -# Released into the Public Domain, 2011-02-06 - -import itertools -from itertools import islice -from collections import deque - - -######################################################### -def movingaverage(data, subset_size, data_is_list = None, - avoid_fp_drift = True): - '''Return the moving averages of the data, with a window size of - `subset_size`. `subset_size` must be an integer greater than 0 and - less than the length of the input data, or a ValueError will be raised. - - `data_is_list` can be used to tune the algorithm for list or iteratable - as an input. The default value, `None` will auto-detect this. - The algorithm used if `data` is a list is almost twice as fast as if - it is an iteratable. - - `avoid_fp_drift`, if True (the default) sums every sub-set rather than - keeping a "rolling sum" (which may be subject to floating-point drift). - While more correct, it is also dramatically slower for subset sizes - much larger than 20. - - NOTE: You really should consider setting `avoid_fp_drift = False` unless - you are dealing with very small numbers (say, far smaller than 0.00001) - or require extreme accuracy at the cost of execution time. For - `subset_size` < 20, the performance difference is very small. - ''' - if subset_size < 1: - raise ValueError('subset_size must be 1 or larger') - - if data_is_list is None: - data_is_list = hasattr(data, '__getslice__') - - divisor = float(subset_size) - if data_is_list: - # This only works if we can re-access old elements, but is much faster. - # In other words, it can't be just an iterable, it needs to be a list. - - if subset_size > len(data): - raise ValueError('subset_size must be smaller than data set size') - - if avoid_fp_drift: - for x in range(subset_size, len(data) + 1): - yield sum(data[x - subset_size:x]) / divisor - else: - cur = sum(data[0:subset_size]) - yield cur / divisor - for x in range(subset_size, len(data)): - cur += data[x] - data[x - subset_size] - yield cur / divisor - else: - # Based on the recipe at: - # http://docs.python.org/library/collections.html#deque-recipes - it = iter(data) - d = deque(islice(it, subset_size)) - - if subset_size > len(d): - raise ValueError('subset_size must be smaller than data set size') - - if avoid_fp_drift: - yield sum(d) / divisor - for elem in it: - d.popleft() - d.append(elem) - yield sum(d) / divisor - else: - s = sum(d) - yield s / divisor - for elem in it: - s += elem - d.popleft() - d.append(elem) - yield s / divisor - - -########################## -if __name__ == '__main__': - import unittest - - class TestMovingAverage(unittest.TestCase): - #################### - def test_List(self): - try: - list(movingaverage([1,2,3], 0)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage([1,2,3,4,5,6], 7)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) - - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, False)), [40.0,42.0,45.0,43.0]) - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, True)), [40.0,42.0,45.0,43.0]) - - - ###################### - def test_XRange(self): - try: - list(movingaverage(xrange(1, 4), 0)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage(xrange(1, 7), 7)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage(xrange(1, 7), 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), - 2)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) - self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) - - - ########################### - def test_ListRolling(self): - try: - list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, - avoid_fp_drift = False)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, - avoid_fp_drift = False)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2, - avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, - avoid_fp_drift = False)), [2,3,4,5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, - avoid_fp_drift = False)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, - avoid_fp_drift = False)), [3,4]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, - avoid_fp_drift = False)), [3.5]) - - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) - - - ############################# - def test_XRangeRolling(self): - try: - list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage(xrange(1, 7), 1, - avoid_fp_drift = False)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage(xrange(1, 7), 2, - avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), - 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 3, - avoid_fp_drift = False)), [2,3,4,5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 4, - avoid_fp_drift = False)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 5, - avoid_fp_drift = False)), [3,4]) - self.assertEqual(list(movingaverage(xrange(1, 7), 6, - avoid_fp_drift = False)), [3.5]) - - - ###################################################################### - suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) - unittest.TextTestRunner(verbosity = 2).run(suite) - diff --git a/scripts/backends_cpu_usage/plot.py b/scripts/backends_cpu_usage/plot.py deleted file mode 100755 index 4e5083ad..00000000 --- a/scripts/backends_cpu_usage/plot.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/python - - -from matplotlib import pyplot as plt -from movingaverage import movingaverage - - -def smooth(l): - return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) - - -files = [ - ('sqlite', 'b'), - ('sqlcipher', 'r'), - ('u1dblite', 'g'), - ('u1dbcipher', 'm'), -] - - -# config the plot -plt.xlabel('time (s)') -plt.ylabel('cpu usage (%)') -plt.title('u1db backends CPU usage') - - -for fi in files: - - backend = fi[0] - color = fi[1] - filename = '%s.txt' % backend - - x = [] - y = [] - - xmax = None - xmin = None - ymax = None - ymin = None - - # read data from file - with open(filename, 'r') as f: - line = f.readline() - while line is not None: - time, cpu = tuple(line.strip().split(' ')) - cpu = float(cpu) - x.append(float(time)) - y.append(cpu) - if ymax == None or cpu > ymax: - ymax = cpu - xmax = time - if ymin == None or cpu < ymin: - ymin = cpu - xmin = time - line = f.readline() - if line == '': - break - - kwargs = { - 'linewidth': 1.0, - 'linestyle': '-', - # 'marker': '.', - 'color': color, - } - plt.plot( - [n for n in smooth(x)], - [n for n in smooth(y)], - label=backend, **kwargs) - - #plt.axes().get_xaxis().set_ticks(x) - #plt.axes().get_xaxis().set_ticklabels(x) - - # annotate max and min values - #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) - #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) - - -plt.ylim(0, 100) -plt.grid() -plt.legend() -plt.show() - diff --git a/scripts/backends_cpu_usage/test_u1db_sync.py b/scripts/backends_cpu_usage/test_u1db_sync.py deleted file mode 100755 index 26ef8f9f..00000000 --- a/scripts/backends_cpu_usage/test_u1db_sync.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/python - - -import u1db -import tempfile -import logging -import shutil -import os -import argparse -import time -import binascii -import random - - -from leap.soledad.client.sqlcipher import open as sqlcipher_open -from log_cpu_usage import LogCpuUsage -from u1dblite import open as u1dblite_open -from u1dbcipher import open as u1dbcipher_open - - -DOCS_TO_SYNC = 1000 -SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB -BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB - - -def get_data(size): - return binascii.hexlify(os.urandom(size/2)) - - -def run_test(testname, open_fun, tempdir, docs, *args): - logger.info('Starting test \"%s\".' % testname) - - # instantiate dbs - db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) - db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) - - # get sync target and synchsonizer - target = db2.get_sync_target() - synchronizer = u1db.sync.Synchronizer(db1, target) - - - # generate lots of small documents - logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) - for content in docs: - db1.create_doc(content) - logger.info('%d documents created in source db.' % DOCS_TO_SYNC) - - # run the test - filename = testname + '.txt' - logger.info('Logging CPU usage to %s.' % filename) - log_cpu = LogCpuUsage(filename) - tstart = time.time() - - # start logging cpu - log_cpu.start() - logger.info('Sleeping for 5 seconds...') - time.sleep(5) - - # sync - logger.info('Starting sync...') - sstart = time.time() - synchronizer.sync() - send = time.time() - logger.info('Sync finished.') - - # stop logging cpu - logger.info('Sleeping for 5 seconds...') - time.sleep(5) - tend = time.time() - log_cpu.stop() - - # report - logger.info('Total sync time: %f seconds' % (send - sstart)) - logger.info('Total test time: %f seconds' % (tend - tstart)) - logger.info('Finished test \"%s\".' % testname) - - # close dbs - db1.close() - db2.close() - - -if __name__ == '__main__': - - # configure logger - logger = logging.getLogger(__name__) - LOG_FORMAT = '%(asctime)s %(message)s' - logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - - - # get a temporary dir - tempdir = tempfile.mkdtemp() - logger.info('Using temporary directory %s' % tempdir) - - - # create a lot of documents with random sizes - docs = [] - for i in xrange(DOCS_TO_SYNC): - docs.append({ - 'index': i, - #'data': get_data( - # random.randrange( - # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) - }) - - # run tests - run_test('sqlite', u1db.open, tempdir, docs, True) - run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) - run_test('u1dblite', u1dblite_open, tempdir, docs) - run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) - - # remove temporary dir - logger.info('Removing temporary directory %s' % tempdir) - shutil.rmtree(tempdir) diff --git a/scripts/ddocs/update_design_docs.py b/scripts/ddocs/update_design_docs.py new file mode 100644 index 00000000..e7b5a29c --- /dev/null +++ b/scripts/ddocs/update_design_docs.py @@ -0,0 +1,147 @@ +#!/usr/bin/python + +# This script updates Soledad's design documents in the session database and +# all user databases with contents from the installed leap.soledad.common +# package. + +import json +import logging +import argparse +import re +import threading +import binascii + + +from getpass import getpass +from ConfigParser import ConfigParser +from couchdb.client import Server +from couchdb.http import Resource, Session +from datetime import datetime +from urlparse import urlparse + + +from leap.soledad.common import ddocs + + +# parse command line for the log file name +logger_fname = "/tmp/update-design-docs_%s.log" % \ + str(datetime.now()).replace(' ', '_') +parser = argparse.ArgumentParser() +parser.add_argument('--log', action='store', default=logger_fname, type=str, + required=False, help='the name of the log file', nargs=1) +args = parser.parse_args() + + +# configure the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +print "Logging to %s." % args.log +logging.basicConfig( + filename=args.log, + format="%(asctime)-15s %(message)s") + + +# configure threads +max_threads = 20 +semaphore_pool = threading.BoundedSemaphore(value=max_threads) +threads = [] + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = urlparse(cp.get('soledad-server', 'couch_url')) + +# get admin password +netloc = re.sub('^.*@', '', url.netloc) +url = url._replace(netloc=netloc) +password = getpass("Admin password for %s: " % url.geturl()) +url = url._replace(netloc='admin:%s@%s' % (password, netloc)) + +resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10)) +server = Server(url=resource) + +hidden_url = re.sub( + 'http://(.*):.*@', + 'http://\\1:xxxxx@', + url.geturl()) + +print """ +========== +ATTENTION! +========== + +This script will modify Soledad's shared and user databases in: + + %s + +This script does not make a backup of the couch db data, so make sure you +have a copy or you may loose data. +""" % hidden_url +confirm = raw_input("Proceed (type uppercase YES)? ") + +if confirm != "YES": + exit(1) + +# convert design doc content + +design_docs = { + '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)), + '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)), + '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)), +} + +# +# Thread +# + +class DBWorkerThread(threading.Thread): + + def __init__(self, server, dbname, db_idx, db_len, release_fun): + threading.Thread.__init__(self) + self._dbname = dbname + self._cdb = server[self._dbname] + self._db_idx = db_idx + self._db_len = db_len + self._release_fun = release_fun + + def run(self): + + logger.info("(%d/%d) Updating db %s." % (self._db_idx, self._db_len, + self._dbname)) + + for doc_id in design_docs: + doc = self._cdb[doc_id] + for key in ['lists', 'views', 'updates']: + if key in design_docs[doc_id]: + doc[key] = design_docs[doc_id][key] + self._cdb.save(doc) + + # release the semaphore + self._release_fun() + + +db_idx = 0 +db_len = len(server) +for dbname in server: + + db_idx += 1 + + if not (dbname.startswith('user-') or dbname == 'shared') \ + or dbname == 'user-test-db': + logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) + continue + + + # get access to couch db + cdb = Server(url.geturl())[dbname] + + #--------------------------------------------------------------------- + # Start DB worker thread + #--------------------------------------------------------------------- + semaphore_pool.acquire() + thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) + thread.daemon = True + thread.start() + threads.append(thread) + +map(lambda thread: thread.join(), threads) diff --git a/scripts/doc_put_memory_usage/find_max_upload_size.py b/scripts/doc_put_memory_usage/find_max_upload_size.py deleted file mode 100755 index 02c68015..00000000 --- a/scripts/doc_put_memory_usage/find_max_upload_size.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/python - -# This script finds the maximum upload size for a document in the current -# server. It pulls couch URL from Soledad config file and attempts multiple -# PUTs until it finds the maximum size supported by the server. -# -# As the Soledad couch user is not an admin, you have to pass a database into -# which the test will be run. The database should already exist and be -# initialized with soledad design documents. -# -# Use it like this: -# -# ./find_max_upload_size.py -# ./find_max_upload_size.py -h - -import os -import configparser -import logging -import argparse -import random -import string -import binascii -import json -import time -import uuid - - -from couchdb.client import Database -from socket import error as socket_error -from leap.soledad.common.couch import CouchDatabase - - -SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' -PREFIX = '/tmp/soledad_test' -LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' -RETRIES = 3 # number of times to retry uploading a document of a certain - # size after a failure - - -# configure logger -logger = logging.getLogger(__name__) - - -def config_log(level): - logging.basicConfig(format=LOG_FORMAT, level=level) - - -def log_to_file(filename): - handler = logging.FileHandler(filename, mode='a') - handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) - logger.addHandler(handler) - - -# create test dir -if not os.path.exists(PREFIX): - os.mkdir(PREFIX) - - -def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): - config = configparser.ConfigParser() - config.read(config_file) - return config['soledad-server']['couch_url'] - - -# generate or load an uploadable doc with the given size in mb -def get_content(size): - fname = os.path.join(PREFIX, 'content-%d.json' % size) - if os.path.exists(fname): - logger.debug('Loading content with %d MB...' % size) - with open(fname, 'r') as f: - return f.read() - else: - length = int(size * 1024 ** 2) - logger.debug('Generating body with %d MB...' % size) - content = binascii.hexlify(os.urandom(length))[:length] - with open(fname, 'w') as f: - f.write(content) - return content - - -def delete_doc(db): - doc = db.get('largedoc') - db.delete(doc) - - -def upload(db, size, couch_db): - # try many times to be sure that size is infeasible - for i in range(RETRIES): - # wait until server is up to upload - while True: - try: - 'largedoc' in couch_db - break - except socket_error: - logger.debug('Waiting for server to come up...') - time.sleep(1) - # attempt to upload - try: - logger.debug( - 'Trying to upload %d MB document (attempt %d/%d)...' % - (size, (i+1), RETRIES)) - content = get_content(size) - logger.debug('Starting upload of %d bytes.' % len(content)) - doc = db.create_doc({'data': content}, doc_id='largedoc') - delete_doc(couch_db) - logger.debug('Success uploading %d MB doc.' % size) - return True - except Exception as e: - logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) - return False - - -def find_max_upload_size(db_uri): - db = CouchDatabase.open_database(db_uri, False) - couch_db = Database(db_uri) - logger.debug('Database URI: %s' % db_uri) - # delete eventual leftover from last run - if 'largedoc' in couch_db: - delete_doc(couch_db) - # phase 1: increase upload size exponentially - logger.info('Starting phase 1: increasing size exponentially.') - size = 1 - #import ipdb; ipdb.set_trace() - while True: - if upload(db, size, couch_db): - size *= 2 - else: - break - - # phase 2: binary search for maximum value - unable = size - able = size / 2 - logger.info('Starting phase 2: binary search for maximum value.') - while unable - able > 1: - size = able + ((unable - able) / 2) - if upload(db, size, couch_db): - able = size - else: - unable = size - return able - - -if __name__ == '__main__': - # parse command line - parser = argparse.ArgumentParser() - parser.add_argument( - '-d', action='store_true', dest='debug', - help='print debugging information') - parser.add_argument( - '-l', dest='logfile', - help='log output to file') - parser.add_argument( - 'db_uri', help='the couch database URI to test') - args = parser.parse_args() - - # log to file - if args.logfile is not None: - log_to_file(args.logfile) - - # set loglevel - if args.debug is True: - config_log(logging.DEBUG) - else: - config_log(logging.INFO) - - # run test and report - logger.info('Will test using db at %s.' % args.db_uri) - maxsize = find_max_upload_size(args.db_uri) - logger.info('Max upload size is %d MB.' % maxsize) diff --git a/scripts/doc_put_memory_usage/get-mem.py b/scripts/doc_put_memory_usage/get-mem.py deleted file mode 100755 index d64875fc..00000000 --- a/scripts/doc_put_memory_usage/get-mem.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/python - - -import psutil -import time - - -delta = 50 * 60 -start = time.time() - -while True: - now = time.time() - print "%s %s" % (now - start, psutil.phymem_usage().used) - time.sleep(0.1) - if now > start + delta: - break diff --git a/scripts/doc_put_memory_usage/plot-mem.py b/scripts/doc_put_memory_usage/plot-mem.py deleted file mode 100755 index e24679a2..00000000 --- a/scripts/doc_put_memory_usage/plot-mem.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/python - - -from matplotlib import pyplot as plt - - -files = [ - ('local', 'couchdb-json', 'b'), - ('local', 'bigcouch-json', 'r'), - ('local', 'couchdb-multipart', 'g'), - ('local', 'bigcouch-multipart', 'm'), -] - - -# config the plot -plt.xlabel('time') -plt.ylabel('memory usage') -plt.title('bigcouch versus couch memory usage') - - -for fi in files: - - machine = fi[0] - database = fi[1] - color = fi[2] - filename = '%s-%s.txt' % (machine, database) - - x = [] - y = [] - - xmax = None - xmin = None - ymax = None - ymin = None - - # read data from file - with open(filename, 'r') as f: - line = f.readline() - while line is not None: - time, mem = tuple(line.strip().split(' ')) - mem = float(mem) / (10**9) - x.append(float(time)) - y.append(mem) - if ymax == None or mem > ymax: - ymax = mem - xmax = time - if ymin == None or mem < ymin: - ymin = mem - xmin = time - line = f.readline() - if line == '': - break - - kwargs = { - 'linewidth': 1.0, - 'linestyle': '-', - # 'marker': '.', - 'color': color, - } - plt.plot(x, y, label=database, **kwargs) - - #plt.axes().get_xaxis().set_ticks(x) - #plt.axes().get_xaxis().set_ticklabels(x) - - # annotate max and min values - #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) - #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) - - -plt.grid() -plt.legend() -plt.show() - diff --git a/scripts/profiling/backends_cpu_usage/log_cpu_usage.py b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py new file mode 100755 index 00000000..2674e1ff --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + + +# Get the CPU usage and print to file. + + +import psutil +import time +import argparse +import os +import threading + + +class LogCpuUsage(threading.Thread): + + def __init__(self, fname): + threading.Thread.__init__(self) + self._stopped = True + self._fname = fname + + def run(self): + self._stopped = False + with open(self._fname, 'w') as f: + start = time.time() + while self._stopped is False: + now = time.time() + f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) + time.sleep(0.01) + + def stop(self): + self._stopped = True + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('file', help='where to save output') + args = parser.parse_args() + + if os.path.isfile(args.file): + replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) + if replace.lower() != 'y': + print 'Bailing out.' + exit(1) + + log_cpu = LogCpuUsage(args.file) + log_cpu.run() diff --git a/scripts/profiling/backends_cpu_usage/movingaverage.py b/scripts/profiling/backends_cpu_usage/movingaverage.py new file mode 100644 index 00000000..bac1b3e1 --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/movingaverage.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +# Sean Reifschneider, tummy.com, ltd. +# Released into the Public Domain, 2011-02-06 + +import itertools +from itertools import islice +from collections import deque + + +######################################################### +def movingaverage(data, subset_size, data_is_list = None, + avoid_fp_drift = True): + '''Return the moving averages of the data, with a window size of + `subset_size`. `subset_size` must be an integer greater than 0 and + less than the length of the input data, or a ValueError will be raised. + + `data_is_list` can be used to tune the algorithm for list or iteratable + as an input. The default value, `None` will auto-detect this. + The algorithm used if `data` is a list is almost twice as fast as if + it is an iteratable. + + `avoid_fp_drift`, if True (the default) sums every sub-set rather than + keeping a "rolling sum" (which may be subject to floating-point drift). + While more correct, it is also dramatically slower for subset sizes + much larger than 20. + + NOTE: You really should consider setting `avoid_fp_drift = False` unless + you are dealing with very small numbers (say, far smaller than 0.00001) + or require extreme accuracy at the cost of execution time. For + `subset_size` < 20, the performance difference is very small. + ''' + if subset_size < 1: + raise ValueError('subset_size must be 1 or larger') + + if data_is_list is None: + data_is_list = hasattr(data, '__getslice__') + + divisor = float(subset_size) + if data_is_list: + # This only works if we can re-access old elements, but is much faster. + # In other words, it can't be just an iterable, it needs to be a list. + + if subset_size > len(data): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + for x in range(subset_size, len(data) + 1): + yield sum(data[x - subset_size:x]) / divisor + else: + cur = sum(data[0:subset_size]) + yield cur / divisor + for x in range(subset_size, len(data)): + cur += data[x] - data[x - subset_size] + yield cur / divisor + else: + # Based on the recipe at: + # http://docs.python.org/library/collections.html#deque-recipes + it = iter(data) + d = deque(islice(it, subset_size)) + + if subset_size > len(d): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + yield sum(d) / divisor + for elem in it: + d.popleft() + d.append(elem) + yield sum(d) / divisor + else: + s = sum(d) + yield s / divisor + for elem in it: + s += elem - d.popleft() + d.append(elem) + yield s / divisor + + +########################## +if __name__ == '__main__': + import unittest + + class TestMovingAverage(unittest.TestCase): + #################### + def test_List(self): + try: + list(movingaverage([1,2,3], 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True)), [40.0,42.0,45.0,43.0]) + + + ###################### + def test_XRange(self): + try: + list(movingaverage(xrange(1, 4), 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) + + + ########################### + def test_ListRolling(self): + try: + list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, + avoid_fp_drift = False)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, + avoid_fp_drift = False)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + + + ############################# + def test_XRangeRolling(self): + try: + list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6, + avoid_fp_drift = False)), [3.5]) + + + ###################################################################### + suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) + unittest.TextTestRunner(verbosity = 2).run(suite) + diff --git a/scripts/profiling/backends_cpu_usage/plot.py b/scripts/profiling/backends_cpu_usage/plot.py new file mode 100755 index 00000000..4e5083ad --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/plot.py @@ -0,0 +1,81 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt +from movingaverage import movingaverage + + +def smooth(l): + return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) + + +files = [ + ('sqlite', 'b'), + ('sqlcipher', 'r'), + ('u1dblite', 'g'), + ('u1dbcipher', 'm'), +] + + +# config the plot +plt.xlabel('time (s)') +plt.ylabel('cpu usage (%)') +plt.title('u1db backends CPU usage') + + +for fi in files: + + backend = fi[0] + color = fi[1] + filename = '%s.txt' % backend + + x = [] + y = [] + + xmax = None + xmin = None + ymax = None + ymin = None + + # read data from file + with open(filename, 'r') as f: + line = f.readline() + while line is not None: + time, cpu = tuple(line.strip().split(' ')) + cpu = float(cpu) + x.append(float(time)) + y.append(cpu) + if ymax == None or cpu > ymax: + ymax = cpu + xmax = time + if ymin == None or cpu < ymin: + ymin = cpu + xmin = time + line = f.readline() + if line == '': + break + + kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', + # 'marker': '.', + 'color': color, + } + plt.plot( + [n for n in smooth(x)], + [n for n in smooth(y)], + label=backend, **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) + #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.ylim(0, 100) +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/profiling/backends_cpu_usage/test_u1db_sync.py b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py new file mode 100755 index 00000000..26ef8f9f --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py @@ -0,0 +1,113 @@ +#!/usr/bin/python + + +import u1db +import tempfile +import logging +import shutil +import os +import argparse +import time +import binascii +import random + + +from leap.soledad.client.sqlcipher import open as sqlcipher_open +from log_cpu_usage import LogCpuUsage +from u1dblite import open as u1dblite_open +from u1dbcipher import open as u1dbcipher_open + + +DOCS_TO_SYNC = 1000 +SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB +BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB + + +def get_data(size): + return binascii.hexlify(os.urandom(size/2)) + + +def run_test(testname, open_fun, tempdir, docs, *args): + logger.info('Starting test \"%s\".' % testname) + + # instantiate dbs + db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) + db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) + + # get sync target and synchsonizer + target = db2.get_sync_target() + synchronizer = u1db.sync.Synchronizer(db1, target) + + + # generate lots of small documents + logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) + for content in docs: + db1.create_doc(content) + logger.info('%d documents created in source db.' % DOCS_TO_SYNC) + + # run the test + filename = testname + '.txt' + logger.info('Logging CPU usage to %s.' % filename) + log_cpu = LogCpuUsage(filename) + tstart = time.time() + + # start logging cpu + log_cpu.start() + logger.info('Sleeping for 5 seconds...') + time.sleep(5) + + # sync + logger.info('Starting sync...') + sstart = time.time() + synchronizer.sync() + send = time.time() + logger.info('Sync finished.') + + # stop logging cpu + logger.info('Sleeping for 5 seconds...') + time.sleep(5) + tend = time.time() + log_cpu.stop() + + # report + logger.info('Total sync time: %f seconds' % (send - sstart)) + logger.info('Total test time: %f seconds' % (tend - tstart)) + logger.info('Finished test \"%s\".' % testname) + + # close dbs + db1.close() + db2.close() + + +if __name__ == '__main__': + + # configure logger + logger = logging.getLogger(__name__) + LOG_FORMAT = '%(asctime)s %(message)s' + logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + + # get a temporary dir + tempdir = tempfile.mkdtemp() + logger.info('Using temporary directory %s' % tempdir) + + + # create a lot of documents with random sizes + docs = [] + for i in xrange(DOCS_TO_SYNC): + docs.append({ + 'index': i, + #'data': get_data( + # random.randrange( + # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) + }) + + # run tests + run_test('sqlite', u1db.open, tempdir, docs, True) + run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) + run_test('u1dblite', u1dblite_open, tempdir, docs) + run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) + + # remove temporary dir + logger.info('Removing temporary directory %s' % tempdir) + shutil.rmtree(tempdir) diff --git a/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py b/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py new file mode 100755 index 00000000..02c68015 --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py @@ -0,0 +1,169 @@ +#!/usr/bin/python + +# This script finds the maximum upload size for a document in the current +# server. It pulls couch URL from Soledad config file and attempts multiple +# PUTs until it finds the maximum size supported by the server. +# +# As the Soledad couch user is not an admin, you have to pass a database into +# which the test will be run. The database should already exist and be +# initialized with soledad design documents. +# +# Use it like this: +# +# ./find_max_upload_size.py +# ./find_max_upload_size.py -h + +import os +import configparser +import logging +import argparse +import random +import string +import binascii +import json +import time +import uuid + + +from couchdb.client import Database +from socket import error as socket_error +from leap.soledad.common.couch import CouchDatabase + + +SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' +PREFIX = '/tmp/soledad_test' +LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' +RETRIES = 3 # number of times to retry uploading a document of a certain + # size after a failure + + +# configure logger +logger = logging.getLogger(__name__) + + +def config_log(level): + logging.basicConfig(format=LOG_FORMAT, level=level) + + +def log_to_file(filename): + handler = logging.FileHandler(filename, mode='a') + handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) + logger.addHandler(handler) + + +# create test dir +if not os.path.exists(PREFIX): + os.mkdir(PREFIX) + + +def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): + config = configparser.ConfigParser() + config.read(config_file) + return config['soledad-server']['couch_url'] + + +# generate or load an uploadable doc with the given size in mb +def get_content(size): + fname = os.path.join(PREFIX, 'content-%d.json' % size) + if os.path.exists(fname): + logger.debug('Loading content with %d MB...' % size) + with open(fname, 'r') as f: + return f.read() + else: + length = int(size * 1024 ** 2) + logger.debug('Generating body with %d MB...' % size) + content = binascii.hexlify(os.urandom(length))[:length] + with open(fname, 'w') as f: + f.write(content) + return content + + +def delete_doc(db): + doc = db.get('largedoc') + db.delete(doc) + + +def upload(db, size, couch_db): + # try many times to be sure that size is infeasible + for i in range(RETRIES): + # wait until server is up to upload + while True: + try: + 'largedoc' in couch_db + break + except socket_error: + logger.debug('Waiting for server to come up...') + time.sleep(1) + # attempt to upload + try: + logger.debug( + 'Trying to upload %d MB document (attempt %d/%d)...' % + (size, (i+1), RETRIES)) + content = get_content(size) + logger.debug('Starting upload of %d bytes.' % len(content)) + doc = db.create_doc({'data': content}, doc_id='largedoc') + delete_doc(couch_db) + logger.debug('Success uploading %d MB doc.' % size) + return True + except Exception as e: + logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) + return False + + +def find_max_upload_size(db_uri): + db = CouchDatabase.open_database(db_uri, False) + couch_db = Database(db_uri) + logger.debug('Database URI: %s' % db_uri) + # delete eventual leftover from last run + if 'largedoc' in couch_db: + delete_doc(couch_db) + # phase 1: increase upload size exponentially + logger.info('Starting phase 1: increasing size exponentially.') + size = 1 + #import ipdb; ipdb.set_trace() + while True: + if upload(db, size, couch_db): + size *= 2 + else: + break + + # phase 2: binary search for maximum value + unable = size + able = size / 2 + logger.info('Starting phase 2: binary search for maximum value.') + while unable - able > 1: + size = able + ((unable - able) / 2) + if upload(db, size, couch_db): + able = size + else: + unable = size + return able + + +if __name__ == '__main__': + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + '-d', action='store_true', dest='debug', + help='print debugging information') + parser.add_argument( + '-l', dest='logfile', + help='log output to file') + parser.add_argument( + 'db_uri', help='the couch database URI to test') + args = parser.parse_args() + + # log to file + if args.logfile is not None: + log_to_file(args.logfile) + + # set loglevel + if args.debug is True: + config_log(logging.DEBUG) + else: + config_log(logging.INFO) + + # run test and report + logger.info('Will test using db at %s.' % args.db_uri) + maxsize = find_max_upload_size(args.db_uri) + logger.info('Max upload size is %d MB.' % maxsize) diff --git a/scripts/profiling/doc_put_memory_usage/get-mem.py b/scripts/profiling/doc_put_memory_usage/get-mem.py new file mode 100755 index 00000000..d64875fc --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/get-mem.py @@ -0,0 +1,16 @@ +#!/usr/bin/python + + +import psutil +import time + + +delta = 50 * 60 +start = time.time() + +while True: + now = time.time() + print "%s %s" % (now - start, psutil.phymem_usage().used) + time.sleep(0.1) + if now > start + delta: + break diff --git a/scripts/profiling/doc_put_memory_usage/plot-mem.py b/scripts/profiling/doc_put_memory_usage/plot-mem.py new file mode 100755 index 00000000..e24679a2 --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/plot-mem.py @@ -0,0 +1,73 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt + + +files = [ + ('local', 'couchdb-json', 'b'), + ('local', 'bigcouch-json', 'r'), + ('local', 'couchdb-multipart', 'g'), + ('local', 'bigcouch-multipart', 'm'), +] + + +# config the plot +plt.xlabel('time') +plt.ylabel('memory usage') +plt.title('bigcouch versus couch memory usage') + + +for fi in files: + + machine = fi[0] + database = fi[1] + color = fi[2] + filename = '%s-%s.txt' % (machine, database) + + x = [] + y = [] + + xmax = None + xmin = None + ymax = None + ymin = None + + # read data from file + with open(filename, 'r') as f: + line = f.readline() + while line is not None: + time, mem = tuple(line.strip().split(' ')) + mem = float(mem) / (10**9) + x.append(float(time)) + y.append(mem) + if ymax == None or mem > ymax: + ymax = mem + xmax = time + if ymin == None or mem < ymin: + ymin = mem + xmin = time + line = f.readline() + if line == '': + break + + kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', + # 'marker': '.', + 'color': color, + } + plt.plot(x, y, label=database, **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) + #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/update_design_docs.py b/scripts/update_design_docs.py deleted file mode 100644 index e7b5a29c..00000000 --- a/scripts/update_design_docs.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/python - -# This script updates Soledad's design documents in the session database and -# all user databases with contents from the installed leap.soledad.common -# package. - -import json -import logging -import argparse -import re -import threading -import binascii - - -from getpass import getpass -from ConfigParser import ConfigParser -from couchdb.client import Server -from couchdb.http import Resource, Session -from datetime import datetime -from urlparse import urlparse - - -from leap.soledad.common import ddocs - - -# parse command line for the log file name -logger_fname = "/tmp/update-design-docs_%s.log" % \ - str(datetime.now()).replace(' ', '_') -parser = argparse.ArgumentParser() -parser.add_argument('--log', action='store', default=logger_fname, type=str, - required=False, help='the name of the log file', nargs=1) -args = parser.parse_args() - - -# configure the logger -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -print "Logging to %s." % args.log -logging.basicConfig( - filename=args.log, - format="%(asctime)-15s %(message)s") - - -# configure threads -max_threads = 20 -semaphore_pool = threading.BoundedSemaphore(value=max_threads) -threads = [] - -# get couch url -cp = ConfigParser() -cp.read('/etc/leap/soledad-server.conf') -url = urlparse(cp.get('soledad-server', 'couch_url')) - -# get admin password -netloc = re.sub('^.*@', '', url.netloc) -url = url._replace(netloc=netloc) -password = getpass("Admin password for %s: " % url.geturl()) -url = url._replace(netloc='admin:%s@%s' % (password, netloc)) - -resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10)) -server = Server(url=resource) - -hidden_url = re.sub( - 'http://(.*):.*@', - 'http://\\1:xxxxx@', - url.geturl()) - -print """ -========== -ATTENTION! -========== - -This script will modify Soledad's shared and user databases in: - - %s - -This script does not make a backup of the couch db data, so make sure you -have a copy or you may loose data. -""" % hidden_url -confirm = raw_input("Proceed (type uppercase YES)? ") - -if confirm != "YES": - exit(1) - -# convert design doc content - -design_docs = { - '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)), - '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)), - '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)), -} - -# -# Thread -# - -class DBWorkerThread(threading.Thread): - - def __init__(self, server, dbname, db_idx, db_len, release_fun): - threading.Thread.__init__(self) - self._dbname = dbname - self._cdb = server[self._dbname] - self._db_idx = db_idx - self._db_len = db_len - self._release_fun = release_fun - - def run(self): - - logger.info("(%d/%d) Updating db %s." % (self._db_idx, self._db_len, - self._dbname)) - - for doc_id in design_docs: - doc = self._cdb[doc_id] - for key in ['lists', 'views', 'updates']: - if key in design_docs[doc_id]: - doc[key] = design_docs[doc_id][key] - self._cdb.save(doc) - - # release the semaphore - self._release_fun() - - -db_idx = 0 -db_len = len(server) -for dbname in server: - - db_idx += 1 - - if not (dbname.startswith('user-') or dbname == 'shared') \ - or dbname == 'user-test-db': - logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) - continue - - - # get access to couch db - cdb = Server(url.geturl())[dbname] - - #--------------------------------------------------------------------- - # Start DB worker thread - #--------------------------------------------------------------------- - semaphore_pool.acquire() - thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) - thread.daemon = True - thread.start() - threads.append(thread) - -map(lambda thread: thread.join(), threads) -- cgit v1.2.3 From 45ffe1dd22d6e902a7e8eeca786b7ad63ec1d139 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 7 Apr 2014 12:48:25 -0300 Subject: Prevent Couch Server State from making one unneeded GET request (#5386). --- .../changes/feature_5386_prevent-uneeded-get-on-couch-server-state | 2 ++ common/src/leap/soledad/common/couch.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state diff --git a/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state b/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state new file mode 100644 index 00000000..95919873 --- /dev/null +++ b/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state @@ -0,0 +1,2 @@ + o Prevent Couch Server State from making one uneeded GET request on + instantiation (#5386). diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index b836c997..9fb717c2 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -1489,9 +1489,9 @@ class CouchServerState(ServerState): :return: The CouchDatabase object. :rtype: CouchDatabase """ - return CouchDatabase.open_database( - self._couch_url + '/' + dbname, - create=False, + return CouchDatabase( + self._couch_url, + dbname, ensure_ddocs=False) def ensure_database(self, dbname): -- cgit v1.2.3 From 764fb1e8060cb7169744dd6602d2550ad0c02178 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 7 Apr 2014 14:55:26 -0300 Subject: Fix class doc. --- common/src/leap/soledad/common/couch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index 9fb717c2..a7828e98 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -353,7 +353,7 @@ class CouchDatabase(CommonBackend): release_fun): """ :param db: The database from where to get the document. - :type db: u1db.Database + :type db: CouchDatabase :param doc_id: The doc_id of the document to be retrieved. :type doc_id: str :param check_for_conflicts: Whether the get_doc() method should -- cgit v1.2.3 From 11757fb0d071b753819a04d8504a72baed80db2f Mon Sep 17 00:00:00 2001 From: drebs Date: Tue, 8 Apr 2014 11:20:19 -0300 Subject: Prevent couch instantiation from creating the db (#5386). --- ..._prevent-couch-backend-from-trying-to-create-db | 1 + common/src/leap/soledad/common/couch.py | 36 +++++------ common/src/leap/soledad/common/tests/test_couch.py | 69 ++++++++++++++-------- .../tests/test_couch_operations_atomicity.py | 11 +++- .../src/leap/soledad/common/tests/test_server.py | 58 +++++++++--------- 5 files changed, 100 insertions(+), 75 deletions(-) create mode 100644 common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db diff --git a/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db b/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db new file mode 100644 index 00000000..9bbac329 --- /dev/null +++ b/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db @@ -0,0 +1 @@ + o Prevent couch backend from always trying to create the database (#5386). diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py index a7828e98..8ed704ba 100644 --- a/common/src/leap/soledad/common/couch.py +++ b/common/src/leap/soledad/common/couch.py @@ -31,9 +31,10 @@ import threading from StringIO import StringIO from collections import defaultdict +from urlparse import urljoin -from couchdb.client import Server +from couchdb.client import Server, Database from couchdb.http import ( ResourceConflict, ResourceNotFound, @@ -380,7 +381,7 @@ class CouchDatabase(CommonBackend): self._release_fun() @classmethod - def open_database(cls, url, create, ensure_ddocs=False): + def open_database(cls, url, create, replica_uid=None, ensure_ddocs=False): """ Open a U1DB database using CouchDB as backend. @@ -388,6 +389,8 @@ class CouchDatabase(CommonBackend): :type url: str :param create: should the replica be created if it does not exist? :type create: bool + :param replica_uid: an optional unique replica identifier + :type replica_uid: str :param ensure_ddocs: Ensure that the design docs exist on server. :type ensure_ddocs: bool @@ -406,10 +409,11 @@ class CouchDatabase(CommonBackend): except ResourceNotFound: if not create: raise DatabaseDoesNotExist() - return cls(url, dbname, ensure_ddocs=ensure_ddocs) + server.create(dbname) + return cls(url, dbname, replica_uid=replica_uid, ensure_ddocs=ensure_ddocs) - def __init__(self, url, dbname, replica_uid=None, full_commit=True, - session=None, ensure_ddocs=True): + def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True, + session=None): """ Create a new Couch data container. @@ -419,31 +423,23 @@ class CouchDatabase(CommonBackend): :type dbname: str :param replica_uid: an optional unique replica identifier :type replica_uid: str - :param full_commit: turn on the X-Couch-Full-Commit header - :type full_commit: bool - :param session: an http.Session instance or None for a default session - :type session: http.Session :param ensure_ddocs: Ensure that the design docs exist on server. :type ensure_ddocs: bool + :param session: an http.Session instance or None for a default session + :type session: http.Session """ # save params self._url = url - self._full_commit = full_commit if session is None: session = Session(timeout=COUCH_TIMEOUT) self._session = session self._factory = CouchDocument self._real_replica_uid = None # configure couch - self._server = Server(url=self._url, - full_commit=self._full_commit, - session=self._session) self._dbname = dbname - try: - self._database = self._server[self._dbname] - except ResourceNotFound: - self._server.create(self._dbname) - self._database = self._server[self._dbname] + self._database = Database( + urljoin(self._url, self._dbname), + self._session) if replica_uid is not None: self._set_replica_uid(replica_uid) if ensure_ddocs: @@ -482,7 +478,8 @@ class CouchDatabase(CommonBackend): """ Delete a U1DB CouchDB database. """ - del(self._server[self._dbname]) + server = Server(url=self._url) + del(server[self._dbname]) def close(self): """ @@ -494,7 +491,6 @@ class CouchDatabase(CommonBackend): self._url = None self._full_commit = None self._session = None - self._server = None self._database = None return True diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py index 86bb4b93..77c46e61 100644 --- a/common/src/leap/soledad/common/tests/test_couch.py +++ b/common/src/leap/soledad/common/tests/test_couch.py @@ -25,6 +25,7 @@ import copy import shutil from base64 import b64decode from mock import Mock +from urlparse import urljoin from couchdb.client import Server from u1db import errors as u1db_errors @@ -151,8 +152,11 @@ class CouchDBTestCase(unittest.TestCase): class TestCouchBackendImpl(CouchDBTestCase): def test__allocate_doc_id(self): - db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port), - 'u1db_tests', ensure_ddocs=True) + db = couch.CouchDatabase.open_database( + urljoin( + 'http://localhost:' + str(self.wrapper.port), 'u1db_tests'), + create=True, + ensure_ddocs=True) doc_id1 = db._allocate_doc_id() self.assertTrue(doc_id1.startswith('D-')) self.assertEqual(34, len(doc_id1)) @@ -166,28 +170,35 @@ class TestCouchBackendImpl(CouchDBTestCase): def make_couch_database_for_test(test, replica_uid): port = str(test.wrapper.port) - return couch.CouchDatabase('http://localhost:' + port, replica_uid, - replica_uid=replica_uid or 'test', - ensure_ddocs=True) + return couch.CouchDatabase.open_database( + urljoin('http://localhost:' + port, replica_uid), + create=True, + replica_uid=replica_uid or 'test', + ensure_ddocs=True) def copy_couch_database_for_test(test, db): port = str(test.wrapper.port) couch_url = 'http://localhost:' + port new_dbname = db._replica_uid + '_copy' - new_db = couch.CouchDatabase(couch_url, - new_dbname, - replica_uid=db._replica_uid or 'test') + new_db = couch.CouchDatabase.open_database( + urljoin(couch_url, new_dbname), + create=True, + replica_uid=db._replica_uid or 'test') # copy all docs old_couch_db = Server(couch_url)[db._replica_uid] new_couch_db = Server(couch_url)[new_dbname] for doc_id in old_couch_db: doc = old_couch_db.get(doc_id) + # bypass u1db_config document + if doc_id == 'u1db_config': + pass # copy design docs - if ('u1db_rev' not in doc): + elif doc_id.startswith('_design'): + del doc['_rev'] new_couch_db.save(doc) # copy u1db docs - else: + elif 'u1db_rev' in doc: new_doc = { '_id': doc['_id'], 'u1db_transactions': doc['u1db_transactions'], @@ -228,7 +239,7 @@ class CouchTests(test_backends.AllDatabaseTests, CouchDBTestCase): def setUp(self): test_backends.AllDatabaseTests.setUp(self) # save db info because of test_close - self._server = self.db._server + self._url = self.db._url self._dbname = self.db._dbname def tearDown(self): @@ -238,7 +249,8 @@ class CouchTests(test_backends.AllDatabaseTests, CouchDBTestCase): if self.id() == \ 'leap.soledad.common.tests.test_couch.CouchTests.' \ 'test_close(couch)': - del(self._server[self._dbname]) + server = Server(url=self._url) + del(server[self._dbname]) else: self.db.delete_database() test_backends.AllDatabaseTests.tearDown(self) @@ -355,10 +367,10 @@ from u1db.backends.inmemory import InMemoryIndex class IndexedCouchDatabase(couch.CouchDatabase): - def __init__(self, url, dbname, replica_uid=None, full_commit=True, - session=None, ensure_ddocs=True): - old_class.__init__(self, url, dbname, replica_uid, full_commit, - session, ensure_ddocs=ensure_ddocs) + def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True, + session=None): + old_class.__init__(self, url, dbname, replica_uid=replica_uid, + ensure_ddocs=ensure_ddocs, session=session) self._indexes = {} def _put_doc(self, old_doc, doc): @@ -467,8 +479,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): def setUp(self): CouchDBTestCase.setUp(self) - self.db = couch.CouchDatabase( - 'http://127.0.0.1:%d' % self.wrapper.port, 'test', + self.db = couch.CouchDatabase.open_database( + urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'), + create=True, ensure_ddocs=False) # note that we don't enforce ddocs here def tearDown(self): @@ -509,8 +522,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): Test that all methods that access design documents list functions will raise if the functions are not present. """ - self.db = couch.CouchDatabase( - 'http://127.0.0.1:%d' % self.wrapper.port, 'test', + self.db = couch.CouchDatabase.open_database( + urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'), + create=True, ensure_ddocs=True) # erase views from _design/transactions transactions = self.db._database['_design/transactions'] @@ -538,8 +552,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): Test that all methods that access design documents list functions will raise if the functions are not present. """ - self.db = couch.CouchDatabase( - 'http://127.0.0.1:%d' % self.wrapper.port, 'test', + self.db = couch.CouchDatabase.open_database( + urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'), + create=True, ensure_ddocs=True) # erase views from _design/transactions transactions = self.db._database['_design/transactions'] @@ -567,8 +582,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): Test that all methods that access design documents' named views will raise if the views are not present. """ - self.db = couch.CouchDatabase( - 'http://127.0.0.1:%d' % self.wrapper.port, 'test', + self.db = couch.CouchDatabase.open_database( + urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'), + create=True, ensure_ddocs=True) # erase views from _design/docs docs = self.db._database['_design/docs'] @@ -608,8 +624,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase): Test that all methods that access design documents will raise if the design docs are not present. """ - self.db = couch.CouchDatabase( - 'http://127.0.0.1:%d' % self.wrapper.port, 'test', + self.db = couch.CouchDatabase.open_database( + urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'), + create=True, ensure_ddocs=True) # delete _design/docs del self.db._database['_design/docs'] diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py index 3c457cc5..3c219b91 100644 --- a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py +++ b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py @@ -24,6 +24,10 @@ import mock import tempfile import threading + +from urlparse import urljoin + + from leap.soledad.client import Soledad from leap.soledad.common.couch import CouchDatabase, CouchServerState from leap.soledad.common.tests.test_couch import CouchDBTestCase @@ -101,8 +105,11 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer): TestCaseWithServer.setUp(self) CouchDBTestCase.setUp(self) self._couch_url = 'http://localhost:' + str(self.wrapper.port) - self.db = CouchDatabase( - self._couch_url, 'user-user-uuid', replica_uid='replica') + self.db = CouchDatabase.open_database( + urljoin(self._couch_url, 'user-user-uuid'), + create=True, + replica_uid='replica', + ensure_ddocs=True) self.tempdir = tempfile.mkdtemp(prefix="leap_tests-") def tearDown(self): diff --git a/common/src/leap/soledad/common/tests/test_server.py b/common/src/leap/soledad/common/tests/test_server.py index f8d2a64f..6fe9211c 100644 --- a/common/src/leap/soledad/common/tests/test_server.py +++ b/common/src/leap/soledad/common/tests/test_server.py @@ -27,6 +27,7 @@ import mock import time import binascii +from urlparse import urljoin from leap.common.testing.basetest import BaseLeapTest from leap.soledad.common.couch import ( @@ -56,7 +57,8 @@ from leap.soledad.server.auth import URLToAuthorization def _couch_ensure_database(self, dbname): db = CouchDatabase.open_database( self._couch_url + '/' + dbname, - create=True) + create=True, + ensure_ddocs=True) return db, db._replica_uid CouchServerState.ensure_database = _couch_ensure_database @@ -352,11 +354,10 @@ class EncryptedSyncTestCase( self.assertEqual([], doclist) doc1 = sol1.create_doc(json.loads(simple_doc)) # ensure remote db exists before syncing - db = CouchDatabase( - self._couch_url, - # the name of the user database is "user-". - 'user-user-uuid', - ) + db = CouchDatabase.open_database( + urljoin(self._couch_url, 'user-user-uuid'), + create=True, + ensure_ddocs=True) # sync with server sol1._server_url = self.getURL() sol1.sync() @@ -408,11 +409,10 @@ class EncryptedSyncTestCase( self.assertEqual([], doclist) doc1 = sol1.create_doc(json.loads(simple_doc)) # ensure remote db exists before syncing - db = CouchDatabase( - self._couch_url, - # the name of the user database is "user-". - 'user-user-uuid', - ) + db = CouchDatabase.open_database( + urljoin(self._couch_url, 'user-user-uuid'), + create=True, + ensure_ddocs=True) # sync with server sol1._server_url = self.getURL() sol1.sync() @@ -468,11 +468,10 @@ class EncryptedSyncTestCase( content = binascii.hexlify(os.urandom(length/2)) # len() == length doc1 = sol1.create_doc({'data': content}) # ensure remote db exists before syncing - db = CouchDatabase( - self._couch_url, - # the name of the user database is "user-". - 'user-user-uuid', - ) + db = CouchDatabase.open_database( + urljoin(self._couch_url, 'user-user-uuid'), + create=True, + ensure_ddocs=True) # sync with server sol1._server_url = self.getURL() sol1.sync() @@ -512,11 +511,10 @@ class EncryptedSyncTestCase( for i in range(0, number_of_docs): sol1.create_doc(json.loads(simple_doc)) # ensure remote db exists before syncing - db = CouchDatabase( - self._couch_url, - # the name of the user database is "user-". - 'user-user-uuid', - ) + db = CouchDatabase.open_database( + urljoin(self._couch_url, 'user-user-uuid'), + create=True, + ensure_ddocs=True) # sync with server sol1._server_url = self.getURL() sol1.sync() @@ -558,8 +556,14 @@ class LockResourceTestCase( self.tempdir = tempfile.mkdtemp(prefix="leap_tests-") self._couch_url = 'http://localhost:' + str(self.wrapper.port) # create the databases - CouchDatabase(self._couch_url, 'shared') - CouchDatabase(self._couch_url, 'tokens') + CouchDatabase.open_database( + urljoin(self._couch_url, 'shared'), + create=True, + ensure_ddocs=True) + CouchDatabase.open_database( + urljoin(self._couch_url, 'tokens'), + create=True, + ensure_ddocs=True) self._state = CouchServerState( self._couch_url, 'shared', 'tokens') @@ -567,10 +571,10 @@ class LockResourceTestCase( CouchDBTestCase.tearDown(self) TestCaseWithServer.tearDown(self) # delete remote database - db = CouchDatabase( - self._couch_url, - 'shared', - ) + db = CouchDatabase.open_database( + urljoin(self._couch_url, 'shared'), + create=True, + ensure_ddocs=True) db.delete_database() def test__try_obtain_filesystem_lock(self): -- cgit v1.2.3