From 3cfe76a694bb19d5da21192e4a9a467a9b4c59e1 Mon Sep 17 00:00:00 2001
From: drebs
Date: Mon, 7 Apr 2014 11:40:40 -0300
Subject: Add script to compile design docs (#5315)

---
 common/src/leap/soledad/common/couch.py |   2 +-
 scripts/compile_design_docs.py          | 111 ++++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+), 1 deletion(-)
 create mode 100644 scripts/compile_design_docs.py

diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py
index 8e8613a1..b836c997 100644
--- a/common/src/leap/soledad/common/couch.py
+++ b/common/src/leap/soledad/common/couch.py
@@ -1411,7 +1411,7 @@ class CouchDatabase(CommonBackend):
         # strptime here by evaluating the conversion of an arbitrary date.
         # This will not be needed when/if we switch from python-couchdb to
         # paisley.
-        time.strptime('Mar 4 1917', '%b %d %Y')
+        time.strptime('Mar 8 1917', '%b %d %Y')
         # spawn threads to retrieve docs
         threads = []
         for doc_id in doc_ids:
diff --git a/scripts/compile_design_docs.py b/scripts/compile_design_docs.py
new file mode 100644
index 00000000..7ffebb10
--- /dev/null
+++ b/scripts/compile_design_docs.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python
+
+
+# This script builds files for the design documents represented in the
+# ../common/src/leap/soledad/common/ddocs directory structure (relative to
+# the current location of the script) into a target directory.
+
+
+import argparse
+from os import listdir
+from os.path import realpath, dirname, isdir, join, isfile, basename
+import json
+
+DDOCS_REL_PATH = ('..', 'common', 'src', 'leap', 'soledad', 'common', 'ddocs')
+
+
+def build_ddocs():
+    """
+    Build design documents.
+
+    For ease of development, couch backend design documents are stored as
+    `.js` files in subdirectories of
+    `../common/src/leap/soledad/common/ddocs`. This function scans that
+    directory for javascript files and builds the design documents structure.
+
+    This function uses the following conventions to generate design
+    documents:
+
+      - Design documents are represented by directories in the form
+        `<prefix>/<ddoc>`, where `<prefix>` is the
+        `src/leap/soledad/common/ddocs` directory.
+      - Design document directories may contain `views`, `lists` and
+        `updates` subdirectories.
+      - View subdirectories must contain a `map.js` file and may contain a
+        `reduce.js` file.
+      - List and update subdirectories may contain any number of javascript
+        files (i.e. ending in `.js`) whose names will be mapped to the
+        corresponding list or update function name.
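+
+    As an illustrative sketch (these file, view and function names are
+    hypothetical, not part of the conventions above), a tree like:
+
+        ddocs/docs/views/get/map.js
+        ddocs/docs/updates/put.js
+
+    would be compiled into a design document of the form:
+
+        {
+            '_id': '_design/docs',
+            'views': {'get': {'map': "<contents of map.js>"}},
+            'updates': {'put': "<contents of put.js>"},
+        }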
+    """
+    ddocs = {}
+
+    # design docs are represented by subdirectories of `DDOCS_REL_PATH`
+    cur_pwd = dirname(realpath(__file__))
+    ddocs_path = join(cur_pwd, *DDOCS_REL_PATH)
+    for ddoc in [f for f in listdir(ddocs_path)
+                 if isdir(join(ddocs_path, f))]:
+
+        ddocs[ddoc] = {'_id': '_design/%s' % ddoc}
+
+        for t in ['views', 'lists', 'updates']:
+            tdir = join(ddocs_path, ddoc, t)
+            if isdir(tdir):
+
+                ddocs[ddoc][t] = {}
+
+                if t == 'views':  # handle views (with map/reduce functions)
+                    for view in [f for f in listdir(tdir)
+                                 if isdir(join(tdir, f))]:
+                        # look for map.js and reduce.js
+                        mapfile = join(tdir, view, 'map.js')
+                        reducefile = join(tdir, view, 'reduce.js')
+                        mapfun = None
+                        reducefun = None
+                        try:
+                            with open(mapfile) as f:
+                                mapfun = f.read()
+                        except IOError:
+                            pass
+                        try:
+                            with open(reducefile) as f:
+                                reducefun = f.read()
+                        except IOError:
+                            pass
+                        ddocs[ddoc]['views'][view] = {}
+
+                        if mapfun is not None:
+                            ddocs[ddoc]['views'][view]['map'] = mapfun
+                        if reducefun is not None:
+                            ddocs[ddoc]['views'][view]['reduce'] = reducefun
+
+                else:  # handle lists, updates, etc.
+                    for fun in [f for f in listdir(tdir)
+                                if isfile(join(tdir, f))]:
+                        funfile = join(tdir, fun)
+                        funname = basename(funfile).replace('.js', '')
+                        try:
+                            with open(funfile) as f:
+                                ddocs[ddoc][t][funname] = f.read()
+                        except IOError:
+                            pass
+    return ddocs
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'target', type=str,
+        help='the target dir where to store design documents')
+    args = parser.parse_args()
+
+    # check if given target is a directory
+    if not isdir(args.target):
+        print 'Error: %s is not a directory.' % args.target
+        exit(1)
+
+    # write design docs files
+    ddocs = build_ddocs()
+    for ddoc in ddocs:
+        ddoc_filename = "%s.json" % ddoc
+        with open(join(args.target, ddoc_filename), 'w') as f:
+            f.write(json.dumps(ddocs[ddoc], indent=3))
+        print "Wrote _design/%s content in %s" % (
+            ddoc, join(args.target, ddoc_filename))
-- 
cgit v1.2.3


From 573909b10d77ef3d889d33cfaeb3fdadd0135daf Mon Sep 17 00:00:00 2001
From: drebs
Date: Mon, 7 Apr 2014 12:40:45 -0300
Subject: Reorganize scripts directory.
--- scripts/backends_cpu_usage/log_cpu_usage.py | 46 ----- scripts/backends_cpu_usage/movingaverage.py | 209 --------------------- scripts/backends_cpu_usage/plot.py | 81 -------- scripts/backends_cpu_usage/test_u1db_sync.py | 113 ----------- scripts/ddocs/update_design_docs.py | 147 +++++++++++++++ .../doc_put_memory_usage/find_max_upload_size.py | 169 ----------------- scripts/doc_put_memory_usage/get-mem.py | 16 -- scripts/doc_put_memory_usage/plot-mem.py | 73 ------- .../profiling/backends_cpu_usage/log_cpu_usage.py | 46 +++++ .../profiling/backends_cpu_usage/movingaverage.py | 209 +++++++++++++++++++++ scripts/profiling/backends_cpu_usage/plot.py | 81 ++++++++ .../profiling/backends_cpu_usage/test_u1db_sync.py | 113 +++++++++++ .../doc_put_memory_usage/find_max_upload_size.py | 169 +++++++++++++++++ scripts/profiling/doc_put_memory_usage/get-mem.py | 16 ++ scripts/profiling/doc_put_memory_usage/plot-mem.py | 73 +++++++ scripts/update_design_docs.py | 147 --------------- 16 files changed, 854 insertions(+), 854 deletions(-) delete mode 100755 scripts/backends_cpu_usage/log_cpu_usage.py delete mode 100644 scripts/backends_cpu_usage/movingaverage.py delete mode 100755 scripts/backends_cpu_usage/plot.py delete mode 100755 scripts/backends_cpu_usage/test_u1db_sync.py create mode 100644 scripts/ddocs/update_design_docs.py delete mode 100755 scripts/doc_put_memory_usage/find_max_upload_size.py delete mode 100755 scripts/doc_put_memory_usage/get-mem.py delete mode 100755 scripts/doc_put_memory_usage/plot-mem.py create mode 100755 scripts/profiling/backends_cpu_usage/log_cpu_usage.py create mode 100644 scripts/profiling/backends_cpu_usage/movingaverage.py create mode 100755 scripts/profiling/backends_cpu_usage/plot.py create mode 100755 scripts/profiling/backends_cpu_usage/test_u1db_sync.py create mode 100755 scripts/profiling/doc_put_memory_usage/find_max_upload_size.py create mode 100755 scripts/profiling/doc_put_memory_usage/get-mem.py create mode 100755 scripts/profiling/doc_put_memory_usage/plot-mem.py delete mode 100644 scripts/update_design_docs.py diff --git a/scripts/backends_cpu_usage/log_cpu_usage.py b/scripts/backends_cpu_usage/log_cpu_usage.py deleted file mode 100755 index 2674e1ff..00000000 --- a/scripts/backends_cpu_usage/log_cpu_usage.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/python - - -# Get the CPU usage and print to file. - - -import psutil -import time -import argparse -import os -import threading - - -class LogCpuUsage(threading.Thread): - - def __init__(self, fname): - threading.Thread.__init__(self) - self._stopped = True - self._fname = fname - - def run(self): - self._stopped = False - with open(self._fname, 'w') as f: - start = time.time() - while self._stopped is False: - now = time.time() - f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) - time.sleep(0.01) - - def stop(self): - self._stopped = True - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('file', help='where to save output') - args = parser.parse_args() - - if os.path.isfile(args.file): - replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) - if replace.lower() != 'y': - print 'Bailing out.' 
- exit(1) - - log_cpu = LogCpuUsage(args.file) - log_cpu.run() diff --git a/scripts/backends_cpu_usage/movingaverage.py b/scripts/backends_cpu_usage/movingaverage.py deleted file mode 100644 index bac1b3e1..00000000 --- a/scripts/backends_cpu_usage/movingaverage.py +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env python -# -# Sean Reifschneider, tummy.com, ltd. -# Released into the Public Domain, 2011-02-06 - -import itertools -from itertools import islice -from collections import deque - - -######################################################### -def movingaverage(data, subset_size, data_is_list = None, - avoid_fp_drift = True): - '''Return the moving averages of the data, with a window size of - `subset_size`. `subset_size` must be an integer greater than 0 and - less than the length of the input data, or a ValueError will be raised. - - `data_is_list` can be used to tune the algorithm for list or iteratable - as an input. The default value, `None` will auto-detect this. - The algorithm used if `data` is a list is almost twice as fast as if - it is an iteratable. - - `avoid_fp_drift`, if True (the default) sums every sub-set rather than - keeping a "rolling sum" (which may be subject to floating-point drift). - While more correct, it is also dramatically slower for subset sizes - much larger than 20. - - NOTE: You really should consider setting `avoid_fp_drift = False` unless - you are dealing with very small numbers (say, far smaller than 0.00001) - or require extreme accuracy at the cost of execution time. For - `subset_size` < 20, the performance difference is very small. - ''' - if subset_size < 1: - raise ValueError('subset_size must be 1 or larger') - - if data_is_list is None: - data_is_list = hasattr(data, '__getslice__') - - divisor = float(subset_size) - if data_is_list: - # This only works if we can re-access old elements, but is much faster. - # In other words, it can't be just an iterable, it needs to be a list. 
- - if subset_size > len(data): - raise ValueError('subset_size must be smaller than data set size') - - if avoid_fp_drift: - for x in range(subset_size, len(data) + 1): - yield sum(data[x - subset_size:x]) / divisor - else: - cur = sum(data[0:subset_size]) - yield cur / divisor - for x in range(subset_size, len(data)): - cur += data[x] - data[x - subset_size] - yield cur / divisor - else: - # Based on the recipe at: - # http://docs.python.org/library/collections.html#deque-recipes - it = iter(data) - d = deque(islice(it, subset_size)) - - if subset_size > len(d): - raise ValueError('subset_size must be smaller than data set size') - - if avoid_fp_drift: - yield sum(d) / divisor - for elem in it: - d.popleft() - d.append(elem) - yield sum(d) / divisor - else: - s = sum(d) - yield s / divisor - for elem in it: - s += elem - d.popleft() - d.append(elem) - yield s / divisor - - -########################## -if __name__ == '__main__': - import unittest - - class TestMovingAverage(unittest.TestCase): - #################### - def test_List(self): - try: - list(movingaverage([1,2,3], 0)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage([1,2,3,4,5,6], 7)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) - - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, False)), [40.0,42.0,45.0,43.0]) - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, True)), [40.0,42.0,45.0,43.0]) - - - ###################### - def test_XRange(self): - try: - list(movingaverage(xrange(1, 4), 0)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage(xrange(1, 7), 7)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage(xrange(1, 7), 2)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), - 2)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) - self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) - - - ########################### - def test_ListRolling(self): - try: - list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, - avoid_fp_drift = False)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, - avoid_fp_drift = False)), - [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(map(float, 
[1,2,3,4,5,6]), 2, - avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, - avoid_fp_drift = False)), [2,3,4,5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, - avoid_fp_drift = False)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, - avoid_fp_drift = False)), [3,4]) - self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, - avoid_fp_drift = False)), [3.5]) - - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) - self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], - 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) - - - ############################# - def test_XRangeRolling(self): - try: - list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size=0') - except ValueError: - pass - - try: - list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) - self.fail('Did not raise ValueError on subset_size > len(data)') - except ValueError: - pass - - self.assertEqual(list(movingaverage(xrange(1, 7), 1, - avoid_fp_drift = False)), [1,2,3,4,5,6]) - self.assertEqual(list(movingaverage(xrange(1, 7), 2, - avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), - 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 3, - avoid_fp_drift = False)), [2,3,4,5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 4, - avoid_fp_drift = False)), [2.5,3.5,4.5]) - self.assertEqual(list(movingaverage(xrange(1, 7), 5, - avoid_fp_drift = False)), [3,4]) - self.assertEqual(list(movingaverage(xrange(1, 7), 6, - avoid_fp_drift = False)), [3.5]) - - - ###################################################################### - suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) - unittest.TextTestRunner(verbosity = 2).run(suite) - diff --git a/scripts/backends_cpu_usage/plot.py b/scripts/backends_cpu_usage/plot.py deleted file mode 100755 index 4e5083ad..00000000 --- a/scripts/backends_cpu_usage/plot.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/python - - -from matplotlib import pyplot as plt -from movingaverage import movingaverage - - -def smooth(l): - return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) - - -files = [ - ('sqlite', 'b'), - ('sqlcipher', 'r'), - ('u1dblite', 'g'), - ('u1dbcipher', 'm'), -] - - -# config the plot -plt.xlabel('time (s)') -plt.ylabel('cpu usage (%)') -plt.title('u1db backends CPU usage') - - -for fi in files: - - backend = fi[0] - color = fi[1] - filename = '%s.txt' % backend - - x = [] - y = [] - - xmax = None - xmin = None - ymax = None - ymin = None - - # read data from file - with open(filename, 'r') as f: - line = f.readline() - while line is not None: - time, cpu = tuple(line.strip().split(' ')) - cpu = float(cpu) - x.append(float(time)) - y.append(cpu) - if ymax == None or cpu > ymax: - ymax = cpu - xmax = time - if ymin == None or cpu < ymin: - ymin = cpu - xmin = time - line = f.readline() - if line == '': - break - - kwargs = { - 'linewidth': 1.0, - 'linestyle': '-', - # 'marker': '.', - 'color': color, - } - plt.plot( - [n for n in smooth(x)], - [n for n in smooth(y)], - label=backend, **kwargs) - - #plt.axes().get_xaxis().set_ticks(x) - #plt.axes().get_xaxis().set_ticklabels(x) - - # annotate max and min values - #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) - #plt.axes().annotate("%.2f GB" % 
ymin, xy=(xmin, ymin)) - - -plt.ylim(0, 100) -plt.grid() -plt.legend() -plt.show() - diff --git a/scripts/backends_cpu_usage/test_u1db_sync.py b/scripts/backends_cpu_usage/test_u1db_sync.py deleted file mode 100755 index 26ef8f9f..00000000 --- a/scripts/backends_cpu_usage/test_u1db_sync.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/python - - -import u1db -import tempfile -import logging -import shutil -import os -import argparse -import time -import binascii -import random - - -from leap.soledad.client.sqlcipher import open as sqlcipher_open -from log_cpu_usage import LogCpuUsage -from u1dblite import open as u1dblite_open -from u1dbcipher import open as u1dbcipher_open - - -DOCS_TO_SYNC = 1000 -SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB -BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB - - -def get_data(size): - return binascii.hexlify(os.urandom(size/2)) - - -def run_test(testname, open_fun, tempdir, docs, *args): - logger.info('Starting test \"%s\".' % testname) - - # instantiate dbs - db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) - db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) - - # get sync target and synchsonizer - target = db2.get_sync_target() - synchronizer = u1db.sync.Synchronizer(db1, target) - - - # generate lots of small documents - logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) - for content in docs: - db1.create_doc(content) - logger.info('%d documents created in source db.' % DOCS_TO_SYNC) - - # run the test - filename = testname + '.txt' - logger.info('Logging CPU usage to %s.' % filename) - log_cpu = LogCpuUsage(filename) - tstart = time.time() - - # start logging cpu - log_cpu.start() - logger.info('Sleeping for 5 seconds...') - time.sleep(5) - - # sync - logger.info('Starting sync...') - sstart = time.time() - synchronizer.sync() - send = time.time() - logger.info('Sync finished.') - - # stop logging cpu - logger.info('Sleeping for 5 seconds...') - time.sleep(5) - tend = time.time() - log_cpu.stop() - - # report - logger.info('Total sync time: %f seconds' % (send - sstart)) - logger.info('Total test time: %f seconds' % (tend - tstart)) - logger.info('Finished test \"%s\".' % testname) - - # close dbs - db1.close() - db2.close() - - -if __name__ == '__main__': - - # configure logger - logger = logging.getLogger(__name__) - LOG_FORMAT = '%(asctime)s %(message)s' - logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - - - # get a temporary dir - tempdir = tempfile.mkdtemp() - logger.info('Using temporary directory %s' % tempdir) - - - # create a lot of documents with random sizes - docs = [] - for i in xrange(DOCS_TO_SYNC): - docs.append({ - 'index': i, - #'data': get_data( - # random.randrange( - # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) - }) - - # run tests - run_test('sqlite', u1db.open, tempdir, docs, True) - run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) - run_test('u1dblite', u1dblite_open, tempdir, docs) - run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) - - # remove temporary dir - logger.info('Removing temporary directory %s' % tempdir) - shutil.rmtree(tempdir) diff --git a/scripts/ddocs/update_design_docs.py b/scripts/ddocs/update_design_docs.py new file mode 100644 index 00000000..e7b5a29c --- /dev/null +++ b/scripts/ddocs/update_design_docs.py @@ -0,0 +1,147 @@ +#!/usr/bin/python + +# This script updates Soledad's design documents in the session database and +# all user databases with contents from the installed leap.soledad.common +# package. 
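+#
+# Usage sketch (the option is defined by the argument parser below; the log
+# file path shown here is hypothetical):
+#
+#   ./update_design_docs.py --log /tmp/update-design-docs.log
+#
+# The script prompts for the couch admin password and for a typed
+# confirmation before touching any database.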
+ +import json +import logging +import argparse +import re +import threading +import binascii + + +from getpass import getpass +from ConfigParser import ConfigParser +from couchdb.client import Server +from couchdb.http import Resource, Session +from datetime import datetime +from urlparse import urlparse + + +from leap.soledad.common import ddocs + + +# parse command line for the log file name +logger_fname = "/tmp/update-design-docs_%s.log" % \ + str(datetime.now()).replace(' ', '_') +parser = argparse.ArgumentParser() +parser.add_argument('--log', action='store', default=logger_fname, type=str, + required=False, help='the name of the log file', nargs=1) +args = parser.parse_args() + + +# configure the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +print "Logging to %s." % args.log +logging.basicConfig( + filename=args.log, + format="%(asctime)-15s %(message)s") + + +# configure threads +max_threads = 20 +semaphore_pool = threading.BoundedSemaphore(value=max_threads) +threads = [] + +# get couch url +cp = ConfigParser() +cp.read('/etc/leap/soledad-server.conf') +url = urlparse(cp.get('soledad-server', 'couch_url')) + +# get admin password +netloc = re.sub('^.*@', '', url.netloc) +url = url._replace(netloc=netloc) +password = getpass("Admin password for %s: " % url.geturl()) +url = url._replace(netloc='admin:%s@%s' % (password, netloc)) + +resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10)) +server = Server(url=resource) + +hidden_url = re.sub( + 'http://(.*):.*@', + 'http://\\1:xxxxx@', + url.geturl()) + +print """ +========== +ATTENTION! +========== + +This script will modify Soledad's shared and user databases in: + + %s + +This script does not make a backup of the couch db data, so make sure you +have a copy or you may loose data. +""" % hidden_url +confirm = raw_input("Proceed (type uppercase YES)? ") + +if confirm != "YES": + exit(1) + +# convert design doc content + +design_docs = { + '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)), + '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)), + '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)), +} + +# +# Thread +# + +class DBWorkerThread(threading.Thread): + + def __init__(self, server, dbname, db_idx, db_len, release_fun): + threading.Thread.__init__(self) + self._dbname = dbname + self._cdb = server[self._dbname] + self._db_idx = db_idx + self._db_len = db_len + self._release_fun = release_fun + + def run(self): + + logger.info("(%d/%d) Updating db %s." % (self._db_idx, self._db_len, + self._dbname)) + + for doc_id in design_docs: + doc = self._cdb[doc_id] + for key in ['lists', 'views', 'updates']: + if key in design_docs[doc_id]: + doc[key] = design_docs[doc_id][key] + self._cdb.save(doc) + + # release the semaphore + self._release_fun() + + +db_idx = 0 +db_len = len(server) +for dbname in server: + + db_idx += 1 + + if not (dbname.startswith('user-') or dbname == 'shared') \ + or dbname == 'user-test-db': + logger.info("(%d/%d) Skipping db %s." 
% (db_idx, db_len, dbname)) + continue + + + # get access to couch db + cdb = Server(url.geturl())[dbname] + + #--------------------------------------------------------------------- + # Start DB worker thread + #--------------------------------------------------------------------- + semaphore_pool.acquire() + thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) + thread.daemon = True + thread.start() + threads.append(thread) + +map(lambda thread: thread.join(), threads) diff --git a/scripts/doc_put_memory_usage/find_max_upload_size.py b/scripts/doc_put_memory_usage/find_max_upload_size.py deleted file mode 100755 index 02c68015..00000000 --- a/scripts/doc_put_memory_usage/find_max_upload_size.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/python - -# This script finds the maximum upload size for a document in the current -# server. It pulls couch URL from Soledad config file and attempts multiple -# PUTs until it finds the maximum size supported by the server. -# -# As the Soledad couch user is not an admin, you have to pass a database into -# which the test will be run. The database should already exist and be -# initialized with soledad design documents. -# -# Use it like this: -# -# ./find_max_upload_size.py -# ./find_max_upload_size.py -h - -import os -import configparser -import logging -import argparse -import random -import string -import binascii -import json -import time -import uuid - - -from couchdb.client import Database -from socket import error as socket_error -from leap.soledad.common.couch import CouchDatabase - - -SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' -PREFIX = '/tmp/soledad_test' -LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' -RETRIES = 3 # number of times to retry uploading a document of a certain - # size after a failure - - -# configure logger -logger = logging.getLogger(__name__) - - -def config_log(level): - logging.basicConfig(format=LOG_FORMAT, level=level) - - -def log_to_file(filename): - handler = logging.FileHandler(filename, mode='a') - handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) - logger.addHandler(handler) - - -# create test dir -if not os.path.exists(PREFIX): - os.mkdir(PREFIX) - - -def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): - config = configparser.ConfigParser() - config.read(config_file) - return config['soledad-server']['couch_url'] - - -# generate or load an uploadable doc with the given size in mb -def get_content(size): - fname = os.path.join(PREFIX, 'content-%d.json' % size) - if os.path.exists(fname): - logger.debug('Loading content with %d MB...' % size) - with open(fname, 'r') as f: - return f.read() - else: - length = int(size * 1024 ** 2) - logger.debug('Generating body with %d MB...' % size) - content = binascii.hexlify(os.urandom(length))[:length] - with open(fname, 'w') as f: - f.write(content) - return content - - -def delete_doc(db): - doc = db.get('largedoc') - db.delete(doc) - - -def upload(db, size, couch_db): - # try many times to be sure that size is infeasible - for i in range(RETRIES): - # wait until server is up to upload - while True: - try: - 'largedoc' in couch_db - break - except socket_error: - logger.debug('Waiting for server to come up...') - time.sleep(1) - # attempt to upload - try: - logger.debug( - 'Trying to upload %d MB document (attempt %d/%d)...' % - (size, (i+1), RETRIES)) - content = get_content(size) - logger.debug('Starting upload of %d bytes.' 
% len(content)) - doc = db.create_doc({'data': content}, doc_id='largedoc') - delete_doc(couch_db) - logger.debug('Success uploading %d MB doc.' % size) - return True - except Exception as e: - logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) - return False - - -def find_max_upload_size(db_uri): - db = CouchDatabase.open_database(db_uri, False) - couch_db = Database(db_uri) - logger.debug('Database URI: %s' % db_uri) - # delete eventual leftover from last run - if 'largedoc' in couch_db: - delete_doc(couch_db) - # phase 1: increase upload size exponentially - logger.info('Starting phase 1: increasing size exponentially.') - size = 1 - #import ipdb; ipdb.set_trace() - while True: - if upload(db, size, couch_db): - size *= 2 - else: - break - - # phase 2: binary search for maximum value - unable = size - able = size / 2 - logger.info('Starting phase 2: binary search for maximum value.') - while unable - able > 1: - size = able + ((unable - able) / 2) - if upload(db, size, couch_db): - able = size - else: - unable = size - return able - - -if __name__ == '__main__': - # parse command line - parser = argparse.ArgumentParser() - parser.add_argument( - '-d', action='store_true', dest='debug', - help='print debugging information') - parser.add_argument( - '-l', dest='logfile', - help='log output to file') - parser.add_argument( - 'db_uri', help='the couch database URI to test') - args = parser.parse_args() - - # log to file - if args.logfile is not None: - log_to_file(args.logfile) - - # set loglevel - if args.debug is True: - config_log(logging.DEBUG) - else: - config_log(logging.INFO) - - # run test and report - logger.info('Will test using db at %s.' % args.db_uri) - maxsize = find_max_upload_size(args.db_uri) - logger.info('Max upload size is %d MB.' 
% maxsize) diff --git a/scripts/doc_put_memory_usage/get-mem.py b/scripts/doc_put_memory_usage/get-mem.py deleted file mode 100755 index d64875fc..00000000 --- a/scripts/doc_put_memory_usage/get-mem.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/python - - -import psutil -import time - - -delta = 50 * 60 -start = time.time() - -while True: - now = time.time() - print "%s %s" % (now - start, psutil.phymem_usage().used) - time.sleep(0.1) - if now > start + delta: - break diff --git a/scripts/doc_put_memory_usage/plot-mem.py b/scripts/doc_put_memory_usage/plot-mem.py deleted file mode 100755 index e24679a2..00000000 --- a/scripts/doc_put_memory_usage/plot-mem.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/python - - -from matplotlib import pyplot as plt - - -files = [ - ('local', 'couchdb-json', 'b'), - ('local', 'bigcouch-json', 'r'), - ('local', 'couchdb-multipart', 'g'), - ('local', 'bigcouch-multipart', 'm'), -] - - -# config the plot -plt.xlabel('time') -plt.ylabel('memory usage') -plt.title('bigcouch versus couch memory usage') - - -for fi in files: - - machine = fi[0] - database = fi[1] - color = fi[2] - filename = '%s-%s.txt' % (machine, database) - - x = [] - y = [] - - xmax = None - xmin = None - ymax = None - ymin = None - - # read data from file - with open(filename, 'r') as f: - line = f.readline() - while line is not None: - time, mem = tuple(line.strip().split(' ')) - mem = float(mem) / (10**9) - x.append(float(time)) - y.append(mem) - if ymax == None or mem > ymax: - ymax = mem - xmax = time - if ymin == None or mem < ymin: - ymin = mem - xmin = time - line = f.readline() - if line == '': - break - - kwargs = { - 'linewidth': 1.0, - 'linestyle': '-', - # 'marker': '.', - 'color': color, - } - plt.plot(x, y, label=database, **kwargs) - - #plt.axes().get_xaxis().set_ticks(x) - #plt.axes().get_xaxis().set_ticklabels(x) - - # annotate max and min values - #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) - #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) - - -plt.grid() -plt.legend() -plt.show() - diff --git a/scripts/profiling/backends_cpu_usage/log_cpu_usage.py b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py new file mode 100755 index 00000000..2674e1ff --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + + +# Get the CPU usage and print to file. + + +import psutil +import time +import argparse +import os +import threading + + +class LogCpuUsage(threading.Thread): + + def __init__(self, fname): + threading.Thread.__init__(self) + self._stopped = True + self._fname = fname + + def run(self): + self._stopped = False + with open(self._fname, 'w') as f: + start = time.time() + while self._stopped is False: + now = time.time() + f.write("%f %f\n" % ((now - start), psutil.cpu_percent())) + time.sleep(0.01) + + def stop(self): + self._stopped = True + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('file', help='where to save output') + args = parser.parse_args() + + if os.path.isfile(args.file): + replace = raw_input('File %s exists, replace it (y/N)? ' % args.file) + if replace.lower() != 'y': + print 'Bailing out.' 
+ exit(1) + + log_cpu = LogCpuUsage(args.file) + log_cpu.run() diff --git a/scripts/profiling/backends_cpu_usage/movingaverage.py b/scripts/profiling/backends_cpu_usage/movingaverage.py new file mode 100644 index 00000000..bac1b3e1 --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/movingaverage.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +# Sean Reifschneider, tummy.com, ltd. +# Released into the Public Domain, 2011-02-06 + +import itertools +from itertools import islice +from collections import deque + + +######################################################### +def movingaverage(data, subset_size, data_is_list = None, + avoid_fp_drift = True): + '''Return the moving averages of the data, with a window size of + `subset_size`. `subset_size` must be an integer greater than 0 and + less than the length of the input data, or a ValueError will be raised. + + `data_is_list` can be used to tune the algorithm for list or iteratable + as an input. The default value, `None` will auto-detect this. + The algorithm used if `data` is a list is almost twice as fast as if + it is an iteratable. + + `avoid_fp_drift`, if True (the default) sums every sub-set rather than + keeping a "rolling sum" (which may be subject to floating-point drift). + While more correct, it is also dramatically slower for subset sizes + much larger than 20. + + NOTE: You really should consider setting `avoid_fp_drift = False` unless + you are dealing with very small numbers (say, far smaller than 0.00001) + or require extreme accuracy at the cost of execution time. For + `subset_size` < 20, the performance difference is very small. + ''' + if subset_size < 1: + raise ValueError('subset_size must be 1 or larger') + + if data_is_list is None: + data_is_list = hasattr(data, '__getslice__') + + divisor = float(subset_size) + if data_is_list: + # This only works if we can re-access old elements, but is much faster. + # In other words, it can't be just an iterable, it needs to be a list. 
+ + if subset_size > len(data): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + for x in range(subset_size, len(data) + 1): + yield sum(data[x - subset_size:x]) / divisor + else: + cur = sum(data[0:subset_size]) + yield cur / divisor + for x in range(subset_size, len(data)): + cur += data[x] - data[x - subset_size] + yield cur / divisor + else: + # Based on the recipe at: + # http://docs.python.org/library/collections.html#deque-recipes + it = iter(data) + d = deque(islice(it, subset_size)) + + if subset_size > len(d): + raise ValueError('subset_size must be smaller than data set size') + + if avoid_fp_drift: + yield sum(d) / divisor + for elem in it: + d.popleft() + d.append(elem) + yield sum(d) / divisor + else: + s = sum(d) + yield s / divisor + for elem in it: + s += elem - d.popleft() + d.append(elem) + yield s / divisor + + +########################## +if __name__ == '__main__': + import unittest + + class TestMovingAverage(unittest.TestCase): + #################### + def test_List(self): + try: + list(movingaverage([1,2,3], 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True)), [40.0,42.0,45.0,43.0]) + + + ###################### + def test_XRange(self): + try: + list(movingaverage(xrange(1, 4), 0)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5]) + + + ########################### + def test_ListRolling(self): + try: + list(movingaverage([1,2,3], 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2, + avoid_fp_drift = False)), + [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(map(float, 
[1,2,3,4,5,6]), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6, + avoid_fp_drift = False)), [3.5]) + + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44], + 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0]) + + + ############################# + def test_XRangeRolling(self): + try: + list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size=0') + except ValueError: + pass + + try: + list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False)) + self.fail('Did not raise ValueError on subset_size > len(data)') + except ValueError: + pass + + self.assertEqual(list(movingaverage(xrange(1, 7), 1, + avoid_fp_drift = False)), [1,2,3,4,5,6]) + self.assertEqual(list(movingaverage(xrange(1, 7), 2, + avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))), + 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 3, + avoid_fp_drift = False)), [2,3,4,5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 4, + avoid_fp_drift = False)), [2.5,3.5,4.5]) + self.assertEqual(list(movingaverage(xrange(1, 7), 5, + avoid_fp_drift = False)), [3,4]) + self.assertEqual(list(movingaverage(xrange(1, 7), 6, + avoid_fp_drift = False)), [3.5]) + + + ###################################################################### + suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage) + unittest.TextTestRunner(verbosity = 2).run(suite) + diff --git a/scripts/profiling/backends_cpu_usage/plot.py b/scripts/profiling/backends_cpu_usage/plot.py new file mode 100755 index 00000000..4e5083ad --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/plot.py @@ -0,0 +1,81 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt +from movingaverage import movingaverage + + +def smooth(l): + return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False) + + +files = [ + ('sqlite', 'b'), + ('sqlcipher', 'r'), + ('u1dblite', 'g'), + ('u1dbcipher', 'm'), +] + + +# config the plot +plt.xlabel('time (s)') +plt.ylabel('cpu usage (%)') +plt.title('u1db backends CPU usage') + + +for fi in files: + + backend = fi[0] + color = fi[1] + filename = '%s.txt' % backend + + x = [] + y = [] + + xmax = None + xmin = None + ymax = None + ymin = None + + # read data from file + with open(filename, 'r') as f: + line = f.readline() + while line is not None: + time, cpu = tuple(line.strip().split(' ')) + cpu = float(cpu) + x.append(float(time)) + y.append(cpu) + if ymax == None or cpu > ymax: + ymax = cpu + xmax = time + if ymin == None or cpu < ymin: + ymin = cpu + xmin = time + line = f.readline() + if line == '': + break + + kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', + # 'marker': '.', + 'color': color, + } + plt.plot( + [n for n in smooth(x)], + [n for n in smooth(y)], + label=backend, **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) + 
#plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.ylim(0, 100) +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/profiling/backends_cpu_usage/test_u1db_sync.py b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py new file mode 100755 index 00000000..26ef8f9f --- /dev/null +++ b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py @@ -0,0 +1,113 @@ +#!/usr/bin/python + + +import u1db +import tempfile +import logging +import shutil +import os +import argparse +import time +import binascii +import random + + +from leap.soledad.client.sqlcipher import open as sqlcipher_open +from log_cpu_usage import LogCpuUsage +from u1dblite import open as u1dblite_open +from u1dbcipher import open as u1dbcipher_open + + +DOCS_TO_SYNC = 1000 +SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB +BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB + + +def get_data(size): + return binascii.hexlify(os.urandom(size/2)) + + +def run_test(testname, open_fun, tempdir, docs, *args): + logger.info('Starting test \"%s\".' % testname) + + # instantiate dbs + db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args) + db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args) + + # get sync target and synchsonizer + target = db2.get_sync_target() + synchronizer = u1db.sync.Synchronizer(db1, target) + + + # generate lots of small documents + logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC) + for content in docs: + db1.create_doc(content) + logger.info('%d documents created in source db.' % DOCS_TO_SYNC) + + # run the test + filename = testname + '.txt' + logger.info('Logging CPU usage to %s.' % filename) + log_cpu = LogCpuUsage(filename) + tstart = time.time() + + # start logging cpu + log_cpu.start() + logger.info('Sleeping for 5 seconds...') + time.sleep(5) + + # sync + logger.info('Starting sync...') + sstart = time.time() + synchronizer.sync() + send = time.time() + logger.info('Sync finished.') + + # stop logging cpu + logger.info('Sleeping for 5 seconds...') + time.sleep(5) + tend = time.time() + log_cpu.stop() + + # report + logger.info('Total sync time: %f seconds' % (send - sstart)) + logger.info('Total test time: %f seconds' % (tend - tstart)) + logger.info('Finished test \"%s\".' 
% testname) + + # close dbs + db1.close() + db2.close() + + +if __name__ == '__main__': + + # configure logger + logger = logging.getLogger(__name__) + LOG_FORMAT = '%(asctime)s %(message)s' + logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + + # get a temporary dir + tempdir = tempfile.mkdtemp() + logger.info('Using temporary directory %s' % tempdir) + + + # create a lot of documents with random sizes + docs = [] + for i in xrange(DOCS_TO_SYNC): + docs.append({ + 'index': i, + #'data': get_data( + # random.randrange( + # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)) + }) + + # run tests + run_test('sqlite', u1db.open, tempdir, docs, True) + run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True) + run_test('u1dblite', u1dblite_open, tempdir, docs) + run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True) + + # remove temporary dir + logger.info('Removing temporary directory %s' % tempdir) + shutil.rmtree(tempdir) diff --git a/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py b/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py new file mode 100755 index 00000000..02c68015 --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py @@ -0,0 +1,169 @@ +#!/usr/bin/python + +# This script finds the maximum upload size for a document in the current +# server. It pulls couch URL from Soledad config file and attempts multiple +# PUTs until it finds the maximum size supported by the server. +# +# As the Soledad couch user is not an admin, you have to pass a database into +# which the test will be run. The database should already exist and be +# initialized with soledad design documents. +# +# Use it like this: +# +# ./find_max_upload_size.py +# ./find_max_upload_size.py -h + +import os +import configparser +import logging +import argparse +import random +import string +import binascii +import json +import time +import uuid + + +from couchdb.client import Database +from socket import error as socket_error +from leap.soledad.common.couch import CouchDatabase + + +SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf' +PREFIX = '/tmp/soledad_test' +LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s' +RETRIES = 3 # number of times to retry uploading a document of a certain + # size after a failure + + +# configure logger +logger = logging.getLogger(__name__) + + +def config_log(level): + logging.basicConfig(format=LOG_FORMAT, level=level) + + +def log_to_file(filename): + handler = logging.FileHandler(filename, mode='a') + handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) + logger.addHandler(handler) + + +# create test dir +if not os.path.exists(PREFIX): + os.mkdir(PREFIX) + + +def get_couch_url(config_file=SOLEDAD_CONFIG_FILE): + config = configparser.ConfigParser() + config.read(config_file) + return config['soledad-server']['couch_url'] + + +# generate or load an uploadable doc with the given size in mb +def get_content(size): + fname = os.path.join(PREFIX, 'content-%d.json' % size) + if os.path.exists(fname): + logger.debug('Loading content with %d MB...' % size) + with open(fname, 'r') as f: + return f.read() + else: + length = int(size * 1024 ** 2) + logger.debug('Generating body with %d MB...' 
% size) + content = binascii.hexlify(os.urandom(length))[:length] + with open(fname, 'w') as f: + f.write(content) + return content + + +def delete_doc(db): + doc = db.get('largedoc') + db.delete(doc) + + +def upload(db, size, couch_db): + # try many times to be sure that size is infeasible + for i in range(RETRIES): + # wait until server is up to upload + while True: + try: + 'largedoc' in couch_db + break + except socket_error: + logger.debug('Waiting for server to come up...') + time.sleep(1) + # attempt to upload + try: + logger.debug( + 'Trying to upload %d MB document (attempt %d/%d)...' % + (size, (i+1), RETRIES)) + content = get_content(size) + logger.debug('Starting upload of %d bytes.' % len(content)) + doc = db.create_doc({'data': content}, doc_id='largedoc') + delete_doc(couch_db) + logger.debug('Success uploading %d MB doc.' % size) + return True + except Exception as e: + logger.debug('Failed to upload %d MB doc: %s' % (size, str(e))) + return False + + +def find_max_upload_size(db_uri): + db = CouchDatabase.open_database(db_uri, False) + couch_db = Database(db_uri) + logger.debug('Database URI: %s' % db_uri) + # delete eventual leftover from last run + if 'largedoc' in couch_db: + delete_doc(couch_db) + # phase 1: increase upload size exponentially + logger.info('Starting phase 1: increasing size exponentially.') + size = 1 + #import ipdb; ipdb.set_trace() + while True: + if upload(db, size, couch_db): + size *= 2 + else: + break + + # phase 2: binary search for maximum value + unable = size + able = size / 2 + logger.info('Starting phase 2: binary search for maximum value.') + while unable - able > 1: + size = able + ((unable - able) / 2) + if upload(db, size, couch_db): + able = size + else: + unable = size + return able + + +if __name__ == '__main__': + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + '-d', action='store_true', dest='debug', + help='print debugging information') + parser.add_argument( + '-l', dest='logfile', + help='log output to file') + parser.add_argument( + 'db_uri', help='the couch database URI to test') + args = parser.parse_args() + + # log to file + if args.logfile is not None: + log_to_file(args.logfile) + + # set loglevel + if args.debug is True: + config_log(logging.DEBUG) + else: + config_log(logging.INFO) + + # run test and report + logger.info('Will test using db at %s.' % args.db_uri) + maxsize = find_max_upload_size(args.db_uri) + logger.info('Max upload size is %d MB.' 
% maxsize) diff --git a/scripts/profiling/doc_put_memory_usage/get-mem.py b/scripts/profiling/doc_put_memory_usage/get-mem.py new file mode 100755 index 00000000..d64875fc --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/get-mem.py @@ -0,0 +1,16 @@ +#!/usr/bin/python + + +import psutil +import time + + +delta = 50 * 60 +start = time.time() + +while True: + now = time.time() + print "%s %s" % (now - start, psutil.phymem_usage().used) + time.sleep(0.1) + if now > start + delta: + break diff --git a/scripts/profiling/doc_put_memory_usage/plot-mem.py b/scripts/profiling/doc_put_memory_usage/plot-mem.py new file mode 100755 index 00000000..e24679a2 --- /dev/null +++ b/scripts/profiling/doc_put_memory_usage/plot-mem.py @@ -0,0 +1,73 @@ +#!/usr/bin/python + + +from matplotlib import pyplot as plt + + +files = [ + ('local', 'couchdb-json', 'b'), + ('local', 'bigcouch-json', 'r'), + ('local', 'couchdb-multipart', 'g'), + ('local', 'bigcouch-multipart', 'm'), +] + + +# config the plot +plt.xlabel('time') +plt.ylabel('memory usage') +plt.title('bigcouch versus couch memory usage') + + +for fi in files: + + machine = fi[0] + database = fi[1] + color = fi[2] + filename = '%s-%s.txt' % (machine, database) + + x = [] + y = [] + + xmax = None + xmin = None + ymax = None + ymin = None + + # read data from file + with open(filename, 'r') as f: + line = f.readline() + while line is not None: + time, mem = tuple(line.strip().split(' ')) + mem = float(mem) / (10**9) + x.append(float(time)) + y.append(mem) + if ymax == None or mem > ymax: + ymax = mem + xmax = time + if ymin == None or mem < ymin: + ymin = mem + xmin = time + line = f.readline() + if line == '': + break + + kwargs = { + 'linewidth': 1.0, + 'linestyle': '-', + # 'marker': '.', + 'color': color, + } + plt.plot(x, y, label=database, **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax)) + #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin)) + + +plt.grid() +plt.legend() +plt.show() + diff --git a/scripts/update_design_docs.py b/scripts/update_design_docs.py deleted file mode 100644 index e7b5a29c..00000000 --- a/scripts/update_design_docs.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/python - -# This script updates Soledad's design documents in the session database and -# all user databases with contents from the installed leap.soledad.common -# package. - -import json -import logging -import argparse -import re -import threading -import binascii - - -from getpass import getpass -from ConfigParser import ConfigParser -from couchdb.client import Server -from couchdb.http import Resource, Session -from datetime import datetime -from urlparse import urlparse - - -from leap.soledad.common import ddocs - - -# parse command line for the log file name -logger_fname = "/tmp/update-design-docs_%s.log" % \ - str(datetime.now()).replace(' ', '_') -parser = argparse.ArgumentParser() -parser.add_argument('--log', action='store', default=logger_fname, type=str, - required=False, help='the name of the log file', nargs=1) -args = parser.parse_args() - - -# configure the logger -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -print "Logging to %s." 
% args.log -logging.basicConfig( - filename=args.log, - format="%(asctime)-15s %(message)s") - - -# configure threads -max_threads = 20 -semaphore_pool = threading.BoundedSemaphore(value=max_threads) -threads = [] - -# get couch url -cp = ConfigParser() -cp.read('/etc/leap/soledad-server.conf') -url = urlparse(cp.get('soledad-server', 'couch_url')) - -# get admin password -netloc = re.sub('^.*@', '', url.netloc) -url = url._replace(netloc=netloc) -password = getpass("Admin password for %s: " % url.geturl()) -url = url._replace(netloc='admin:%s@%s' % (password, netloc)) - -resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10)) -server = Server(url=resource) - -hidden_url = re.sub( - 'http://(.*):.*@', - 'http://\\1:xxxxx@', - url.geturl()) - -print """ -========== -ATTENTION! -========== - -This script will modify Soledad's shared and user databases in: - - %s - -This script does not make a backup of the couch db data, so make sure you -have a copy or you may loose data. -""" % hidden_url -confirm = raw_input("Proceed (type uppercase YES)? ") - -if confirm != "YES": - exit(1) - -# convert design doc content - -design_docs = { - '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)), - '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)), - '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)), -} - -# -# Thread -# - -class DBWorkerThread(threading.Thread): - - def __init__(self, server, dbname, db_idx, db_len, release_fun): - threading.Thread.__init__(self) - self._dbname = dbname - self._cdb = server[self._dbname] - self._db_idx = db_idx - self._db_len = db_len - self._release_fun = release_fun - - def run(self): - - logger.info("(%d/%d) Updating db %s." % (self._db_idx, self._db_len, - self._dbname)) - - for doc_id in design_docs: - doc = self._cdb[doc_id] - for key in ['lists', 'views', 'updates']: - if key in design_docs[doc_id]: - doc[key] = design_docs[doc_id][key] - self._cdb.save(doc) - - # release the semaphore - self._release_fun() - - -db_idx = 0 -db_len = len(server) -for dbname in server: - - db_idx += 1 - - if not (dbname.startswith('user-') or dbname == 'shared') \ - or dbname == 'user-test-db': - logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname)) - continue - - - # get access to couch db - cdb = Server(url.geturl())[dbname] - - #--------------------------------------------------------------------- - # Start DB worker thread - #--------------------------------------------------------------------- - semaphore_pool.acquire() - thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release) - thread.daemon = True - thread.start() - threads.append(thread) - -map(lambda thread: thread.join(), threads) -- cgit v1.2.3 From 45ffe1dd22d6e902a7e8eeca786b7ad63ec1d139 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 7 Apr 2014 12:48:25 -0300 Subject: Prevent Couch Server State from making one unneeded GET request (#5386). 
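
As a minimal sketch of the idea (the URL below is illustrative), building a
couchdb.client.Database directly just records the resource and defers HTTP
traffic until the object is actually used, while CouchDatabase.open_database()
verifies that the database exists up front, costing one extra GET per
instantiation:

    from couchdb.client import Database

    # no request is sent here; the URL is only stored for later use
    db = Database('http://localhost:5984/user-db')

    # a request only happens on actual access, e.g.:
    # doc = db.get('some-doc-id')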
---
 .../changes/feature_5386_prevent-uneeded-get-on-couch-server-state | 2 ++
 common/src/leap/soledad/common/couch.py | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)
 create mode 100644 common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state

diff --git a/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state b/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state
new file mode 100644
index 00000000..95919873
--- /dev/null
+++ b/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state
@@ -0,0 +1,2 @@
+  o Prevent Couch Server State from making one unneeded GET request on
+    instantiation (#5386).
diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py
index b836c997..9fb717c2 100644
--- a/common/src/leap/soledad/common/couch.py
+++ b/common/src/leap/soledad/common/couch.py
@@ -1489,9 +1489,9 @@ class CouchServerState(ServerState):
         :return: The CouchDatabase object.
         :rtype: CouchDatabase
         """
-        return CouchDatabase.open_database(
-            self._couch_url + '/' + dbname,
-            create=False,
+        return CouchDatabase(
+            self._couch_url,
+            dbname,
             ensure_ddocs=False)

     def ensure_database(self, dbname):
--
cgit v1.2.3


From 764fb1e8060cb7169744dd6602d2550ad0c02178 Mon Sep 17 00:00:00 2001
From: drebs
Date: Mon, 7 Apr 2014 14:55:26 -0300
Subject: Fix class doc.

---
 common/src/leap/soledad/common/couch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py
index 9fb717c2..a7828e98 100644
--- a/common/src/leap/soledad/common/couch.py
+++ b/common/src/leap/soledad/common/couch.py
@@ -353,7 +353,7 @@ class CouchDatabase(CommonBackend):
                      release_fun):
             """
             :param db: The database from where to get the document.
-            :type db: u1db.Database
+            :type db: CouchDatabase
             :param doc_id: The doc_id of the document to be retrieved.
             :type doc_id: str
             :param check_for_conflicts: Whether the get_doc() method should
--
cgit v1.2.3


From 4500e86c944ea2c64837127f1f27f2da607a1670 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Touceda?=
Date: Wed, 9 Apr 2014 15:24:10 -0300
Subject: Improve changelog readability

---
 CHANGELOG | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index a1876ef2..d3a5b9b4 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,4 @@
-0.4.5 Apr 4:
+0.5.0 Apr 4, 2014:
 Client:
   o Catch lock timeout exception. Fixes #4435.
   o Add lock for create_doc and update_indexes call, prevents
@@ -62,9 +62,7 @@ Server:
     Server application. Fixes #5302.
   o Enable Gzip compression on the soledad wsgi app.

--- 2014 --
-
-0.4.4 Dec 6:
+0.4.4 Dec 6, 2013:
 Client:
   o Add MAC verification to the recovery document and
     soledad.json. Closes #4348.
 Common:
   o Add unicode conversion to put_doc(). Closes #4095.
   o Remove tests dependency on nose2. Closes #4258.

-0.4.3 Nov 15:
+0.4.3 Nov 15, 2013:
 Client:
   o Defaults detected encoding to utf-8 to avoid bug if detected
     encoding is None. Closes: #4417
   o Open db in autocommit mode, to avoid nested transactions problems.
     Closes: #4400

-0.4.2 Nov 1:
+0.4.2 Nov 1, 2013:
 Client:
   o Support non-ascii passwords. Closes #4001.
   o Change error severity for missing secrets path.
 Common:
 Server:
   o Allow for locking the shared database. Closes #4097.

-0.4.1 Oct 4:
+0.4.1 Oct 4, 2013:
 Client:
   o Save only UTF8 strings. Related to #3660.
-0.4.0 Sep 20:
+0.4.0 Sep 20, 2013:
 Client:
   o Remove redundant logging when creating data dirs.
 Server:
 Common:
   o Turn couchdb dependency for common into optional. Closes #2167.
   o Add verification for couch permissions. Closes #3501.

-0.3.2 Sep 6:
+0.3.2 Sep 6, 2013:
 Client:
   o Use dirspec instead of plain xdg. Closes #3574.
 Server:
   o Fix the init script for soledad server so that it uses the new
     package namespace.

-0.3.1 Aug 23:
+0.3.1 Aug 23, 2013:
 Client:
   o Add libsqlite3-dev requirement for soledad.
   o Check for None in private methods that depend on _db. Closes:
     #3487.
   o Add versioneer, parse_requirements

-0.3.0 Aug 9:
+0.3.0 Aug 9, 2013:
 Client:
   o Thread safe wrapper for pysqlcipher.
   o Fix a couple of typos that prevented certain functionality to
 Server:
   o A plaintext port is not opened by soledad server initscript
     call to twistd web anymore. Closes #3254.

-0.2.3 Jul 26:
+0.2.3 Jul 26, 2013:
 Client:
   o Avoid possible timing attack in document's mac comparison by
     comparing hashes instead of plain macs. Closes #3243.
 Server:
     pointing the PRIVKEY_PATH to /etc/leap/soledad-server.key. Fixes
     #3174.

-0.2.2 Jul 12:
+0.2.2 Jul 12, 2013:
 Client:
   o Add method for password change.
 Server:
   o Use the right name as the WSGI server

-0.2.1 Jun 28:
+0.2.1 Jun 28, 2013:
 Client:
   o Do not list the backends in the __init__'s __all__ to allow not
     supporting couch on the client side until the code is divided into
--
cgit v1.2.3


From 11757fb0d071b753819a04d8504a72baed80db2f Mon Sep 17 00:00:00 2001
From: drebs
Date: Tue, 8 Apr 2014 11:20:19 -0300
Subject: Prevent couch instantiation from creating the db (#5386).

---
 ..._prevent-couch-backend-from-trying-to-create-db | 1 +
 common/src/leap/soledad/common/couch.py | 36 +++++------
 common/src/leap/soledad/common/tests/test_couch.py | 69 ++++++++++++++--------
 .../tests/test_couch_operations_atomicity.py | 11 +++-
 .../src/leap/soledad/common/tests/test_server.py | 58 +++++++++---------
 5 files changed, 100 insertions(+), 75 deletions(-)
 create mode 100644 common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db

diff --git a/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db b/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db
new file mode 100644
index 00000000..9bbac329
--- /dev/null
+++ b/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db
@@ -0,0 +1 @@
+  o Prevent couch backend from always trying to create the database (#5386).
diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py
index a7828e98..8ed704ba 100644
--- a/common/src/leap/soledad/common/couch.py
+++ b/common/src/leap/soledad/common/couch.py
@@ -31,9 +31,10 @@ import threading

 from StringIO import StringIO
 from collections import defaultdict
+from urlparse import urljoin


-from couchdb.client import Server
+from couchdb.client import Server, Database
 from couchdb.http import (
     ResourceConflict,
     ResourceNotFound,
     ServerError,
@@ -380,7 +381,7 @@ class CouchDatabase(CommonBackend):
         self._release_fun()

     @classmethod
-    def open_database(cls, url, create, ensure_ddocs=False):
+    def open_database(cls, url, create, replica_uid=None, ensure_ddocs=False):
         """
         Open a U1DB database using CouchDB as backend.

         :param url: the url of the database replica
         :type url: str
         :param create: should the replica be created if it does not exist?
         :type create: bool
+        :param replica_uid: an optional unique replica identifier
+        :type replica_uid: str
         :param ensure_ddocs: Ensure that the design docs exist on server.
         :type ensure_ddocs: bool

@@ -406,10 +409,11 @@ class CouchDatabase(CommonBackend):
         except ResourceNotFound:
             if not create:
                 raise DatabaseDoesNotExist()
-        return cls(url, dbname, ensure_ddocs=ensure_ddocs)
+            server.create(dbname)
+        return cls(url, dbname, replica_uid=replica_uid, ensure_ddocs=ensure_ddocs)

-    def __init__(self, url, dbname, replica_uid=None, full_commit=True,
-                 session=None, ensure_ddocs=True):
+    def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True,
+                 session=None):
         """
         Create a new Couch data container.

         :param url: the url of the couch database
         :type url: str
         :param dbname: the database name
         :type dbname: str
         :param replica_uid: an optional unique replica identifier
         :type replica_uid: str
-        :param full_commit: turn on the X-Couch-Full-Commit header
-        :type full_commit: bool
-        :param session: an http.Session instance or None for a default session
-        :type session: http.Session
         :param ensure_ddocs: Ensure that the design docs exist on server.
         :type ensure_ddocs: bool
+        :param session: an http.Session instance or None for a default session
+        :type session: http.Session
         """
         # save params
         self._url = url
-        self._full_commit = full_commit
         if session is None:
             session = Session(timeout=COUCH_TIMEOUT)
         self._session = session
         self._factory = CouchDocument
         self._real_replica_uid = None
         # configure couch
-        self._server = Server(url=self._url,
-                              full_commit=self._full_commit,
-                              session=self._session)
         self._dbname = dbname
-        try:
-            self._database = self._server[self._dbname]
-        except ResourceNotFound:
-            self._server.create(self._dbname)
-            self._database = self._server[self._dbname]
+        self._database = Database(
+            urljoin(self._url, self._dbname),
+            self._session)
         if replica_uid is not None:
             self._set_replica_uid(replica_uid)
         if ensure_ddocs:
@@ -482,7 +478,8 @@
         """
         Delete a U1DB CouchDB database.
         """
-        del(self._server[self._dbname])
+        server = Server(url=self._url)
+        del(server[self._dbname])

     def close(self):
         """
         Release any resources associated with this database.

         :return: True if db was successfully closed.
         :rtype: bool
         """
         self._url = None
         self._full_commit = None
         self._session = None
-        self._server = None
         self._database = None
         return True

diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py
index 86bb4b93..77c46e61 100644
--- a/common/src/leap/soledad/common/tests/test_couch.py
+++ b/common/src/leap/soledad/common/tests/test_couch.py
@@ -25,6 +25,7 @@ import copy
 import shutil
 from base64 import b64decode
 from mock import Mock
+from urlparse import urljoin

 from couchdb.client import Server
 from u1db import errors as u1db_errors
@@ -151,8 +152,11 @@ class CouchDBTestCase(unittest.TestCase):
 class TestCouchBackendImpl(CouchDBTestCase):

     def test__allocate_doc_id(self):
-        db = couch.CouchDatabase('http://localhost:' + str(self.wrapper.port),
-                                 'u1db_tests', ensure_ddocs=True)
+        db = couch.CouchDatabase.open_database(
+            urljoin(
+                'http://localhost:' + str(self.wrapper.port), 'u1db_tests'),
+            create=True,
+            ensure_ddocs=True)
         doc_id1 = db._allocate_doc_id()
         self.assertTrue(doc_id1.startswith('D-'))
         self.assertEqual(34, len(doc_id1))
@@ -166,28 +170,35 @@ class TestCouchBackendImpl(CouchDBTestCase):

 def make_couch_database_for_test(test, replica_uid):
     port = str(test.wrapper.port)
-    return couch.CouchDatabase('http://localhost:' + port, replica_uid,
-                               replica_uid=replica_uid or 'test',
-                               ensure_ddocs=True)
+    return couch.CouchDatabase.open_database(
+        urljoin('http://localhost:' + port, replica_uid),
+        create=True,
+        replica_uid=replica_uid or 'test',
+        ensure_ddocs=True)


 def copy_couch_database_for_test(test, db):
     port = str(test.wrapper.port)
     couch_url = 'http://localhost:' + port
     new_dbname = db._replica_uid + '_copy'
-    new_db = couch.CouchDatabase(couch_url,
-                                 new_dbname,
-                                 replica_uid=db._replica_uid or 'test')
+    new_db = couch.CouchDatabase.open_database(
+        urljoin(couch_url, new_dbname),
+        create=True,
+        replica_uid=db._replica_uid or 'test')
     # copy all docs
     old_couch_db = Server(couch_url)[db._replica_uid]
     new_couch_db = Server(couch_url)[new_dbname]
     for doc_id in old_couch_db:
         doc = old_couch_db.get(doc_id)
+        # bypass u1db_config document
+        if doc_id == 'u1db_config':
+            pass
         # copy design docs
-        if ('u1db_rev' not in doc):
+        elif doc_id.startswith('_design'):
+            del doc['_rev']
             new_couch_db.save(doc)
         # copy u1db docs
-        else:
+        elif 'u1db_rev' in doc:
             new_doc = {
                 '_id': doc['_id'],
                 'u1db_transactions': doc['u1db_transactions'],
@@ -228,7 +239,7 @@ class CouchTests(test_backends.AllDatabaseTests, CouchDBTestCase):

     def setUp(self):
         test_backends.AllDatabaseTests.setUp(self)
         # save db info because of test_close
-        self._server = self.db._server
+        self._url = self.db._url
         self._dbname = self.db._dbname

     def tearDown(self):
@@ -238,7 +249,8 @@ class CouchTests(test_backends.AllDatabaseTests, CouchDBTestCase):
         if self.id() == \
                 'leap.soledad.common.tests.test_couch.CouchTests.' \
                 'test_close(couch)':
-            del(self._server[self._dbname])
+            server = Server(url=self._url)
+            del(server[self._dbname])
         else:
             self.db.delete_database()
         test_backends.AllDatabaseTests.tearDown(self)
@@ -355,10 +367,10 @@ from u1db.backends.inmemory import InMemoryIndex

 class IndexedCouchDatabase(couch.CouchDatabase):

-    def __init__(self, url, dbname, replica_uid=None, full_commit=True,
-                 session=None, ensure_ddocs=True):
-        old_class.__init__(self, url, dbname, replica_uid, full_commit,
-                           session, ensure_ddocs=ensure_ddocs)
+    def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True,
+                 session=None):
+        old_class.__init__(self, url, dbname, replica_uid=replica_uid,
+                           ensure_ddocs=ensure_ddocs, session=session)
         self._indexes = {}

     def _put_doc(self, old_doc, doc):
@@ -467,8 +479,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase):

     def setUp(self):
         CouchDBTestCase.setUp(self)
-        self.db = couch.CouchDatabase(
-            'http://127.0.0.1:%d' % self.wrapper.port, 'test',
+        self.db = couch.CouchDatabase.open_database(
+            urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'),
+            create=True,
             ensure_ddocs=False)  # note that we don't enforce ddocs here

     def tearDown(self):
@@ -509,8 +522,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase):
         Test that all methods that access design documents list functions
         will raise if the functions are not present.
         """
-        self.db = couch.CouchDatabase(
-            'http://127.0.0.1:%d' % self.wrapper.port, 'test',
+        self.db = couch.CouchDatabase.open_database(
+            urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'),
+            create=True,
             ensure_ddocs=True)
         # erase views from _design/transactions
         transactions = self.db._database['_design/transactions']
@@ -538,8 +552,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase):
         Test that all methods that access design documents list functions
         will raise if the functions are not present.
         """
-        self.db = couch.CouchDatabase(
-            'http://127.0.0.1:%d' % self.wrapper.port, 'test',
+        self.db = couch.CouchDatabase.open_database(
+            urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'),
+            create=True,
             ensure_ddocs=True)
         # erase views from _design/transactions
         transactions = self.db._database['_design/transactions']
@@ -567,8 +582,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase):
         Test that all methods that access design documents' named views will
         raise if the views are not present.
         """
-        self.db = couch.CouchDatabase(
-            'http://127.0.0.1:%d' % self.wrapper.port, 'test',
+        self.db = couch.CouchDatabase.open_database(
+            urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'),
+            create=True,
             ensure_ddocs=True)
         # erase views from _design/docs
         docs = self.db._database['_design/docs']
@@ -608,8 +624,9 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase):
         Test that all methods that access design documents will raise if the
         design docs are not present.
         """
-        self.db = couch.CouchDatabase(
-            'http://127.0.0.1:%d' % self.wrapper.port, 'test',
+        self.db = couch.CouchDatabase.open_database(
+            urljoin('http://127.0.0.1:%d' % self.wrapper.port, 'test'),
+            create=True,
             ensure_ddocs=True)
         # delete _design/docs
         del self.db._database['_design/docs']
diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py
index 3c457cc5..3c219b91 100644
--- a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py
+++ b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py
@@ -24,6 +24,10 @@ import mock
 import tempfile
 import threading

+
+from urlparse import urljoin
+
+
 from leap.soledad.client import Soledad
 from leap.soledad.common.couch import CouchDatabase, CouchServerState
 from leap.soledad.common.tests.test_couch import CouchDBTestCase
@@ -101,8 +105,11 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer):
         TestCaseWithServer.setUp(self)
         CouchDBTestCase.setUp(self)
         self._couch_url = 'http://localhost:' + str(self.wrapper.port)
-        self.db = CouchDatabase(
-            self._couch_url, 'user-user-uuid', replica_uid='replica')
+        self.db = CouchDatabase.open_database(
+            urljoin(self._couch_url, 'user-user-uuid'),
+            create=True,
+            replica_uid='replica',
+            ensure_ddocs=True)
         self.tempdir = tempfile.mkdtemp(prefix="leap_tests-")

     def tearDown(self):
diff --git a/common/src/leap/soledad/common/tests/test_server.py b/common/src/leap/soledad/common/tests/test_server.py
index f8d2a64f..6fe9211c 100644
--- a/common/src/leap/soledad/common/tests/test_server.py
+++ b/common/src/leap/soledad/common/tests/test_server.py
@@ -27,6 +27,7 @@ import mock
 import time
 import binascii

+from urlparse import urljoin

 from leap.common.testing.basetest import BaseLeapTest
 from leap.soledad.common.couch import (
@@ -56,7 +57,8 @@ from leap.soledad.server.auth import URLToAuthorization
 def _couch_ensure_database(self, dbname):
     db = CouchDatabase.open_database(
         self._couch_url + '/' + dbname,
-        create=True)
+        create=True,
+        ensure_ddocs=True)
     return db, db._replica_uid

 CouchServerState.ensure_database = _couch_ensure_database
@@ -352,11 +354,10 @@ class EncryptedSyncTestCase(
         self.assertEqual([], doclist)
         doc1 = sol1.create_doc(json.loads(simple_doc))
         # ensure remote db exists before syncing
-        db = CouchDatabase(
-            self._couch_url,
-            # the name of the user database is "user-<uuid>".
-            'user-user-uuid',
-        )
+        db = CouchDatabase.open_database(
+            urljoin(self._couch_url, 'user-user-uuid'),
+            create=True,
+            ensure_ddocs=True)
         # sync with server
         sol1._server_url = self.getURL()
         sol1.sync()
@@ -408,11 +409,10 @@ class EncryptedSyncTestCase(
         self.assertEqual([], doclist)
         doc1 = sol1.create_doc(json.loads(simple_doc))
         # ensure remote db exists before syncing
-        db = CouchDatabase(
-            self._couch_url,
-            # the name of the user database is "user-<uuid>".
-            'user-user-uuid',
-        )
+        db = CouchDatabase.open_database(
+            urljoin(self._couch_url, 'user-user-uuid'),
+            create=True,
+            ensure_ddocs=True)
         # sync with server
         sol1._server_url = self.getURL()
         sol1.sync()
@@ -468,11 +468,10 @@ class EncryptedSyncTestCase(
         content = binascii.hexlify(os.urandom(length/2))  # len() == length
         doc1 = sol1.create_doc({'data': content})
         # ensure remote db exists before syncing
-        db = CouchDatabase(
-            self._couch_url,
-            # the name of the user database is "user-<uuid>".
-            'user-user-uuid',
-        )
+        db = CouchDatabase.open_database(
+            urljoin(self._couch_url, 'user-user-uuid'),
+            create=True,
+            ensure_ddocs=True)
         # sync with server
         sol1._server_url = self.getURL()
         sol1.sync()
@@ -512,11 +511,10 @@ class EncryptedSyncTestCase(
         for i in range(0, number_of_docs):
             sol1.create_doc(json.loads(simple_doc))
         # ensure remote db exists before syncing
-        db = CouchDatabase(
-            self._couch_url,
-            # the name of the user database is "user-<uuid>".
-            'user-user-uuid',
-        )
+        db = CouchDatabase.open_database(
+            urljoin(self._couch_url, 'user-user-uuid'),
+            create=True,
+            ensure_ddocs=True)
         # sync with server
         sol1._server_url = self.getURL()
         sol1.sync()
@@ -558,8 +556,14 @@ class LockResourceTestCase(
         self.tempdir = tempfile.mkdtemp(prefix="leap_tests-")
         self._couch_url = 'http://localhost:' + str(self.wrapper.port)
         # create the databases
-        CouchDatabase(self._couch_url, 'shared')
-        CouchDatabase(self._couch_url, 'tokens')
+        CouchDatabase.open_database(
+            urljoin(self._couch_url, 'shared'),
+            create=True,
+            ensure_ddocs=True)
+        CouchDatabase.open_database(
+            urljoin(self._couch_url, 'tokens'),
+            create=True,
+            ensure_ddocs=True)
         self._state = CouchServerState(
             self._couch_url, 'shared', 'tokens')

@@ -567,10 +571,10 @@
         CouchDBTestCase.tearDown(self)
         TestCaseWithServer.tearDown(self)
         # delete remote database
-        db = CouchDatabase(
-            self._couch_url,
-            'shared',
-        )
+        db = CouchDatabase.open_database(
+            urljoin(self._couch_url, 'shared'),
+            create=True,
+            ensure_ddocs=True)
         db.delete_database()

     def test__try_obtain_filesystem_lock(self):
--
cgit v1.2.3


From ae5b0b57a14e0df45e2ed708eb5c8a495530ddde Mon Sep 17 00:00:00 2001
From: drebs
Date: Thu, 10 Apr 2014 13:17:37 -0300
Subject: Properly close connections on couch backend (#5493).

---
 ...493_properly-close-connections-on-couch-backend | 2 +
 common/src/leap/soledad/common/couch.py | 87 +++++++++++++++-------
 2 files changed, 62 insertions(+), 27 deletions(-)
 create mode 100644 common/changes/bug_5493_properly-close-connections-on-couch-backend

diff --git a/common/changes/bug_5493_properly-close-connections-on-couch-backend b/common/changes/bug_5493_properly-close-connections-on-couch-backend
new file mode 100644
index 00000000..3cb55168
--- /dev/null
+++ b/common/changes/bug_5493_properly-close-connections-on-couch-backend
@@ -0,0 +1,2 @@
+  o Properly close connections on couch backend. Also prevent file descriptor
+    leaks on tests. Closes #5493.
diff --git a/common/src/leap/soledad/common/couch.py b/common/src/leap/soledad/common/couch.py
index 8ed704ba..0aa84170 100644
--- a/common/src/leap/soledad/common/couch.py
+++ b/common/src/leap/soledad/common/couch.py
@@ -32,6 +32,7 @@ import threading
 from StringIO import StringIO
 from collections import defaultdict
 from urlparse import urljoin
+from contextlib import contextmanager


 from couchdb.client import Server, Database
@@ -39,7 +40,7 @@ from couchdb.http import (
     ResourceConflict,
     ResourceNotFound,
     ServerError,
-    Session,
+    Session as CouchHTTPSession,
 )
 from u1db import query_parser, vectorclock
 from u1db.errors import (
@@ -332,6 +333,35 @@ class MultipartWriter(object):
             self.headers[name] = value


+class Session(CouchHTTPSession):
+    """
+    An HTTP session that can be closed.
+    """
+
+    def close_connections(self):
+        for key, conns in list(self.conns.items()):
+            for conn in conns:
+                conn.close()
+
+
+@contextmanager
+def couch_server(url):
+    """
+    Provide a connection to a couch server and clean up after use.
+
+    For database creation and deletion we use an ephemeral connection to the
+    couch server. That connection has to be properly closed, so we provide it
+    as a context manager.
+
+    :param url: The URL of the Couch server.
+    :type url: str
+    """
+    session = Session(timeout=COUCH_TIMEOUT)
+    server = Server(url=url, session=session)
+    yield server
+    session.close_connections()
+
+
 class CouchDatabase(CommonBackend):
     """
     A U1DB implementation that uses CouchDB as its persistence layer.
@@ -403,17 +433,16 @@ class CouchDatabase(CommonBackend):
             raise InvalidURLError
         url = m.group(1)
         dbname = m.group(2)
-        server = Server(url=url)
-        try:
-            server[dbname]
-        except ResourceNotFound:
-            if not create:
-                raise DatabaseDoesNotExist()
-            server.create(dbname)
+        with couch_server(url) as server:
+            try:
+                server[dbname]
+            except ResourceNotFound:
+                if not create:
+                    raise DatabaseDoesNotExist()
+                server.create(dbname)
         return cls(url, dbname, replica_uid=replica_uid, ensure_ddocs=ensure_ddocs)

-    def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True,
-                 session=None):
+    def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True):
         """
         Create a new Couch data container.

         :param url: the url of the couch database
         :type url: str
         :param dbname: the database name
         :type dbname: str
         :param replica_uid: an optional unique replica identifier
         :type replica_uid: str
         :param ensure_ddocs: Ensure that the design docs exist on server.
         :type ensure_ddocs: bool
-        :param session: an http.Session instance or None for a default session
-        :type session: http.Session
         """
         # save params
         self._url = url
-        if session is None:
-            session = Session(timeout=COUCH_TIMEOUT)
-        self._session = session
+        self._session = Session(timeout=COUCH_TIMEOUT)
         self._factory = CouchDocument
         self._real_replica_uid = None
         # configure couch
@@ -478,8 +503,9 @@
         """
         Delete a U1DB CouchDB database.
         """
-        with couch_server(self._url) as server:
-            del(server[self._dbname])
+        with couch_server(self._url) as server:
+            del(server[self._dbname])
+        self.close_connections()

     def close(self):
         """
         Release any resources associated with this database.

         :return: True if db was successfully closed.
         :rtype: bool
         """
+        self.close_connections()
         self._url = None
         self._full_commit = None
         self._session = None
         self._database = None
         return True

+    def close_connections(self):
+        """
+        Close all open connections to the couch server.
+        """
+        if self._session is not None:
+            self._session.close_connections()
+
+    def __del__(self):
+        """
+        Close the database upon garbage collection.
+        """
+        self.close()
+
     def _set_replica_uid(self, replica_uid):
         """
         Force the replica uid to be set.
@@ -851,7 +891,9 @@
         try:
             self._database.resource.put_json(
                 doc.doc_id, body=buf.getvalue(), headers=envelope.headers)
-            self._renew_couch_session()
+            # What follows is a workaround for an ugly bug. See:
+            # https://leap.se/code/issues/5448
+            self.close_connections()
         except ResourceConflict:
             raise RevisionConflict()

@@ -1423,15 +1465,6 @@
                 continue
             yield t._doc

-    def _renew_couch_session(self):
-        """
-        Create a new couch connection session.
-
-        This is a workaround for #5448. Will not be needed once bigcouch is
-        merged with couchdb.
-        """
-        self._database.resource.session = Session(timeout=COUCH_TIMEOUT)
-

 class CouchSyncTarget(CommonSyncTarget):
     """
--
cgit v1.2.3


From f20d32698576f521f2fb53de0e636d318e826729 Mon Sep 17 00:00:00 2001
From: drebs
Date: Thu, 10 Apr 2014 13:18:02 -0300
Subject: Prevent file descriptor leaks on tests.
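Tests that talk to couch directly now create their own Session, hand it to
the python-couchdb Server, and close it when they are done, so no sockets
are left behind. A minimal sketch of the pattern applied throughout the
tests below (the couch URL and database name are illustrative):

    from couchdb.client import Server
    from leap.soledad.common import couch

    session = couch.Session()
    server = Server('http://localhost:5984', session=session)
    try:
        db = server['some-test-db']  # assumed to already exist
        # ... exercise the database ...
    finally:
        # cleanup connections to prevent file descriptor leaks
        session.close_connections()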
---
 common/src/leap/soledad/common/tests/test_couch.py | 42 ++++++++------
 .../tests/test_couch_operations_atomicity.py | 4 ++
 .../src/leap/soledad/common/tests/test_crypto.py | 5 +-
 .../src/leap/soledad/common/tests/test_server.py | 12 ++++
 .../src/leap/soledad/common/tests/test_soledad.py | 18 +++++-
 .../leap/soledad/common/tests/test_sqlcipher.py | 4 --
 .../src/leap/soledad/common/tests/test_target.py | 3 +-
 .../soledad/common/tests/u1db_tests/__init__.py | 11 +++-
 .../common/tests/u1db_tests/test_backends.py | 3 +
 .../soledad/common/tests/u1db_tests/test_sync.py | 64 +++++++++++-----------
 10 files changed, 106 insertions(+), 60 deletions(-)

diff --git a/common/src/leap/soledad/common/tests/test_couch.py b/common/src/leap/soledad/common/tests/test_couch.py
index 77c46e61..17d4a519 100644
--- a/common/src/leap/soledad/common/tests/test_couch.py
+++ b/common/src/leap/soledad/common/tests/test_couch.py
@@ -27,8 +27,8 @@ from base64 import b64decode
 from mock import Mock
 from urlparse import urljoin

-from couchdb.client import Server
 from u1db import errors as u1db_errors
+from couchdb.client import Server

 from leap.common.files import mkdir_p
@@ -186,8 +186,9 @@ def copy_couch_database_for_test(test, db):
         create=True,
         replica_uid=db._replica_uid or 'test')
     # copy all docs
-    old_couch_db = Server(couch_url)[db._replica_uid]
-    new_couch_db = Server(couch_url)[new_dbname]
+    session = couch.Session()
+    old_couch_db = Server(couch_url, session=session)[db._replica_uid]
+    new_couch_db = Server(couch_url, session=session)[new_dbname]
     for doc_id in old_couch_db:
         doc = old_couch_db.get(doc_id)
         # bypass u1db_config document
@@ -217,6 +218,8 @@ def copy_couch_database_for_test(test, db):
             if (att is not None):
                 new_couch_db.put_attachment(new_doc, att,
                                             filename=att_name)
+    # cleanup connections to prevent file descriptor leaks
+    session.close_connections()
     return new_db


@@ -249,8 +252,10 @@ class CouchTests(test_backends.AllDatabaseTests, CouchDBTestCase):
         if self.id() == \
                 'leap.soledad.common.tests.test_couch.CouchTests.' \
                 'test_close(couch)':
-            server = Server(url=self._url)
+            session = couch.Session()
+            server = Server(url=self._url, session=session)
             del(server[self._dbname])
+            session.close_connections()
         else:
             self.db.delete_database()
         test_backends.AllDatabaseTests.tearDown(self)
@@ -367,10 +372,9 @@ from u1db.backends.inmemory import InMemoryIndex

 class IndexedCouchDatabase(couch.CouchDatabase):

-    def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True,
-                 session=None):
+    def __init__(self, url, dbname, replica_uid=None, ensure_ddocs=True):
         old_class.__init__(self, url, dbname, replica_uid=replica_uid,
-                           ensure_ddocs=ensure_ddocs, session=session)
+                           ensure_ddocs=ensure_ddocs)
         self._indexes = {}

     def _put_doc(self, old_doc, doc):
@@ -459,19 +463,22 @@ class CouchDatabaseSyncTests(test_sync.DatabaseSyncTests, CouchDBTestCase):
         self.db1 = None
         self.db2 = None
         self.db3 = None
+        self.db1_copy = None
+        self.db2_copy = None
         test_sync.DatabaseSyncTests.setUp(self)

     def tearDown(self):
-        self.db and self.db.delete_database()
-        self.db1 and self.db1.delete_database()
-        self.db2 and self.db2.delete_database()
-        self.db3 and self.db3.delete_database()
-        db = self.create_database('test1_copy', 'source')
-        db.delete_database()
-        db = self.create_database('test2_copy', 'target')
-        db.delete_database()
-        db = self.create_database('test3', 'target')
-        db.delete_database()
+        for db in [self.db, self.db1, self.db2, self.db3, self.db1_copy,
+                   self.db2_copy]:
+            if db is not None:
+                db.delete_database()
+                db.close()
+        for replica_uid, dbname in [('test1_copy', 'source'),
+                                    ('test2_copy', 'target'), ('test3', 'target')]:
+            db = self.create_database(replica_uid, dbname)
+            db.delete_database()
+            # cleanup connections to avoid leaking file descriptors
+            db.close()
         test_sync.DatabaseSyncTests.tearDown(self)


@@ -486,6 +493,7 @@ class CouchDatabaseExceptionsTests(CouchDBTestCase):

     def tearDown(self):
         self.db.delete_database()
+        self.db.close()

     def test_missing_design_doc_raises(self):
         """
diff --git a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py
index 3c219b91..b03f79e7 100644
--- a/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py
+++ b/common/src/leap/soledad/common/tests/test_couch_operations_atomicity.py
@@ -114,6 +114,7 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer):

     def tearDown(self):
         self.db.delete_database()
+        self.db.close()
         CouchDBTestCase.tearDown(self)
         TestCaseWithServer.tearDown(self)

@@ -218,6 +219,7 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer):

         _create_docs_and_sync(sol, 0)
         _create_docs_and_sync(sol, 1)
+        sol.close()

     #
     # Concurrency tests
@@ -351,6 +353,7 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer):
             self.assertEqual(
                 1,
                 len(filter(lambda t: t[0] == doc_id, transaction_log)))
+        sol.close()

     def test_concurrent_syncs_do_not_fail(self):
         """
@@ -394,3 +397,4 @@ class CouchAtomicityTestCase(CouchDBTestCase, TestCaseWithServer):
             self.assertEqual(
                 1,
                 len(filter(lambda t: t[0] == doc_id, transaction_log)))
+        sol.close()
diff --git a/common/src/leap/soledad/common/tests/test_crypto.py b/common/src/leap/soledad/common/tests/test_crypto.py
index af11bc76..4b2470ba 100644
--- a/common/src/leap/soledad/common/tests/test_crypto.py
+++ b/common/src/leap/soledad/common/tests/test_crypto.py
@@ -44,7 +44,6 @@ from leap.soledad.common.crypto import WrongMac, UnknownMacMethod
 from leap.soledad.common.tests.u1db_tests import (
     simple_doc,
     nested_doc,
-    TestCaseWithServer,
 )


@@ -95,6 +94,7 @@ class RecoveryDocumentTestCase(BaseSoledadTest):
         self.assertEqual(self._soledad._get_storage_secret(),
                          s._get_storage_secret(),
                          'Failed setting secret for symmetric encryption.')
+        s.close()


 class SoledadSecretsTestCase(BaseSoledadTest):
@@ -110,6 +110,7 @@ class SoledadSecretsTestCase(BaseSoledadTest):
         # generate new secret
         secret_id_2 = sol._gen_secret()
         self.assertTrue(secret_id_1 != secret_id_2)
+        sol.close()
         # re-instantiate
         sol = self._soledad_instance(
             user='user@leap.se',
@@ -130,6 +131,7 @@ class SoledadSecretsTestCase(BaseSoledadTest):
         # assert id is hash of new secret
         self.assertTrue(
             secret_id_2 == hashlib.sha256(sol.storage_secret).hexdigest())
+        sol.close()

     def test__has_secret(self):
         sol = self._soledad_instance(
@@ -144,6 +146,7 @@ class SoledadSecretsTestCase(BaseSoledadTest):
         # but not being able to decrypt correctly should
         sol._secrets[sol.secret_id][sol.SECRET_KEY] = None
         self.assertFalse(sol._has_secret())
+        sol.close()


 class MacAuthTestCase(BaseSoledadTest):
diff --git a/common/src/leap/soledad/common/tests/test_server.py b/common/src/leap/soledad/common/tests/test_server.py
index 6fe9211c..1c5a7407 100644
--- a/common/src/leap/soledad/common/tests/test_server.py
+++ b/common/src/leap/soledad/common/tests/test_server.py
@@ -391,6 +391,9 @@ class EncryptedSyncTestCase(
         # assert incoming doc is equal to the first sent doc
         self.assertEqual(doc1, doc2)
         db.delete_database()
+        db.close()
+        sol1.close()
+        sol2.close()

     def test_encrypted_sym_sync_with_unicode_passphrase(self):
         """
@@ -450,6 +453,9 @@ class EncryptedSyncTestCase(
         # assert incoming doc is equal to the first sent doc
         self.assertEqual(doc1, doc2)
         db.delete_database()
+        db.close()
+        sol1.close()
+        sol2.close()

     def test_sync_very_large_files(self):
         """
@@ -492,6 +498,9 @@ class EncryptedSyncTestCase(
         self.assertEqual(doc1, doc2)
         # delete remote database
         db.delete_database()
+        db.close()
+        sol1.close()
+        sol2.close()


     def test_sync_many_small_files(self):
         """
@@ -535,6 +544,9 @@ class EncryptedSyncTestCase(
             self.assertEqual(sol1.get_doc(doc.doc_id), doc)
         # delete remote database
         db.delete_database()
+        db.close()
+        sol1.close()
+        sol2.close()

 class LockResourceTestCase(
         CouchDBTestCase, TestCaseWithServer):
diff --git a/common/src/leap/soledad/common/tests/test_soledad.py b/common/src/leap/soledad/common/tests/test_soledad.py
index 035c5ac5..5a3bf2b0 100644
--- a/common/src/leap/soledad/common/tests/test_soledad.py
+++ b/common/src/leap/soledad/common/tests/test_soledad.py
@@ -49,6 +49,7 @@ class AuxMethodsTestCase(BaseSoledadTest):
         secrets_path = os.path.dirname(sol.secrets_path)
         self.assertTrue(os.path.isdir(local_db_dir))
         self.assertTrue(os.path.isdir(secrets_path))
+        sol.close()

     def test__init_db(self):
         sol = self._soledad_instance()
@@ -61,6 +62,7 @@ class AuxMethodsTestCase(BaseSoledadTest):
         sol._init_db()
         from leap.soledad.client.sqlcipher import SQLCipherDatabase
         self.assertIsInstance(sol._db, SQLCipherDatabase)
+        sol.close()

     def test__init_config_defaults(self):
         """
@@ -103,6 +105,7 @@ class AuxMethodsTestCase(BaseSoledadTest):
             os.path.join(self.tempdir, 'value_2'),
             sol.local_db_path)
         self.assertEqual('value_1', sol.server_url)
+        sol.close()

     def test_change_passphrase(self):
         """
@@ -118,6 +121,7 @@ class AuxMethodsTestCase(BaseSoledadTest):

         # change the passphrase
         sol.change_passphrase(u'654321')
+        sol.close()

         self.assertRaises(
             WrongMac,
@@ -132,6 +136,7 @@ class AuxMethodsTestCase(BaseSoledadTest):
             prefix=self.rand_prefix)
         doc2 = sol2.get_doc(doc_id)
         self.assertEqual(doc, doc2)
+        sol2.close()

     def test_change_passphrase_with_short_passphrase_raises(self):
         """
@@ -145,6 +150,7 @@ class AuxMethodsTestCase(BaseSoledadTest):
         self.assertRaises(
             PassphraseTooShort,
             sol.change_passphrase, u'54321')
+        sol.close()

     def test_get_passphrase(self):
         """
@@ -152,6 +158,7 @@ class AuxMethodsTestCase(BaseSoledadTest):
         """
         sol = self._soledad_instance()
         self.assertEqual('123', sol.passphrase)
+        sol.close()


 class SoledadSharedDBTestCase(BaseSoledadTest):
@@ -165,6 +172,9 @@ class SoledadSharedDBTestCase(BaseSoledadTest):
             'https://provider/', ADDRESS, document_factory=SoledadDocument,
             creds=None)

+    def tearDown(self):
+        BaseSoledadTest.tearDown(self)
+
     def test__get_secrets_from_shared_db(self):
         """
         Ensure the shared db is queried with the correct doc_id.
@@ -209,7 +219,7 @@ class SoledadSignalingTestCase(BaseSoledadTest):
         BaseSoledadTest.setUp(self)

     def tearDown(self):
-        pass
+        BaseSoledadTest.tearDown(self)

     def _pop_mock_call(self, mocked):
         mocked.call_args_list.pop()
@@ -283,6 +293,7 @@ class SoledadSignalingTestCase(BaseSoledadTest):
         # assert db was locked and unlocked
         sol._shared_db.lock.assert_called_with()
         sol._shared_db.unlock.assert_called_with('atoken')
+        sol.close()

     def test_stage2_bootstrap_signals(self):
         """
@@ -305,6 +316,7 @@ class SoledadSignalingTestCase(BaseSoledadTest):
             def __call__(self):
                 return self

+        sol.close()
         # reset mock
         soledad.client.signal.reset_mock()
         # get a fresh instance so it emits all bootstrap signals
@@ -328,6 +340,7 @@ class SoledadSignalingTestCase(BaseSoledadTest):
             proto.SOLEDAD_DONE_DOWNLOADING_KEYS,
             ADDRESS,
         )
+        sol.close()

     def test_stage1_bootstrap_signals(self):
         """
@@ -337,6 +350,7 @@ class SoledadSignalingTestCase(BaseSoledadTest):
         # get an existent instance so it emits only some of bootstrap signals
         sol = self._soledad_instance()
         self.assertEqual([], soledad.client.signal.mock_calls)
+        sol.close()

     def test_sync_signals(self):
         """
@@ -355,6 +369,7 @@ class SoledadSignalingTestCase(BaseSoledadTest):
             proto.SOLEDAD_DONE_DATA_SYNC,
             ADDRESS,
         )
+        sol.close()

     def test_need_sync_signals(self):
         """
@@ -375,3 +390,4 @@ class SoledadSignalingTestCase(BaseSoledadTest):
             ADDRESS,
         )
         SoledadSyncTarget.get_sync_info = old_get_sync_info
+        sol.close()
diff --git a/common/src/leap/soledad/common/tests/test_sqlcipher.py b/common/src/leap/soledad/common/tests/test_sqlcipher.py
index 66a673b6..c79a6045 100644
--- a/common/src/leap/soledad/common/tests/test_sqlcipher.py
+++ b/common/src/leap/soledad/common/tests/test_sqlcipher.py
@@ -469,11 +469,9 @@ class SQLCipherDatabaseSyncTests(

     def setUp(self):
         test_sync.DatabaseSyncTests.setUp(self)
-        BaseSoledadTest.setUp(self)

     def tearDown(self):
         test_sync.DatabaseSyncTests.tearDown(self)
-        BaseSoledadTest.tearDown(self)

     def test_sync_autoresolves(self):
         """
@@ -683,11 +681,9 @@ class SQLCipherSyncTargetTests(

     def setUp(self):
         test_sync.DatabaseSyncTargetTests.setUp(self)
-        #BaseSoledadTest.setUp(self)

     def tearDown(self):
         test_sync.DatabaseSyncTargetTests.tearDown(self)
-        BaseSoledadTest.tearDown(self)

     def test_sync_exchange(self):
         """
diff --git a/common/src/leap/soledad/common/tests/test_target.py b/common/src/leap/soledad/common/tests/test_target.py
index 5a541745..c1e00d52 100644
--- a/common/src/leap/soledad/common/tests/test_target.py
+++ b/common/src/leap/soledad/common/tests/test_target.py
@@ -272,11 +272,9 @@ class TestSoledadParsingSyncStream(

     def setUp(self):
         test_remote_sync_target.TestParsingSyncStream.setUp(self)
-        BaseSoledadTest.setUp(self)

     def tearDown(self):
         test_remote_sync_target.TestParsingSyncStream.tearDown(self)
-        BaseSoledadTest.tearDown(self)

     def test_extra_comma(self):
         """
@@ -646,6 +644,7 @@ class SoledadDatabaseSyncTargetTests(
         self.assertEqual(([], 1, last_trans_id),
                          (self.other_changes, new_gen, last_trans_id))
         self.assertEqual(10, self.st.get_sync_info('replica')[3])
+        sol.close()

     def test_sync_exchange_push_many(self):
         """
diff --git a/common/src/leap/soledad/common/tests/u1db_tests/__init__.py b/common/src/leap/soledad/common/tests/u1db_tests/__init__.py
index 3bc12487..99ff77b4 100644
--- a/common/src/leap/soledad/common/tests/u1db_tests/__init__.py
+++ b/common/src/leap/soledad/common/tests/u1db_tests/__init__.py
@@ -208,8 +208,8 @@ class DatabaseBaseTests(TestCase):
         self.db = self.create_database('test')

     def tearDown(self):
-        # TODO: Add close_database parameterization
-        # self.close_database(self.db)
+        if hasattr(self, 'db') and self.db is not None:
+            self.db.close()
         super(DatabaseBaseTests, self).tearDown()

     def assertTransactionLog(self, doc_ids, db):
@@ -335,6 +335,13 @@ class TestCaseWithServer(TestCase):
         super(TestCaseWithServer, self).setUp()
         self.server = self.server_thread = None

+    def tearDown(self):
+        if self.server is not None:
+            self.server.shutdown()
+            self.server_thread.join()
+            self.server.server_close()
+        super(TestCaseWithServer, self).tearDown()
+
     @property
     def url_scheme(self):
         return self.server_def()[-1]
diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_backends.py b/common/src/leap/soledad/common/tests/u1db_tests/test_backends.py
index d2a91d11..c0a7e1f7 100644
--- a/common/src/leap/soledad/common/tests/u1db_tests/test_backends.py
+++ b/common/src/leap/soledad/common/tests/u1db_tests/test_backends.py
@@ -355,6 +355,9 @@ class LocalDatabaseTests(tests.DatabaseBaseTests):

     scenarios = tests.LOCAL_DATABASES_SCENARIOS

+    def setUp(self):
+        tests.DatabaseBaseTests.setUp(self)
+
     def test_create_doc_different_ids_diff_db(self):
         doc1 = self.db.create_doc_from_json(simple_doc)
         db2 = self.create_database('other-uid')
diff --git a/common/src/leap/soledad/common/tests/u1db_tests/test_sync.py b/common/src/leap/soledad/common/tests/u1db_tests/test_sync.py
index 1f78f912..a37c36db 100644
--- a/common/src/leap/soledad/common/tests/u1db_tests/test_sync.py
+++ b/common/src/leap/soledad/common/tests/u1db_tests/test_sync.py
@@ -85,7 +85,6 @@ class DatabaseSyncTargetTests(tests.DatabaseBaseTests,
     whitebox = True

     def setUp(self):
-        tests.DatabaseBaseTests.setUp(self)
         tests.TestCaseWithServer.setUp(self)
         self.db, self.st = self.create_db_and_target(self)
         self.other_changes = []
@@ -94,7 +93,6 @@ class DatabaseSyncTargetTests(tests.DatabaseBaseTests,
         # We delete them explicitly, so that connections are cleanly closed
         del self.st
         self.db.close()
-        del self.db
         super(DatabaseSyncTargetTests, self).tearDown()

     def create_db_and_target(self, *args):
@@ -1013,30 +1011,30 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,
     def test_sync_supersedes_conflicts(self):
         self.db1 = self.create_database('test1', 'both')
         self.db2 = self.create_database('test2', 'target')
-        db3 = self.create_database('test3', 'both')
+        self.db3 = self.create_database('test3', 'both')
         doc1 = self.db1.create_doc_from_json('{"a": 1}', doc_id='the-doc')
         self.db2.create_doc_from_json('{"b": 1}', doc_id='the-doc')
-        db3.create_doc_from_json('{"c": 1}', doc_id='the-doc')
-        self.sync(db3, self.db1)
+        self.db3.create_doc_from_json('{"c": 1}', doc_id='the-doc')
+        self.sync(self.db3, self.db1)
         self.assertEqual(
             self.db1._get_generation_info(),
-            db3._get_replica_gen_and_trans_id(self.db1._replica_uid))
+            self.db3._get_replica_gen_and_trans_id(self.db1._replica_uid))
         self.assertEqual(
-            db3._get_generation_info(),
-            self.db1._get_replica_gen_and_trans_id(db3._replica_uid))
-        self.sync(db3, self.db2)
+            self.db3._get_generation_info(),
+            self.db1._get_replica_gen_and_trans_id(self.db3._replica_uid))
+        self.sync(self.db3, self.db2)
         self.assertEqual(
             self.db2._get_generation_info(),
-            db3._get_replica_gen_and_trans_id(self.db2._replica_uid))
+            self.db3._get_replica_gen_and_trans_id(self.db2._replica_uid))
         self.assertEqual(
-            db3._get_generation_info(),
-            self.db2._get_replica_gen_and_trans_id(db3._replica_uid))
-        self.assertEqual(3, len(db3.get_doc_conflicts('the-doc')))
+            self.db3._get_generation_info(),
+            self.db2._get_replica_gen_and_trans_id(self.db3._replica_uid))
+        self.assertEqual(3, len(self.db3.get_doc_conflicts('the-doc')))
         doc1.set_json('{"a": 2}')
         self.db1.put_doc(doc1)
-        self.sync(db3, self.db1)
+        self.sync(self.db3, self.db1)
         # original doc1 should have been removed from conflicts
-        self.assertEqual(3, len(db3.get_doc_conflicts('the-doc')))
+        self.assertEqual(3, len(self.db3.get_doc_conflicts('the-doc')))

     def test_sync_stops_after_get_sync_info(self):
         self.db1 = self.create_database('test1', 'source')
@@ -1054,70 +1052,70 @@ class DatabaseSyncTests(tests.DatabaseBaseTests,
         self.db2 = self.create_database('test2', 'target')
         self.db1.create_doc_from_json(tests.simple_doc, doc_id='doc1')
         self.sync(self.db1, self.db2)
-        db1_copy = self.copy_database(self.db1)
+        self.db1_copy = self.copy_database(self.db1)
         self.db1.create_doc_from_json(tests.simple_doc, doc_id='doc2')
         self.sync(self.db1, self.db2)
         self.assertRaises(
-            errors.InvalidGeneration, self.sync, db1_copy, self.db2)
+            errors.InvalidGeneration, self.sync, self.db1_copy, self.db2)

     def test_sync_detects_rollback_in_target(self):
         self.db1 = self.create_database('test1', 'source')
         self.db2 = self.create_database('test2', 'target')
         self.db1.create_doc_from_json(tests.simple_doc, doc_id="divergent")
         self.sync(self.db1, self.db2)
-        db2_copy = self.copy_database(self.db2)
+        self.db2_copy = self.copy_database(self.db2)
         self.db2.create_doc_from_json(tests.simple_doc, doc_id='doc2')
         self.sync(self.db1, self.db2)
         self.assertRaises(
-            errors.InvalidGeneration, self.sync, self.db1, db2_copy)
+            errors.InvalidGeneration, self.sync, self.db1, self.db2_copy)

     def test_sync_detects_diverged_source(self):
         self.db1 = self.create_database('test1', 'source')
         self.db2 = self.create_database('test2', 'target')
-        db3 = self.copy_database(self.db1)
+        self.db3 = self.copy_database(self.db1)
         self.db1.create_doc_from_json(tests.simple_doc, doc_id="divergent")
-        db3.create_doc_from_json(tests.simple_doc, doc_id="divergent")
+        self.db3.create_doc_from_json(tests.simple_doc, doc_id="divergent")
         self.sync(self.db1, self.db2)
         self.assertRaises(
-            errors.InvalidTransactionId, self.sync, db3, self.db2)
+            errors.InvalidTransactionId, self.sync, self.db3, self.db2)

     def test_sync_detects_diverged_target(self):
         self.db1 = self.create_database('test1', 'source')
         self.db2 = self.create_database('test2', 'target')
-        db3 = self.copy_database(self.db2)
-        db3.create_doc_from_json(tests.nested_doc, doc_id="divergent")
+        self.db3 = self.copy_database(self.db2)
+        self.db3.create_doc_from_json(tests.nested_doc, doc_id="divergent")
         self.db1.create_doc_from_json(tests.simple_doc, doc_id="divergent")
         self.sync(self.db1, self.db2)
         self.assertRaises(
-            errors.InvalidTransactionId, self.sync, self.db1, db3)
+            errors.InvalidTransactionId, self.sync, self.db1, self.db3)

     def test_sync_detects_rollback_and_divergence_in_source(self):
         self.db1 = self.create_database('test1', 'source')
         self.db2 = self.create_database('test2', 'target')
         self.db1.create_doc_from_json(tests.simple_doc, doc_id='doc1')
         self.sync(self.db1, self.db2)
-        db1_copy = self.copy_database(self.db1)
+        self.db1_copy = self.copy_database(self.db1)
         self.db1.create_doc_from_json(tests.simple_doc, doc_id='doc2')
         self.db1.create_doc_from_json(tests.simple_doc, doc_id='doc3')
         self.sync(self.db1, self.db2)
-        db1_copy.create_doc_from_json(tests.simple_doc, doc_id='doc2')
-        db1_copy.create_doc_from_json(tests.simple_doc, doc_id='doc3')
+        self.db1_copy.create_doc_from_json(tests.simple_doc, doc_id='doc2')
+        self.db1_copy.create_doc_from_json(tests.simple_doc, doc_id='doc3')
         self.assertRaises(
-            errors.InvalidTransactionId, self.sync, db1_copy, self.db2)
+            errors.InvalidTransactionId, self.sync, self.db1_copy, self.db2)

     def test_sync_detects_rollback_and_divergence_in_target(self):
         self.db1 = self.create_database('test1', 'source')
         self.db2 = self.create_database('test2', 'target')
         self.db1.create_doc_from_json(tests.simple_doc, doc_id="divergent")
         self.sync(self.db1, self.db2)
-        db2_copy = self.copy_database(self.db2)
+        self.db2_copy = self.copy_database(self.db2)
         self.db2.create_doc_from_json(tests.simple_doc, doc_id='doc2')
         self.db2.create_doc_from_json(tests.simple_doc, doc_id='doc3')
         self.sync(self.db1, self.db2)
-        db2_copy.create_doc_from_json(tests.simple_doc, doc_id='doc2')
-        db2_copy.create_doc_from_json(tests.simple_doc, doc_id='doc3')
+        self.db2_copy.create_doc_from_json(tests.simple_doc, doc_id='doc2')
+        self.db2_copy.create_doc_from_json(tests.simple_doc, doc_id='doc3')
         self.assertRaises(
-            errors.InvalidTransactionId, self.sync, self.db1, db2_copy)
+            errors.InvalidTransactionId, self.sync, self.db1, self.db2_copy)


 class TestDbSync(tests.TestCaseWithServer):
--
cgit v1.2.3


From 2c3e88a83a9edf2758691f163e9dec84d30ea81c Mon Sep 17 00:00:00 2001
From: drebs
Date: Tue, 15 Apr 2014 14:30:31 -0300
Subject: Close connection after syncing (#5507).

---
 .../changes/bug_5507_close-connection-after-sync | 2 +
 client/src/leap/soledad/client/sqlcipher.py | 53 ++++++++++++----------
 2 files changed, 30 insertions(+), 25 deletions(-)
 create mode 100644 client/changes/bug_5507_close-connection-after-sync

diff --git a/client/changes/bug_5507_close-connection-after-sync b/client/changes/bug_5507_close-connection-after-sync
new file mode 100644
index 00000000..0e77ab25
--- /dev/null
+++ b/client/changes/bug_5507_close-connection-after-sync
@@ -0,0 +1,2 @@
+  o Close connection with server after syncing to avoid client hanging on exit
+    (#5507).
diff --git a/client/src/leap/soledad/client/sqlcipher.py b/client/src/leap/soledad/client/sqlcipher.py
index 3aea340d..04f8ebf9 100644
--- a/client/src/leap/soledad/client/sqlcipher.py
+++ b/client/src/leap/soledad/client/sqlcipher.py
@@ -43,12 +43,15 @@
 So, as the statements above were introduced for backwards compatibility with
 SQLCipher 1.1 databases, we do not implement them as all SQLCipher databases
 handled by Soledad should be created by SQLCipher >= 2.0.
 """
-import httplib
 import logging
 import os
 import string
 import threading
 import time
+import json
+
+from hashlib import sha256
+from contextlib import contextmanager

 from pysqlcipher import dbapi2
 from u1db.backends import sqlite_backend
@@ -149,7 +152,6 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
     k_lock = threading.Lock()
     create_doc_lock = threading.Lock()
     update_indexes_lock = threading.Lock()
-    _syncer = None

     def __init__(self, sqlcipher_file, password, document_factory=None,
                  crypto=None, raw_key=False, cipher='aes-256-cbc',
@@ -211,6 +213,7 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
                 has_conflicts=has_conflicts,
                 syncable=syncable)
         self.set_document_factory(factory)
+        self._syncers = {}

     @classmethod
     def _open_database(cls, sqlcipher_file, password, document_factory=None,
@@ -351,46 +354,46 @@ class SQLCipherDatabase(sqlite_backend.SQLitePartialExpandDatabase):
         :return: The local generation before the synchronisation was
             performed.
         :rtype: int
         """
-        if not self.syncer:
-            self._create_syncer(url, creds=creds)
-
-        try:
-            res = self.syncer.sync(autocreate=autocreate)
-        except httplib.CannotSendRequest:
-            # raised when you reuse httplib.HTTP object for new request
-            # while you havn't called its getresponse()
-            # this catch works for the current connclass used
-            # by our HTTPClientBase, since it uses httplib.
-            # we will have to replace it if it changes.
-            logger.info("Replacing connection and trying again...")
-            self._syncer = None
-            self._create_syncer(url, creds=creds)
-            res = self.syncer.sync(autocreate=autocreate)
+        res = None
+        with self.syncer(url, creds=creds) as syncer:
+            res = syncer.sync(autocreate=autocreate)
         return res

-    @property
-    def syncer(self):
+    @contextmanager
+    def syncer(self, url, creds=None):
         """
         Accessor for synchronizer.
         """
-        return self._syncer
+        syncer = self._get_syncer(url, creds=creds)
+        yield syncer
+        syncer.sync_target.close()

-    def _create_syncer(self, url, creds=None):
+    def _get_syncer(self, url, creds=None):
         """
-        Creates a synchronizer
+        Get a synchronizer for C{url} using C{creds}.

         :param url: The url of the target replica to sync with.
         :type url: str
         :param creds: optional dictionary giving credentials
-        to authorize the operation with the server.
+            to authorize the operation with the server.
         :type creds: dict
+
+        :return: A synchronizer.
+        :rtype: u1db.sync.Synchronizer
         """
-        if self._syncer is None:
-            self._syncer = Synchronizer(
+        # we want to store at most one syncer for each url, so we also store a
+        # hash of the connection credentials and replace the stored syncer for
+        # a certain url if credentials have changed.
+        h = sha256(json.dumps([url, creds])).hexdigest()
+        cur_h, syncer = self._syncers.get(url, (None, None))
+        if syncer is None or h != cur_h:
+            syncer = Synchronizer(
                 self,
                 SoledadSyncTarget(url,
                                   creds=creds,
                                   crypto=self._crypto))
+            self._syncers[url] = (h, syncer)
+        return syncer

     def _extra_schema_init(self, c):
         """
--
cgit v1.2.3


From 4375c180b3de4eb84603bf15755ae1f7635804fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Touceda?=
Date: Fri, 16 May 2014 15:46:28 -0300
Subject: Fold in changes

---
 CHANGELOG | 13 +++++++++++++
 client/changes/bug_5507_close-connection-after-sync | 2 --
 .../bug_5493_properly-close-connections-on-couch-backend | 2 --
 ...ture_5386_prevent-couch-backend-from-trying-to-create-db | 1 -
 .../feature_5386_prevent-uneeded-get-on-couch-server-state | 2 --
 5 files changed, 13 insertions(+), 7 deletions(-)
 delete mode 100644 client/changes/bug_5507_close-connection-after-sync
 delete mode 100644 common/changes/bug_5493_properly-close-connections-on-couch-backend
 delete mode 100644 common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db
 delete mode 100644 common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state

diff --git a/CHANGELOG b/CHANGELOG
index d3a5b9b4..ff1ba240 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,16 @@
+0.5.1 May 16, 2014:
+Client:
+  o Close connection with server after syncing to avoid client hanging
+    on exit. Fixes #5507.
+
+Common:
+  o Properly close connections on couch backend. Also prevent file
+    descriptor leaks on tests. Closes #5493.
+  o Prevent couch backend from always trying to create the
+    database. Fixes #5386.
+  o Prevent Couch Server State from making one unneeded GET request on
+    instantiation. Fixes #5386.
+
 0.5.0 Apr 4, 2014:
 Client:
   o Catch lock timeout exception. Fixes #4435.
diff --git a/client/changes/bug_5507_close-connection-after-sync b/client/changes/bug_5507_close-connection-after-sync
deleted file mode 100644
index 0e77ab25..00000000
--- a/client/changes/bug_5507_close-connection-after-sync
+++ /dev/null
@@ -1,2 +0,0 @@
-  o Close connection with server after syncing to avoid client hanging on exit
-    (#5507).
diff --git a/common/changes/bug_5493_properly-close-connections-on-couch-backend b/common/changes/bug_5493_properly-close-connections-on-couch-backend
deleted file mode 100644
index 3cb55168..00000000
--- a/common/changes/bug_5493_properly-close-connections-on-couch-backend
+++ /dev/null
@@ -1,2 +0,0 @@
-  o Properly close connections on couch backend. Also prevent file descriptor
-    leaks on tests. Closes #5493.
diff --git a/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db b/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db
deleted file mode 100644
index 9bbac329..00000000
--- a/common/changes/feature_5386_prevent-couch-backend-from-trying-to-create-db
+++ /dev/null
@@ -1 +0,0 @@
-  o Prevent couch backend from always trying to create the database (#5386).
diff --git a/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state b/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state
deleted file mode 100644
index 95919873..00000000
--- a/common/changes/feature_5386_prevent-uneeded-get-on-couch-server-state
+++ /dev/null
@@ -1,2 +0,0 @@
-  o Prevent Couch Server State from making one unneeded GET request on
-    instantiation (#5386).
--
cgit v1.2.3
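
The #5507 patch above keeps at most one Synchronizer per URL and invalidates
it when the connection credentials change. A standalone sketch of that
caching pattern, with illustrative names (SyncerCache and make_syncer are
not part of the patch):

    import json
    from hashlib import sha256

    class SyncerCache(object):
        """Keep at most one syncer per URL, keyed by a credentials hash."""

        def __init__(self, make_syncer):
            # factory: (url, creds) -> syncer object
            self._make_syncer = make_syncer
            self._syncers = {}

        def get(self, url, creds=None):
            # hash url+creds so a credentials change replaces the cached
            # syncer for that url
            h = sha256(json.dumps([url, creds])).hexdigest()
            cur_h, syncer = self._syncers.get(url, (None, None))
            if syncer is None or h != cur_h:
                syncer = self._make_syncer(url, creds)
                self._syncers[url] = (h, syncer)
            return syncer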