diff options
Diffstat (limited to 'scripts/profiling/backends_cpu_usage')
| -rwxr-xr-x | scripts/profiling/backends_cpu_usage/log_cpu_usage.py | 46 | ||||
| -rw-r--r-- | scripts/profiling/backends_cpu_usage/movingaverage.py | 209 | ||||
| -rwxr-xr-x | scripts/profiling/backends_cpu_usage/plot.py | 81 | ||||
| -rwxr-xr-x | scripts/profiling/backends_cpu_usage/test_u1db_sync.py | 113 | 
4 files changed, 449 insertions, 0 deletions
# diff --git a/scripts/profiling/backends_cpu_usage/log_cpu_usage.py b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py
# new file mode 100755
#!/usr/bin/python


# Get the CPU usage and print to file.


import psutil
import time
import argparse
import os
import sys
import threading


class LogCpuUsage(threading.Thread):
    """Background sampler that logs CPU usage to a text file.

    Every sample is written as one line, ``"<elapsed> <cpu>"``, where
    ``<elapsed>`` is the number of seconds since ``run()`` began and
    ``<cpu>`` is whatever ``psutil.cpu_percent()`` returned.

    Use ``start()`` to sample in a separate thread and ``stop()`` to end
    the sampling loop; ``run()`` may also be called directly to sample in
    the calling thread.
    """

    # Seconds to pause between two consecutive samples.
    SAMPLE_INTERVAL = 0.01

    def __init__(self, fname):
        """Prepare a sampler that will write to the file named `fname`.

        The file is only opened (and truncated) once ``run()`` executes.
        """
        threading.Thread.__init__(self)
        # An Event that is never re-armed: a stop() that arrives before the
        # thread body gets scheduled is still honoured, which a plain flag
        # reset at the top of run() would silently discard.
        self._stop_event = threading.Event()
        self._fname = fname

    def run(self):
        """Write samples to the output file until ``stop()`` is called."""
        with open(self._fname, 'w') as f:
            start = time.time()
            while not self._stop_event.is_set():
                now = time.time()
                # NOTE(review): psutil documents that the first non-blocking
                # cpu_percent() call returns a meaningless 0.0 -- confirm
                # for the installed psutil version.
                f.write("%f %f\n" % ((now - start), psutil.cpu_percent()))
                # Plain sleep (rather than Event.wait) keeps the sampler's
                # own footprint as small as possible.
                time.sleep(self.SAMPLE_INTERVAL)

    def stop(self):
        """Ask the sampling loop to finish after the current iteration."""
        self._stop_event.set()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('file', help='where to save output')
    args = parser.parse_args()

    if os.path.isfile(args.file):
        try:
            ask = raw_input  # Python 2
        except NameError:
            ask = input  # Python 3
        replace = ask('File %s exists, replace it (y/N)? ' % args.file)
        if replace.lower() != 'y':
            print('Bailing out.')
            sys.exit(1)

    log_cpu = LogCpuUsage(args.file)
    try:
        # Sample in the foreground; there is nobody to call stop(), so the
        # user ends the run with Ctrl-C.
        log_cpu.run()
    except KeyboardInterrupt:
        log_cpu.stop()

# diff --git a/scripts/profiling/backends_cpu_usage/movingaverage.py b/scripts/profiling/backends_cpu_usage/movingaverage.py
# new file mode 100644
#!/usr/bin/env python
#
#  Sean Reifschneider, tummy.com, ltd.
#  <jafo@tummy.com>
#  Released into the Public Domain, 2011-02-06

import itertools
from itertools import islice
from collections import deque


#########################################################
def movingaverage(data, subset_size, data_is_list=None,
                  avoid_fp_drift=True):
    '''Yield the moving averages of `data` over a window of `subset_size`.

    `subset_size` must be an integer of at least 1 and no larger than the
    number of items in `data`; otherwise a ValueError is raised.  Because
    this function is a generator, that error surfaces when the result is
    first iterated, not when the function is called.

    `data_is_list` selects the algorithm: True uses slicing and `len()`
    (so `data` must support both), False consumes `data` as a plain
    iterable.  The default, `None`, auto-detects: lists, tuples and any
    object exposing `__getslice__` use the slicing algorithm.

    `avoid_fp_drift`, if True (the default), sums every window from
    scratch rather than keeping a "rolling sum" (which may be subject to
    floating-point drift).  Summing each window costs work proportional
    to `subset_size` per output value, whereas the rolling sum does a
    constant amount of work per value.

    Every yielded average is a float.
    '''
    if subset_size < 1:
        raise ValueError('subset_size must be 1 or larger')

    if data_is_list is None:
        # `__getslice__` only exists on Python 2 sequences; the isinstance
        # check keeps lists and tuples on the slicing path on Python 3 too.
        data_is_list = (isinstance(data, (list, tuple))
                        or hasattr(data, '__getslice__'))

    divisor = float(subset_size)
    if data_is_list:
        #  This only works if we can re-access old elements.
        #  In other words, it can't be just an iterable, it needs to be a
        #  list-like sequence.
        if subset_size > len(data):
            raise ValueError('subset_size must be smaller than data set size')

        if avoid_fp_drift:
            for end in range(subset_size, len(data) + 1):
                yield sum(data[end - subset_size:end]) / divisor
        else:
            cur = sum(data[0:subset_size])
            yield cur / divisor
            for x in range(subset_size, len(data)):
                # slide the window: add the newcomer, drop the oldest
                cur += data[x] - data[x - subset_size]
                yield cur / divisor
    else:
        #  Based on the recipe at:
        #     http://docs.python.org/library/collections.html#deque-recipes
        it = iter(data)
        window = deque(islice(it, subset_size))

        # islice stops early when the input is shorter than the window
        if subset_size > len(window):
            raise ValueError('subset_size must be smaller than data set size')

        if avoid_fp_drift:
            yield sum(window) / divisor
            for elem in it:
                window.popleft()
                window.append(elem)
                yield sum(window) / divisor
        else:
            s = sum(window)
            yield s / divisor
            for elem in it:
                s += elem - window.popleft()
                window.append(elem)
                yield s / divisor


##########################
if __name__ == '__main__':
    import unittest

    try:
        xrange
    except NameError:
        # Python 3 has no xrange; range is already lazy there.
        xrange = range

    # Expected moving averages of 1..6, keyed by window size.
    EXPECTED_1_TO_6 = {
        1: [1, 2, 3, 4, 5, 6],
        2: [1.5, 2.5, 3.5, 4.5, 5.5],
        3: [2, 3, 4, 5],
        4: [2.5, 3.5, 4.5],
        5: [3, 4],
        6: [3.5],
    }
    SAMPLE = [40, 30, 50, 46, 39, 44]
    SAMPLE_AVERAGES = [40.0, 42.0, 45.0, 43.0]

    def _as_list(first, last):
        '''Return the integers first..last-1 as a real list.'''
        return list(range(first, last))

    class TestMovingAverage(unittest.TestCase):
        def _check_common(self, make_data, **kwargs):
            '''Run the cases shared by all tests on data from `make_data`.'''
            # the generator is lazy, so the error appears on iteration
            self.assertRaises(
                ValueError, list,
                movingaverage(make_data(1, 4), 0, **kwargs))
            self.assertRaises(
                ValueError, list,
                movingaverage(make_data(1, 7), 7, **kwargs))

            for size in sorted(EXPECTED_1_TO_6):
                self.assertEqual(
                    list(movingaverage(make_data(1, 7), size, **kwargs)),
                    EXPECTED_1_TO_6[size])

        def _check_sample(self, **kwargs):
            '''Force each algorithm in turn on the same list input.'''
            for as_list in (False, True):
                self.assertEqual(
                    list(movingaverage(SAMPLE, 3, as_list, **kwargs)),
                    SAMPLE_AVERAGES)

        ####################
        def test_List(self):
            self._check_common(_as_list)
            self.assertEqual(
                list(movingaverage(map(float, [1, 2, 3, 4, 5, 6]), 2)),
                EXPECTED_1_TO_6[2])
            self._check_sample()

        ######################
        def test_XRange(self):
            self._check_common(xrange)
            self.assertEqual(
                list(movingaverage(iter(map(float, xrange(1, 7))), 2)),
                EXPECTED_1_TO_6[2])

        ###########################
        def test_ListRolling(self):
            self._check_common(_as_list, avoid_fp_drift=False)
            self.assertEqual(
                list(movingaverage(map(float, [1, 2, 3, 4, 5, 6]), 2,
                                   avoid_fp_drift=False)),
                EXPECTED_1_TO_6[2])
            self._check_sample(avoid_fp_drift=False)

        #############################
        def test_XRangeRolling(self):
            self._check_common(xrange, avoid_fp_drift=False)
            self.assertEqual(
                list(movingaverage(iter(map(float, xrange(1, 7))), 2,
                                   avoid_fp_drift=False)),
                EXPECTED_1_TO_6[2])

    ######################################################################
    suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage)
    unittest.TextTestRunner(verbosity=2).run(suite)

# diff --git a/scripts/profiling/backends_cpu_usage/plot.py
# b/scripts/profiling/backends_cpu_usage/plot.py
# new file mode 100755
#!/usr/bin/python


from matplotlib import pyplot as plt
from movingaverage import movingaverage


# Number of consecutive samples averaged into one plotted point.
SMOOTH_WINDOW = 10


def smooth(l):
    """Return an iterable over the moving average of the list `l`.

    Inputs with fewer than SMOOTH_WINDOW items cannot fill a single
    window, so they are returned unsmoothed instead of raising.
    """
    if len(l) < SMOOTH_WINDOW:
        return iter(l)
    return movingaverage(
        l, SMOOTH_WINDOW, data_is_list=True, avoid_fp_drift=False)


def read_samples(filename):
    """Parse a log whose lines are "<elapsed seconds> <cpu percent>".

    Returns a `(times, usages)` pair of equally long lists of floats.
    Blank lines are skipped; a line that does not hold exactly two
    numbers raises ValueError.
    """
    times = []
    usages = []
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            elapsed, cpu = fields
            times.append(float(elapsed))
            usages.append(float(cpu))
    return times, usages


# (log file basename, matplotlib color) for every backend to plot
files = [
    ('sqlite', 'b'),
    ('sqlcipher', 'r'),
    ('u1dblite', 'g'),
    ('u1dbcipher', 'm'),
]


# config the plot
plt.xlabel('time (s)')
plt.ylabel('cpu usage (%)')
plt.title('u1db backends CPU usage')


for backend, color in files:

    filename = '%s.txt' % backend

    # read data from file
    x, y = read_samples(filename)

    # Extremes (first occurrence wins), kept as floats.  They are only
    # consumed by the disabled annotations further down.
    xmax = xmin = ymax = ymin = None
    if y:
        ymax = max(y)
        xmax = x[y.index(ymax)]
        ymin = min(y)
        xmin = x[y.index(ymin)]

    kwargs = {
        'linewidth': 1.0,
        'linestyle': '-',
    #    'marker': '.',
        'color': color,
    }
    # Both axes go through the same moving average so that the two
    # sequences stay the same length.
    plt.plot(
        list(smooth(x)),
        list(smooth(y)),
        label=backend, **kwargs)

    #plt.axes().get_xaxis().set_ticks(x)
    #plt.axes().get_xaxis().set_ticklabels(x)

    # annotate max and min values
    # NOTE(review): the "GB" unit in these labels does not match a CPU
    # percentage -- fix the label before enabling.
    #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax))
    #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin))


plt.ylim(0, 100)
plt.grid()
plt.legend()
plt.show()

# diff --git a/scripts/profiling/backends_cpu_usage/test_u1db_sync.py
# b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py
# new file mode 100755
#!/usr/bin/python


import u1db
import tempfile
import logging
import shutil
import os
import argparse
import time
import binascii
import random


from leap.soledad.client.sqlcipher import open as sqlcipher_open
from log_cpu_usage import LogCpuUsage
from u1dblite import open as u1dblite_open
from u1dbcipher import open as u1dbcipher_open


DOCS_TO_SYNC = 1000
SMALLEST_DOC_SIZE = 1 * 1024  # 1 KB
BIGGEST_DOC_SIZE = 100 * 1024  # 100 KB

# Seconds of idle CPU logging recorded before and after the sync.
IDLE_SECONDS = 5

# Defined at module level so that run_test() also works when this file is
# imported rather than executed.
logger = logging.getLogger(__name__)


def get_data(size):
    """Return random hexadecimal text of about `size` characters.

    `size // 2` random bytes are hex-encoded, so an odd `size` yields
    `size - 1` characters.
    """
    return binascii.hexlify(os.urandom(size // 2))


def run_test(testname, open_fun, tempdir, docs, *args):
    """Sync `docs` between two fresh databases while logging CPU usage.

    Two databases, `<testname>1.db` and `<testname>2.db`, are opened in
    `tempdir` by calling `open_fun(path, *args)`.  Every item of `docs`
    is created in the first database, which is then synchronized into
    the second one.  CPU usage is logged to `<testname>.txt` in the
    current directory from IDLE_SECONDS before the sync until
    IDLE_SECONDS after it.

    The CPU logger is stopped and both databases are closed even when
    the sync fails; the failure itself is propagated to the caller.
    """
    logger.info('Starting test "%s".', testname)
    docs = list(docs)  # accept any iterable and know the real count

    # instantiate dbs
    db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args)
    db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args)
    try:
        # get sync target and synchronizer
        target = db2.get_sync_target()
        synchronizer = u1db.sync.Synchronizer(db1, target)

        # populate the source db
        logger.info('Creating %d documents in source db...', len(docs))
        for content in docs:
            db1.create_doc(content)
        logger.info('%d documents created in source db.', len(docs))

        # run the test
        filename = testname + '.txt'
        logger.info('Logging CPU usage to %s.', filename)
        log_cpu = LogCpuUsage(filename)
        tstart = time.time()

        # start logging cpu
        log_cpu.start()
        try:
            logger.info('Sleeping for %d seconds...', IDLE_SECONDS)
            time.sleep(IDLE_SECONDS)

            # sync
            logger.info('Starting sync...')
            sstart = time.time()
            synchronizer.sync()
            send = time.time()
            logger.info('Sync finished.')

            logger.info('Sleeping for %d seconds...', IDLE_SECONDS)
            time.sleep(IDLE_SECONDS)
            tend = time.time()
        finally:
            # stop logging cpu; without this a failed sync would leave the
            # logger thread running and keep the process alive
            log_cpu.stop()
            log_cpu.join()

        # report
        logger.info('Total sync time: %f seconds', send - sstart)
        logger.info('Total test time: %f seconds', tend - tstart)
        logger.info('Finished test "%s".', testname)
    finally:
        # close dbs
        db1.close()
        db2.close()


if __name__ == '__main__':

    # configure logger
    LOG_FORMAT = '%(asctime)s %(message)s'
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

    # get a temporary dir
    tempdir = tempfile.mkdtemp()
    logger.info('Using temporary directory %s', tempdir)

    try:
        # create a lot of documents; random-sized payloads are disabled
        docs = []
        for i in range(DOCS_TO_SYNC):
            docs.append({
                'index': i,
                #'data': get_data(
                #    random.randrange(
                #        SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE))
            })

        # run tests
        run_test('sqlite', u1db.open, tempdir, docs, True)
        run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True)
        run_test('u1dblite', u1dblite_open, tempdir, docs)
        run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True)
    finally:
        # remove temporary dir, also when a test failed
        logger.info('Removing temporary directory %s', tempdir)
        shutil.rmtree(tempdir)
