summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordrebs <drebs@leap.se>2014-03-11 16:14:39 -0300
committerdrebs <drebs@leap.se>2014-03-12 16:21:02 -0300
commit3337f48c810df45aac7d3009b49b4b2a34ef019d (patch)
treee6047243db5a1630d16849b0449994d0930091c5
parent94ec89384abeba52d660addf3528b11a9aa5a8b0 (diff)
Add scripts to measure backend cpu usage.
-rwxr-xr-xscripts/backends_cpu_usage/log_cpu_usage.py46
-rw-r--r--scripts/backends_cpu_usage/movingaverage.py209
-rwxr-xr-xscripts/backends_cpu_usage/plot.py81
-rwxr-xr-xscripts/backends_cpu_usage/test_u1db_sync.py113
4 files changed, 449 insertions, 0 deletions
diff --git a/scripts/backends_cpu_usage/log_cpu_usage.py b/scripts/backends_cpu_usage/log_cpu_usage.py
new file mode 100755
index 00000000..2674e1ff
--- /dev/null
+++ b/scripts/backends_cpu_usage/log_cpu_usage.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+
+
+# Get the CPU usage and print to file.
+
+
+import psutil
+import time
+import argparse
+import os
+import threading
+
+
+class LogCpuUsage(threading.Thread):
+
+ def __init__(self, fname):
+ threading.Thread.__init__(self)
+ self._stopped = True
+ self._fname = fname
+
+ def run(self):
+ self._stopped = False
+ with open(self._fname, 'w') as f:
+ start = time.time()
+ while self._stopped is False:
+ now = time.time()
+ f.write("%f %f\n" % ((now - start), psutil.cpu_percent()))
+ time.sleep(0.01)
+
+ def stop(self):
+ self._stopped = True
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('file', help='where to save output')
+ args = parser.parse_args()
+
+ if os.path.isfile(args.file):
+ replace = raw_input('File %s exists, replace it (y/N)? ' % args.file)
+ if replace.lower() != 'y':
+ print 'Bailing out.'
+ exit(1)
+
+ log_cpu = LogCpuUsage(args.file)
+ log_cpu.run()
diff --git a/scripts/backends_cpu_usage/movingaverage.py b/scripts/backends_cpu_usage/movingaverage.py
new file mode 100644
index 00000000..bac1b3e1
--- /dev/null
+++ b/scripts/backends_cpu_usage/movingaverage.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+#
+# Sean Reifschneider, tummy.com, ltd. <jafo@tummy.com>
+# Released into the Public Domain, 2011-02-06
+
+import itertools
+from itertools import islice
+from collections import deque
+
+
+#########################################################
+def movingaverage(data, subset_size, data_is_list = None,
+ avoid_fp_drift = True):
+ '''Return the moving averages of the data, with a window size of
+ `subset_size`. `subset_size` must be an integer greater than 0 and
+ less than the length of the input data, or a ValueError will be raised.
+
+ `data_is_list` can be used to tune the algorithm for list or iteratable
+ as an input. The default value, `None` will auto-detect this.
+ The algorithm used if `data` is a list is almost twice as fast as if
+ it is an iteratable.
+
+ `avoid_fp_drift`, if True (the default) sums every sub-set rather than
+ keeping a "rolling sum" (which may be subject to floating-point drift).
+ While more correct, it is also dramatically slower for subset sizes
+ much larger than 20.
+
+ NOTE: You really should consider setting `avoid_fp_drift = False` unless
+ you are dealing with very small numbers (say, far smaller than 0.00001)
+ or require extreme accuracy at the cost of execution time. For
+ `subset_size` < 20, the performance difference is very small.
+ '''
+ if subset_size < 1:
+ raise ValueError('subset_size must be 1 or larger')
+
+ if data_is_list is None:
+ data_is_list = hasattr(data, '__getslice__')
+
+ divisor = float(subset_size)
+ if data_is_list:
+ # This only works if we can re-access old elements, but is much faster.
+ # In other words, it can't be just an iterable, it needs to be a list.
+
+ if subset_size > len(data):
+ raise ValueError('subset_size must be smaller than data set size')
+
+ if avoid_fp_drift:
+ for x in range(subset_size, len(data) + 1):
+ yield sum(data[x - subset_size:x]) / divisor
+ else:
+ cur = sum(data[0:subset_size])
+ yield cur / divisor
+ for x in range(subset_size, len(data)):
+ cur += data[x] - data[x - subset_size]
+ yield cur / divisor
+ else:
+ # Based on the recipe at:
+ # http://docs.python.org/library/collections.html#deque-recipes
+ it = iter(data)
+ d = deque(islice(it, subset_size))
+
+ if subset_size > len(d):
+ raise ValueError('subset_size must be smaller than data set size')
+
+ if avoid_fp_drift:
+ yield sum(d) / divisor
+ for elem in it:
+ d.popleft()
+ d.append(elem)
+ yield sum(d) / divisor
+ else:
+ s = sum(d)
+ yield s / divisor
+ for elem in it:
+ s += elem - d.popleft()
+ d.append(elem)
+ yield s / divisor
+
+
+##########################
+if __name__ == '__main__':
+ import unittest
+
+ class TestMovingAverage(unittest.TestCase):
+ ####################
+ def test_List(self):
+ try:
+ list(movingaverage([1,2,3], 0))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage([1,2,3,4,5,6], 7))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5])
+
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, False)), [40.0,42.0,45.0,43.0])
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, True)), [40.0,42.0,45.0,43.0])
+
+
+ ######################
+ def test_XRange(self):
+ try:
+ list(movingaverage(xrange(1, 4), 0))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage(xrange(1, 7), 7))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))),
+ 2)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5])
+
+
+ ###########################
+ def test_ListRolling(self):
+ try:
+ list(movingaverage([1,2,3], 0, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1,
+ avoid_fp_drift = False)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2,
+ avoid_fp_drift = False)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2,
+ avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3,
+ avoid_fp_drift = False)), [2,3,4,5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4,
+ avoid_fp_drift = False)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5,
+ avoid_fp_drift = False)), [3,4])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6,
+ avoid_fp_drift = False)), [3.5])
+
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0])
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0])
+
+
+ #############################
+ def test_XRangeRolling(self):
+ try:
+ list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage(xrange(1, 7), 1,
+ avoid_fp_drift = False)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 2,
+ avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))),
+ 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 3,
+ avoid_fp_drift = False)), [2,3,4,5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 4,
+ avoid_fp_drift = False)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 5,
+ avoid_fp_drift = False)), [3,4])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 6,
+ avoid_fp_drift = False)), [3.5])
+
+
+ ######################################################################
+ suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage)
+ unittest.TextTestRunner(verbosity = 2).run(suite)
+
diff --git a/scripts/backends_cpu_usage/plot.py b/scripts/backends_cpu_usage/plot.py
new file mode 100755
index 00000000..4e5083ad
--- /dev/null
+++ b/scripts/backends_cpu_usage/plot.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python
+
+
+from matplotlib import pyplot as plt
+from movingaverage import movingaverage
+
+
+def smooth(l):
+ return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False)
+
+
+files = [
+ ('sqlite', 'b'),
+ ('sqlcipher', 'r'),
+ ('u1dblite', 'g'),
+ ('u1dbcipher', 'm'),
+]
+
+
+# config the plot
+plt.xlabel('time (s)')
+plt.ylabel('cpu usage (%)')
+plt.title('u1db backends CPU usage')
+
+
+for fi in files:
+
+ backend = fi[0]
+ color = fi[1]
+ filename = '%s.txt' % backend
+
+ x = []
+ y = []
+
+ xmax = None
+ xmin = None
+ ymax = None
+ ymin = None
+
+ # read data from file
+ with open(filename, 'r') as f:
+ line = f.readline()
+ while line is not None:
+ time, cpu = tuple(line.strip().split(' '))
+ cpu = float(cpu)
+ x.append(float(time))
+ y.append(cpu)
+ if ymax == None or cpu > ymax:
+ ymax = cpu
+ xmax = time
+ if ymin == None or cpu < ymin:
+ ymin = cpu
+ xmin = time
+ line = f.readline()
+ if line == '':
+ break
+
+ kwargs = {
+ 'linewidth': 1.0,
+ 'linestyle': '-',
+ # 'marker': '.',
+ 'color': color,
+ }
+ plt.plot(
+ [n for n in smooth(x)],
+ [n for n in smooth(y)],
+ label=backend, **kwargs)
+
+ #plt.axes().get_xaxis().set_ticks(x)
+ #plt.axes().get_xaxis().set_ticklabels(x)
+
+ # annotate max and min values
+ #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax))
+ #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin))
+
+
+plt.ylim(0, 100)
+plt.grid()
+plt.legend()
+plt.show()
+
diff --git a/scripts/backends_cpu_usage/test_u1db_sync.py b/scripts/backends_cpu_usage/test_u1db_sync.py
new file mode 100755
index 00000000..26ef8f9f
--- /dev/null
+++ b/scripts/backends_cpu_usage/test_u1db_sync.py
@@ -0,0 +1,113 @@
+#!/usr/bin/python
+
+
+import u1db
+import tempfile
+import logging
+import shutil
+import os
+import argparse
+import time
+import binascii
+import random
+
+
+from leap.soledad.client.sqlcipher import open as sqlcipher_open
+from log_cpu_usage import LogCpuUsage
+from u1dblite import open as u1dblite_open
+from u1dbcipher import open as u1dbcipher_open
+
+
+DOCS_TO_SYNC = 1000
+SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB
+BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB
+
+
+def get_data(size):
+ return binascii.hexlify(os.urandom(size/2))
+
+
+def run_test(testname, open_fun, tempdir, docs, *args):
+ logger.info('Starting test \"%s\".' % testname)
+
+ # instantiate dbs
+ db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args)
+ db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args)
+
+ # get sync target and synchsonizer
+ target = db2.get_sync_target()
+ synchronizer = u1db.sync.Synchronizer(db1, target)
+
+
+ # generate lots of small documents
+ logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC)
+ for content in docs:
+ db1.create_doc(content)
+ logger.info('%d documents created in source db.' % DOCS_TO_SYNC)
+
+ # run the test
+ filename = testname + '.txt'
+ logger.info('Logging CPU usage to %s.' % filename)
+ log_cpu = LogCpuUsage(filename)
+ tstart = time.time()
+
+ # start logging cpu
+ log_cpu.start()
+ logger.info('Sleeping for 5 seconds...')
+ time.sleep(5)
+
+ # sync
+ logger.info('Starting sync...')
+ sstart = time.time()
+ synchronizer.sync()
+ send = time.time()
+ logger.info('Sync finished.')
+
+ # stop logging cpu
+ logger.info('Sleeping for 5 seconds...')
+ time.sleep(5)
+ tend = time.time()
+ log_cpu.stop()
+
+ # report
+ logger.info('Total sync time: %f seconds' % (send - sstart))
+ logger.info('Total test time: %f seconds' % (tend - tstart))
+ logger.info('Finished test \"%s\".' % testname)
+
+ # close dbs
+ db1.close()
+ db2.close()
+
+
+if __name__ == '__main__':
+
+ # configure logger
+ logger = logging.getLogger(__name__)
+ LOG_FORMAT = '%(asctime)s %(message)s'
+ logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+ # get a temporary dir
+ tempdir = tempfile.mkdtemp()
+ logger.info('Using temporary directory %s' % tempdir)
+
+
+ # create a lot of documents with random sizes
+ docs = []
+ for i in xrange(DOCS_TO_SYNC):
+ docs.append({
+ 'index': i,
+ #'data': get_data(
+ # random.randrange(
+ # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE))
+ })
+
+ # run tests
+ run_test('sqlite', u1db.open, tempdir, docs, True)
+ run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True)
+ run_test('u1dblite', u1dblite_open, tempdir, docs)
+ run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True)
+
+ # remove temporary dir
+ logger.info('Removing temporary directory %s' % tempdir)
+ shutil.rmtree(tempdir)