4 files changed, 449 insertions, 0 deletions
diff --git a/scripts/profiling/backends_cpu_usage/log_cpu_usage.py b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py
new file mode 100755
index 00000000..2674e1ff
--- /dev/null
+++ b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+
+
+# Get the CPU usage and print to file.
+
+
+import psutil
+import time
+import argparse
+import os
+import threading
+
+
+class LogCpuUsage(threading.Thread):
+    """
+    Thread that periodically writes the CPU usage to a file.
+
+    Each output line is "<seconds since run() began> <cpu percent>", the
+    percentage being whatever psutil.cpu_percent() returns.
+    """
+
+    def __init__(self, fname):
+        """
+        :param fname: path of the output file; run() opens it with mode 'w',
+                      so any existing content is truncated.
+        """
+        threading.Thread.__init__(self)
+        # An Event that only stop() ever sets: a stop() issued before run()
+        # has begun is remembered, whereas a boolean that run() resets on
+        # entry would drop that request and leave the loop running forever.
+        self._stop_requested = threading.Event()
+        self._fname = fname
+
+    def run(self):
+        """Write one sample, sleep 10 ms, repeat until stop() is called."""
+        with open(self._fname, 'w') as f:
+            start = time.time()
+            while not self._stop_requested.is_set():
+                now = time.time()
+                f.write("%f %f\n" % ((now - start), psutil.cpu_percent()))
+                time.sleep(0.01)
+
+    def stop(self):
+        """Ask run() to leave its loop; the request is never reset."""
+        self._stop_requested.set()
+
+
+if __name__ == '__main__':
+    # Stand-alone use: log CPU usage to the file named on the command line.
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument('file', help='where to save output')
+    out_path = arg_parser.parse_args().file
+
+    # Never overwrite an existing file without explicit confirmation.
+    if os.path.isfile(out_path):
+        answer = raw_input('File %s exists, replace it (y/N)? ' % out_path)
+        if answer.lower() != 'y':
+            print 'Bailing out.'
+            exit(1)
+
+    # run() is invoked directly rather than start(), so the sampling loop
+    # executes in the main thread and nothing here ever calls stop().
+    LogCpuUsage(out_path).run()
diff --git a/scripts/profiling/backends_cpu_usage/movingaverage.py b/scripts/profiling/backends_cpu_usage/movingaverage.py
new file mode 100644
index 00000000..bac1b3e1
--- /dev/null
+++ b/scripts/profiling/backends_cpu_usage/movingaverage.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+#
+#  Sean Reifschneider, tummy.com, ltd.  <jafo@tummy.com>
+#  Released into the Public Domain, 2011-02-06
+
+import itertools
+from itertools import islice
+from collections import deque
+
+
+#########################################################
+def movingaverage(data, subset_size, data_is_list = None,
+		avoid_fp_drift = True):
+	'''Yield the moving averages of the data, with a window size of
+	`subset_size`.  `subset_size` must be an integer greater than 0 and
+	no larger than the length of the input data, or a ValueError will be
+	raised.  One average is produced per window position, i.e.
+	`len(data) - subset_size + 1` values in total.
+
+	This is a generator: nothing is computed (and no ValueError is raised)
+	until the result is iterated.
+
+	`data_is_list` can be used to tune the algorithm for list or iterable
+	as an input.  The default value, `None` will auto-detect this.
+	The algorithm used if `data` is a list is almost twice as fast as if
+	it is an iterable.
+
+	`avoid_fp_drift`, if True (the default) sums every sub-set rather than
+	keeping a "rolling sum" (which may be subject to floating-point drift).
+	While more correct, it is also dramatically slower for subset sizes
+	much larger than 20.
+
+	NOTE: You really should consider setting `avoid_fp_drift = False` unless
+	you are dealing with very small numbers (say, far smaller than 0.00001)
+	or require extreme accuracy at the cost of execution time.  For
+	`subset_size` < 20, the performance difference is very small.
+	'''
+	if subset_size < 1:
+		raise ValueError('subset_size must be 1 or larger')
+
+	if data_is_list is None:
+		#  `__getslice__` only exists on Python 2 sequences; the explicit
+		#  list/tuple check keeps auto-detection working where it is absent.
+		data_is_list = (hasattr(data, '__getslice__')
+				or isinstance(data, (list, tuple)))
+
+	divisor = float(subset_size)
+	if data_is_list:
+		#  This only works if we can re-access old elements, but is much faster.
+		#  In other words, it can't be just an iterable, it needs to be a list.
+
+		if subset_size > len(data):
+			#  A window exactly as long as the data is fine (one average).
+			raise ValueError(
+					'subset_size must not be larger than data set size')
+
+		if avoid_fp_drift:
+			#  Re-sum every window from scratch.
+			for x in range(subset_size, len(data) + 1):
+				yield sum(data[x - subset_size:x]) / divisor
+		else:
+			#  Rolling sum: add the element entering the window and
+			#  subtract the one leaving it.
+			cur = sum(data[0:subset_size])
+			yield cur / divisor
+			for x in range(subset_size, len(data)):
+				cur += data[x] - data[x - subset_size]
+				yield cur / divisor
+	else:
+		#  Based on the recipe at:
+		#     http://docs.python.org/library/collections.html#deque-recipes
+		it = iter(data)
+		d = deque(islice(it, subset_size))
+
+		#  islice() stops early when the input runs out, so a short deque
+		#  means the data holds fewer than `subset_size` elements.
+		if subset_size > len(d):
+			raise ValueError(
+					'subset_size must not be larger than data set size')
+
+		if avoid_fp_drift:
+			yield sum(d) / divisor
+			for elem in it:
+				d.popleft()
+				d.append(elem)
+				yield sum(d) / divisor
+		else:
+			s = sum(d)
+			yield s / divisor
+			for elem in it:
+				s += elem - d.popleft()
+				d.append(elem)
+				yield s / divisor
+
+
+##########################
+#  Self-test: running this file as a script executes the unit tests below.
+if __name__ == '__main__':
+	import unittest
+
+	#  Each test first checks that a window size of 0, or one larger than the
+	#  data, raises ValueError once the generator is consumed with list(),
+	#  then compares the averages for window sizes 1 to 6 over the values 1..6.
+	class TestMovingAverage(unittest.TestCase):
+		####################
+		#  List input, default avoid_fp_drift (every window is re-summed).
+		def test_List(self):
+			try:
+				list(movingaverage([1,2,3], 0))
+				self.fail('Did not raise ValueError on subset_size=0')
+			except ValueError:
+				pass
+
+			try:
+				list(movingaverage([1,2,3,4,5,6], 7))
+				self.fail('Did not raise ValueError on subset_size > len(data)')
+			except ValueError:
+				pass
+
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)),
+					[1.5,2.5,3.5,4.5,5.5])
+			#  NOTE(review): xrange is used further down, so this is Python 2
+			#  code, where map() returns a list; presumably this case still
+			#  takes the list code path -- confirm.
+			self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)),
+					[1.5,2.5,3.5,4.5,5.5])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4])
+			#  A window as long as the data is accepted and gives one average.
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5])
+
+			#  The third positional argument is data_is_list: the same list is
+			#  pushed through the iterable algorithm (False) and the list
+			#  algorithm (True).
+			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+					3, False)), [40.0,42.0,45.0,43.0])
+			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+					3, True)), [40.0,42.0,45.0,43.0])
+
+
+		######################
+		#  Same expectations as test_List, fed from xrange objects / iterators.
+		#  NOTE(review): presumably these are not auto-detected as lists and so
+		#  exercise the deque-based branch -- TODO confirm.
+		def test_XRange(self):
+			try:
+				list(movingaverage(xrange(1, 4), 0))
+				self.fail('Did not raise ValueError on subset_size=0')
+			except ValueError:
+				pass
+
+			try:
+				list(movingaverage(xrange(1, 7), 7))
+				self.fail('Did not raise ValueError on subset_size > len(data)')
+			except ValueError:
+				pass
+
+			self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 2)),
+					[1.5,2.5,3.5,4.5,5.5])
+			self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))),
+					2)), [1.5,2.5,3.5,4.5,5.5])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5])
+
+
+		###########################
+		#  List input again, but with avoid_fp_drift = False (rolling sum).
+		def test_ListRolling(self):
+			try:
+				list(movingaverage([1,2,3], 0, avoid_fp_drift = False))
+				self.fail('Did not raise ValueError on subset_size=0')
+			except ValueError:
+				pass
+
+			try:
+				list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False))
+				self.fail('Did not raise ValueError on subset_size > len(data)')
+			except ValueError:
+				pass
+
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1,
+					avoid_fp_drift = False)), [1,2,3,4,5,6])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2,
+					avoid_fp_drift = False)),
+					[1.5,2.5,3.5,4.5,5.5])
+			self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2,
+					avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3,
+					avoid_fp_drift = False)), [2,3,4,5])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4,
+					avoid_fp_drift = False)), [2.5,3.5,4.5])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5,
+					avoid_fp_drift = False)), [3,4])
+			self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6,
+					avoid_fp_drift = False)), [3.5])
+
+			#  data_is_list forced to False and then True, rolling sum in both.
+			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+					3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0])
+			self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+					3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0])
+
+
+		#############################
+		#  xrange / iterator input with avoid_fp_drift = False (rolling sum).
+		def test_XRangeRolling(self):
+			try:
+				list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False))
+				self.fail('Did not raise ValueError on subset_size=0')
+			except ValueError:
+				pass
+
+			try:
+				list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False))
+				self.fail('Did not raise ValueError on subset_size > len(data)')
+			except ValueError:
+				pass
+
+			self.assertEqual(list(movingaverage(xrange(1, 7), 1,
+					avoid_fp_drift = False)), [1,2,3,4,5,6])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 2,
+					avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+			self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))),
+					2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 3,
+					avoid_fp_drift = False)), [2,3,4,5])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 4,
+					avoid_fp_drift = False)), [2.5,3.5,4.5])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 5,
+					avoid_fp_drift = False)), [3,4])
+			self.assertEqual(list(movingaverage(xrange(1, 7), 6,
+					avoid_fp_drift = False)), [3.5])
+
+
+	######################################################################
+	#  Collect the tests of the class above and run them with verbosity 2.
+	suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage)
+	unittest.TextTestRunner(verbosity = 2).run(suite)
+
diff --git a/scripts/profiling/backends_cpu_usage/plot.py b/scripts/profiling/backends_cpu_usage/plot.py
new file mode 100755
index 00000000..4e5083ad
--- /dev/null
+++ b/scripts/profiling/backends_cpu_usage/plot.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python
+
+
+from matplotlib import pyplot as plt
+from movingaverage import movingaverage
+
+
+def smooth(l):
+    """Return a generator over the 10-sample moving averages of list `l`.
+
+    The list algorithm of movingaverage() is requested explicitly, with a
+    rolling sum (avoid_fp_drift=False).
+    """
+    window_size = 10
+    return movingaverage(
+        l, window_size, data_is_list=True, avoid_fp_drift=False)
+
+
+# (backend name, matplotlib color) pairs; the samples of each backend are
+# read from '<backend name>.txt' in the current directory
+files = [
+    ('sqlite', 'b'),
+    ('sqlcipher', 'r'),
+    ('u1dblite', 'g'),
+    ('u1dbcipher', 'm'),
+]
+
+
+# config the plot
+plt.xlabel('time (s)')
+plt.ylabel('cpu usage (%)')
+plt.title('u1db backends CPU usage')
+
+
+for backend, color in files:
+
+    filename = '%s.txt' % backend
+
+    x = []  # first column of each line (plotted as time)
+    y = []  # second column of each line (plotted as cpu usage)
+
+    # extreme cpu readings and the times at which they occurred; only the
+    # disabled annotations at the end of the loop body would use them
+    xmax = None
+    xmin = None
+    ymax = None
+    ymin = None
+
+    # read data from file: one "<time> <cpu>" pair per line
+    with open(filename, 'r') as f:
+        for line in f:
+            fields = line.split()
+            if not fields:
+                # skip blank lines instead of failing to unpack them
+                continue
+            # still raises ValueError on malformed lines (wrong number of
+            # columns or non-numeric values)
+            elapsed, cpu = map(float, fields)
+            x.append(elapsed)
+            y.append(cpu)
+            # keep the time as a float too, so it is usable as a coordinate
+            if ymax is None or cpu > ymax:
+                ymax = cpu
+                xmax = elapsed
+            if ymin is None or cpu < ymin:
+                ymin = cpu
+                xmin = elapsed
+
+    kwargs = {
+        'linewidth': 1.0,
+        'linestyle': '-',
+    #    'marker': '.',
+        'color': color,
+    }
+    # both axes go through the same moving average so that the two
+    # sequences keep the same length
+    plt.plot(
+        list(smooth(x)),
+        list(smooth(y)),
+        label=backend, **kwargs)
+
+    #plt.axes().get_xaxis().set_ticks(x)
+    #plt.axes().get_xaxis().set_ticklabels(x)
+
+    # annotate max and min values (disabled)
+    #plt.axes().annotate("%.2f %%" % ymax, xy=(xmax, ymax))
+    #plt.axes().annotate("%.2f %%" % ymin, xy=(xmin, ymin))
+
+
+plt.ylim(0, 100)
+plt.grid()
+plt.legend()
+plt.show()
+
diff --git a/scripts/profiling/backends_cpu_usage/test_u1db_sync.py b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py
new file mode 100755
index 00000000..26ef8f9f
--- /dev/null
+++ b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py
@@ -0,0 +1,113 @@
+#!/usr/bin/python
+
+
+import u1db
+import tempfile
+import logging
+import shutil
+import os
+import argparse
+import time
+import binascii
+import random
+
+
+from leap.soledad.client.sqlcipher import open as sqlcipher_open
+from log_cpu_usage import LogCpuUsage
+from u1dblite import open as u1dblite_open
+from u1dbcipher import open as u1dbcipher_open
+
+
+# number of documents the __main__ block creates for every test run
+DOCS_TO_SYNC = 1000
+# bounds for the size of a document's random payload; in this file they are
+# only referenced from the commented-out 'data' field in the __main__ block
+SMALLEST_DOC_SIZE = 1 * 1024  # 1 KB
+BIGGEST_DOC_SIZE = 100 * 1024  # 100 KB
+
+
+def get_data(size):
+    """
+    Return a random hexadecimal string of about `size` characters.
+
+    `size // 2` random bytes are read from os.urandom() and hex-encoded (two
+    characters per byte), so an odd `size` yields `size - 1` characters.
+
+    :param size: desired length of the returned string, in characters.
+    """
+    # explicit floor division: same result as `size/2` for the integers used
+    # under Python 2, and still an integer where `/` is true division
+    return binascii.hexlify(os.urandom(size // 2))
+
+
+def run_test(testname, open_fun, tempdir, docs,  *args):
+    """
+    Sync `docs` between two fresh databases while logging CPU usage.
+
+    Two databases, '<testname>1.db' and '<testname>2.db', are opened inside
+    `tempdir`. Every item of `docs` is created in the first one, which is
+    then synced to the second. CPU usage is logged to '<testname>.txt' (a
+    path relative to the current directory) from 5 seconds before the sync
+    until 5 seconds after it.
+
+    :param testname: label used in log messages and file names.
+    :param open_fun: callable invoked as open_fun(path, *args) to open a db.
+    :param tempdir: directory in which the database files are created.
+    :param docs: iterable of document contents passed to create_doc().
+    :param args: extra positional arguments forwarded to `open_fun`.
+    """
+    # This is the same logger object that the __main__ block configures;
+    # fetching it here means the function does not depend on a global that
+    # only exists when the file is run as a script.
+    logger = logging.getLogger(__name__)
+    logger.info('Starting test \"%s\".' % testname)
+
+    # instantiate dbs
+    db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args)
+    db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args)
+
+    try:
+        # get sync target and synchronizer
+        target = db2.get_sync_target()
+        synchronizer = u1db.sync.Synchronizer(db1, target)
+
+        # fill the source db with the documents to be synced
+        logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC)
+        for content in docs:
+            db1.create_doc(content)
+        logger.info('%d documents created in source db.' % DOCS_TO_SYNC)
+
+        # run the test
+        filename = testname + '.txt'
+        logger.info('Logging CPU usage to %s.' % filename)
+        log_cpu = LogCpuUsage(filename)
+        tstart = time.time()
+
+        # start logging cpu
+        log_cpu.start()
+        try:
+            logger.info('Sleeping for 5 seconds...')
+            time.sleep(5)
+
+            # sync
+            logger.info('Starting sync...')
+            sstart = time.time()
+            synchronizer.sync()
+            send = time.time()
+            logger.info('Sync finished.')
+
+            logger.info('Sleeping for 5 seconds...')
+            time.sleep(5)
+            tend = time.time()
+        finally:
+            # Stop logging cpu even when the sync failed: the logger thread
+            # is not a daemon, so leaving it running would keep the process
+            # alive. join() waits until its output file has been closed.
+            log_cpu.stop()
+            log_cpu.join()
+
+        # report
+        logger.info('Total sync time: %f seconds' % (send - sstart))
+        logger.info('Total test time: %f seconds' % (tend - tstart))
+        logger.info('Finished test \"%s\".' % testname)
+    finally:
+        # close dbs, also when the test raised
+        db1.close()
+        db2.close()
+
+
+if __name__ == '__main__':
+
+    # configure logger
+    logger = logging.getLogger(__name__)
+    LOG_FORMAT = '%(asctime)s %(message)s'
+    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+    # get a temporary dir
+    tempdir = tempfile.mkdtemp()
+    logger.info('Using temporary directory %s' % tempdir)
+
+    try:
+        # create the documents to sync; the random-sized 'data' payload is
+        # currently disabled, so each document only carries its index
+        docs = []
+        for i in xrange(DOCS_TO_SYNC):
+            docs.append({
+                'index': i,
+                #'data': get_data(
+                #    random.randrange(
+                #        SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE))
+            })
+
+        # run tests, one backend after the other, on the same documents
+        run_test('sqlite', u1db.open, tempdir, docs, True)
+        run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True)
+        run_test('u1dblite', u1dblite_open, tempdir, docs)
+        run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True)
+    finally:
+        # remove temporary dir, also when one of the tests raised
+        logger.info('Removing temporary directory %s' % tempdir)
+        shutil.rmtree(tempdir)