author     Tomás Touceda <chiiph@leap.se>  2014-04-09 16:16:47 -0300
committer  Tomás Touceda <chiiph@leap.se>  2014-04-09 16:16:47 -0300
commit     8935765a0d0a6956f1140bb7ab0bd1af57b4303f (patch)
tree       ec4f2147fcf0f94a8e3d11e47e27566d30a226bd /scripts/profiling
parent     8b8d9befbe3e60753e73bc7aaf1b8842a1846046 (diff)
parent     11757fb0d071b753819a04d8504a72baed80db2f (diff)
Merge remote-tracking branch 'refs/remotes/drebs/feature/5386_improve-sync' into develop
Diffstat (limited to 'scripts/profiling')
-rwxr-xr-x  scripts/profiling/backends_cpu_usage/log_cpu_usage.py            46
-rw-r--r--  scripts/profiling/backends_cpu_usage/movingaverage.py           209
-rwxr-xr-x  scripts/profiling/backends_cpu_usage/plot.py                     81
-rwxr-xr-x  scripts/profiling/backends_cpu_usage/test_u1db_sync.py          113
-rwxr-xr-x  scripts/profiling/doc_put_memory_usage/find_max_upload_size.py  169
-rwxr-xr-x  scripts/profiling/doc_put_memory_usage/get-mem.py                16
-rwxr-xr-x  scripts/profiling/doc_put_memory_usage/plot-mem.py               73
7 files changed, 707 insertions, 0 deletions
diff --git a/scripts/profiling/backends_cpu_usage/log_cpu_usage.py b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py
new file mode 100755
index 00000000..2674e1ff
--- /dev/null
+++ b/scripts/profiling/backends_cpu_usage/log_cpu_usage.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+
+
+# Sample the CPU usage and print it to a file.
+
+
+import psutil
+import time
+import argparse
+import os
+import threading
+
+
+class LogCpuUsage(threading.Thread):
+
+ def __init__(self, fname):
+ threading.Thread.__init__(self)
+ self._stopped = True
+ self._fname = fname
+
+ def run(self):
+ self._stopped = False
+ with open(self._fname, 'w') as f:
+ start = time.time()
+ while self._stopped is False:
+ now = time.time()
+ f.write("%f %f\n" % ((now - start), psutil.cpu_percent()))
+ time.sleep(0.01)
+
+ def stop(self):
+ self._stopped = True
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('file', help='where to save output')
+ args = parser.parse_args()
+
+ if os.path.isfile(args.file):
+ replace = raw_input('File %s exists, replace it (y/N)? ' % args.file)
+ if replace.lower() != 'y':
+ print 'Bailing out.'
+ exit(1)
+
+ log_cpu = LogCpuUsage(args.file)
+ log_cpu.run()
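
When embedded in another script (as test_u1db_sync.py does below), the class is driven as a background thread through start()/stop(); run standalone, the __main__ block calls run() directly, so the sampling loop occupies the foreground until the process is interrupted. A minimal threaded-usage sketch, with a hypothetical output file name:

    from log_cpu_usage import LogCpuUsage
    import time

    log_cpu = LogCpuUsage('cpu.txt')  # hypothetical output file
    log_cpu.start()                   # run() now samples in a background thread
    time.sleep(10)                    # ... the workload being profiled ...
    log_cpu.stop()                    # sets the stop flag; the loop exits on its next pass
    log_cpu.join()
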
diff --git a/scripts/profiling/backends_cpu_usage/movingaverage.py b/scripts/profiling/backends_cpu_usage/movingaverage.py
new file mode 100644
index 00000000..bac1b3e1
--- /dev/null
+++ b/scripts/profiling/backends_cpu_usage/movingaverage.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+#
+# Sean Reifschneider, tummy.com, ltd. <jafo@tummy.com>
+# Released into the Public Domain, 2011-02-06
+
+import itertools
+from itertools import islice
+from collections import deque
+
+
+#########################################################
+def movingaverage(data, subset_size, data_is_list = None,
+ avoid_fp_drift = True):
+ '''Return the moving averages of the data, with a window size of
+ `subset_size`. `subset_size` must be an integer greater than 0 and
+ no greater than the length of the input data, or a ValueError will be raised.
+
+ `data_is_list` can be used to tune the algorithm for a list or an iterable
+ as input. The default value, `None`, will auto-detect this.
+ The algorithm used if `data` is a list is almost twice as fast as if
+ it is an iterable.
+
+ `avoid_fp_drift`, if True (the default) sums every sub-set rather than
+ keeping a "rolling sum" (which may be subject to floating-point drift).
+ While more correct, it is also dramatically slower for subset sizes
+ much larger than 20.
+
+ NOTE: You really should consider setting `avoid_fp_drift = False` unless
+ you are dealing with very small numbers (say, far smaller than 0.00001)
+ or require extreme accuracy at the cost of execution time. For
+ `subset_size` < 20, the performance difference is very small.
+ '''
+ if subset_size < 1:
+ raise ValueError('subset_size must be 1 or larger')
+
+ if data_is_list is None:
+ data_is_list = hasattr(data, '__getslice__')
+
+ divisor = float(subset_size)
+ if data_is_list:
+ # This only works if we can re-access old elements, but is much faster.
+ # In other words, it can't be just an iterable, it needs to be a list.
+
+ if subset_size > len(data):
+ raise ValueError('subset_size must be smaller than data set size')
+
+ if avoid_fp_drift:
+ for x in range(subset_size, len(data) + 1):
+ yield sum(data[x - subset_size:x]) / divisor
+ else:
+ cur = sum(data[0:subset_size])
+ yield cur / divisor
+ for x in range(subset_size, len(data)):
+ cur += data[x] - data[x - subset_size]
+ yield cur / divisor
+ else:
+ # Based on the recipe at:
+ # http://docs.python.org/library/collections.html#deque-recipes
+ it = iter(data)
+ d = deque(islice(it, subset_size))
+
+ if subset_size > len(d):
+ raise ValueError('subset_size must be smaller than data set size')
+
+ if avoid_fp_drift:
+ yield sum(d) / divisor
+ for elem in it:
+ d.popleft()
+ d.append(elem)
+ yield sum(d) / divisor
+ else:
+ s = sum(d)
+ yield s / divisor
+ for elem in it:
+ s += elem - d.popleft()
+ d.append(elem)
+ yield s / divisor
+
+
+##########################
+if __name__ == '__main__':
+ import unittest
+
+ class TestMovingAverage(unittest.TestCase):
+ ####################
+ def test_List(self):
+ try:
+ list(movingaverage([1,2,3], 0))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage([1,2,3,4,5,6], 7))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3)), [2,3,4,5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5)), [3,4])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6)), [3.5])
+
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, False)), [40.0,42.0,45.0,43.0])
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, True)), [40.0,42.0,45.0,43.0])
+
+
+ ######################
+ def test_XRange(self):
+ try:
+ list(movingaverage(xrange(1, 4), 0))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage(xrange(1, 7), 7))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage(xrange(1, 7), 1)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 2)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))),
+ 2)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 3)), [2,3,4,5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 4)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 5)), [3,4])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 6)), [3.5])
+
+
+ ###########################
+ def test_ListRolling(self):
+ try:
+ list(movingaverage([1,2,3], 0, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage([1,2,3,4,5,6], 7, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 1,
+ avoid_fp_drift = False)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 2,
+ avoid_fp_drift = False)),
+ [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(map(float, [1,2,3,4,5,6]), 2,
+ avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 3,
+ avoid_fp_drift = False)), [2,3,4,5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 4,
+ avoid_fp_drift = False)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 5,
+ avoid_fp_drift = False)), [3,4])
+ self.assertEqual(list(movingaverage([1,2,3,4,5,6], 6,
+ avoid_fp_drift = False)), [3.5])
+
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, False, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0])
+ self.assertEqual(list(movingaverage([40, 30, 50, 46, 39, 44],
+ 3, True, avoid_fp_drift = False)), [40.0,42.0,45.0,43.0])
+
+
+ #############################
+ def test_XRangeRolling(self):
+ try:
+ list(movingaverage(xrange(1, 4), 0, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size=0')
+ except ValueError:
+ pass
+
+ try:
+ list(movingaverage(xrange(1, 7), 7, avoid_fp_drift = False))
+ self.fail('Did not raise ValueError on subset_size > len(data)')
+ except ValueError:
+ pass
+
+ self.assertEqual(list(movingaverage(xrange(1, 7), 1,
+ avoid_fp_drift = False)), [1,2,3,4,5,6])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 2,
+ avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(iter(map(float, xrange(1, 7))),
+ 2, avoid_fp_drift = False)), [1.5,2.5,3.5,4.5,5.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 3,
+ avoid_fp_drift = False)), [2,3,4,5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 4,
+ avoid_fp_drift = False)), [2.5,3.5,4.5])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 5,
+ avoid_fp_drift = False)), [3,4])
+ self.assertEqual(list(movingaverage(xrange(1, 7), 6,
+ avoid_fp_drift = False)), [3.5])
+
+
+ ######################################################################
+ suite = unittest.TestLoader().loadTestsFromTestCase(TestMovingAverage)
+ unittest.TextTestRunner(verbosity = 2).run(suite)
+
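A quick interactive sketch of the generator, reusing the sample data from test_List above:

    from movingaverage import movingaverage

    data = [40, 30, 50, 46, 39, 44]
    print list(movingaverage(data, 3))  # [40.0, 42.0, 45.0, 43.0]
    # iterator input takes the deque-based path; the result is the same
    print list(movingaverage(iter(data), 3, avoid_fp_drift=False))
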
diff --git a/scripts/profiling/backends_cpu_usage/plot.py b/scripts/profiling/backends_cpu_usage/plot.py
new file mode 100755
index 00000000..4e5083ad
--- /dev/null
+++ b/scripts/profiling/backends_cpu_usage/plot.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python
+
+
+from matplotlib import pyplot as plt
+from movingaverage import movingaverage
+
+
+def smooth(l):
+ return movingaverage(l, 10, data_is_list=True, avoid_fp_drift=False)
+
+
+files = [
+ ('sqlite', 'b'),
+ ('sqlcipher', 'r'),
+ ('u1dblite', 'g'),
+ ('u1dbcipher', 'm'),
+]
+
+
+# config the plot
+plt.xlabel('time (s)')
+plt.ylabel('cpu usage (%)')
+plt.title('u1db backends CPU usage')
+
+
+for fi in files:
+
+ backend = fi[0]
+ color = fi[1]
+ filename = '%s.txt' % backend
+
+ x = []
+ y = []
+
+ xmax = None
+ xmin = None
+ ymax = None
+ ymin = None
+
+ # read data from file
+ with open(filename, 'r') as f:
+ line = f.readline()
+ while line is not None:
+ time, cpu = tuple(line.strip().split(' '))
+ cpu = float(cpu)
+ x.append(float(time))
+ y.append(cpu)
+ if ymax == None or cpu > ymax:
+ ymax = cpu
+ xmax = time
+ if ymin == None or cpu < ymin:
+ ymin = cpu
+ xmin = time
+ line = f.readline()
+ if line == '':
+ break
+
+ kwargs = {
+ 'linewidth': 1.0,
+ 'linestyle': '-',
+ # 'marker': '.',
+ 'color': color,
+ }
+ plt.plot(
+ [n for n in smooth(x)],
+ [n for n in smooth(y)],
+ label=backend, **kwargs)
+
+ #plt.axes().get_xaxis().set_ticks(x)
+ #plt.axes().get_xaxis().set_ticklabels(x)
+
+ # annotate max and min values
+ #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax))
+ #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin))
+
+
+plt.ylim(0, 100)
+plt.grid()
+plt.legend()
+plt.show()
+
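The <backend>.txt files read here are the "elapsed_seconds cpu_percent" pairs written by log_cpu_usage.py. The readline() loop above terminates only through the `if line == '':` check (readline() returns an empty string at EOF, never None); iterating over the file object directly is an equivalent, shorter sketch of the reading part (the min/max bookkeeping would stay the same):

    with open(filename) as f:
        for line in f:
            t, cpu = line.split()
            x.append(float(t))
            y.append(float(cpu))
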
diff --git a/scripts/profiling/backends_cpu_usage/test_u1db_sync.py b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py
new file mode 100755
index 00000000..26ef8f9f
--- /dev/null
+++ b/scripts/profiling/backends_cpu_usage/test_u1db_sync.py
@@ -0,0 +1,113 @@
+#!/usr/bin/python
+
+
+import u1db
+import tempfile
+import logging
+import shutil
+import os
+import argparse
+import time
+import binascii
+import random
+
+
+from leap.soledad.client.sqlcipher import open as sqlcipher_open
+from log_cpu_usage import LogCpuUsage
+from u1dblite import open as u1dblite_open
+from u1dbcipher import open as u1dbcipher_open
+
+
+DOCS_TO_SYNC = 1000
+SMALLEST_DOC_SIZE = 1 * 1024 # 1 KB
+BIGGEST_DOC_SIZE = 100 * 1024 # 100 KB
+
+
+def get_data(size):
+ return binascii.hexlify(os.urandom(size/2))
+
+
+def run_test(testname, open_fun, tempdir, docs, *args):
+ logger.info('Starting test \"%s\".' % testname)
+
+ # instantiate dbs
+ db1 = open_fun(os.path.join(tempdir, testname + '1.db'), *args)
+ db2 = open_fun(os.path.join(tempdir, testname + '2.db'), *args)
+
+ # get sync target and synchronizer
+ target = db2.get_sync_target()
+ synchronizer = u1db.sync.Synchronizer(db1, target)
+
+
+ # generate lots of small documents
+ logger.info('Creating %d documents in source db...' % DOCS_TO_SYNC)
+ for content in docs:
+ db1.create_doc(content)
+ logger.info('%d documents created in source db.' % DOCS_TO_SYNC)
+
+ # run the test
+ filename = testname + '.txt'
+ logger.info('Logging CPU usage to %s.' % filename)
+ log_cpu = LogCpuUsage(filename)
+ tstart = time.time()
+
+ # start logging cpu
+ log_cpu.start()
+ logger.info('Sleeping for 5 seconds...')
+ time.sleep(5)
+
+ # sync
+ logger.info('Starting sync...')
+ sstart = time.time()
+ synchronizer.sync()
+ send = time.time()
+ logger.info('Sync finished.')
+
+ # stop logging cpu
+ logger.info('Sleeping for 5 seconds...')
+ time.sleep(5)
+ tend = time.time()
+ log_cpu.stop()
+
+ # report
+ logger.info('Total sync time: %f seconds' % (send - sstart))
+ logger.info('Total test time: %f seconds' % (tend - tstart))
+ logger.info('Finished test \"%s\".' % testname)
+
+ # close dbs
+ db1.close()
+ db2.close()
+
+
+if __name__ == '__main__':
+
+ # configure logger
+ logger = logging.getLogger(__name__)
+ LOG_FORMAT = '%(asctime)s %(message)s'
+ logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+ # get a temporary dir
+ tempdir = tempfile.mkdtemp()
+ logger.info('Using temporary directory %s' % tempdir)
+
+
+ # create a lot of documents with random sizes
+ docs = []
+ for i in xrange(DOCS_TO_SYNC):
+ docs.append({
+ 'index': i,
+ #'data': get_data(
+ # random.randrange(
+ # SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE))
+ })
+
+ # run tests
+ run_test('sqlite', u1db.open, tempdir, docs, True)
+ run_test('sqlcipher', sqlcipher_open, tempdir, docs, '123456', True)
+ run_test('u1dblite', u1dblite_open, tempdir, docs)
+ run_test('u1dbcipher', u1dbcipher_open, tempdir, docs, '123456', True)
+
+ # remove temporary dir
+ logger.info('Removing temporary directory %s' % tempdir)
+ shutil.rmtree(tempdir)
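Note that the 'data' field is commented out in this commit, so each synced document carries only its index and SMALLEST_DOC_SIZE/BIGGEST_DOC_SIZE are effectively unused. If random payloads are wanted, the commented-out lines suggest building the documents like this (sketch using the module's own get_data() helper and size constants):

    docs = []
    for i in xrange(DOCS_TO_SYNC):
        docs.append({
            'index': i,
            'data': get_data(
                random.randrange(SMALLEST_DOC_SIZE, BIGGEST_DOC_SIZE)),
        })
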
diff --git a/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py b/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py
new file mode 100755
index 00000000..02c68015
--- /dev/null
+++ b/scripts/profiling/doc_put_memory_usage/find_max_upload_size.py
@@ -0,0 +1,169 @@
+#!/usr/bin/python
+
+# This script finds the maximum upload size for a document on the current
+# server. It pulls the couch URL from the Soledad config file and attempts
+# multiple PUTs until it finds the maximum size supported by the server.
+#
+# As the Soledad couch user is not an admin, you have to pass a database in
+# which the test will be run. The database should already exist and be
+# initialized with the soledad design documents.
+#
+# Use it like this:
+#
+# ./find_max_upload_size.py <dbname>
+# ./find_max_upload_size.py -h
+
+import os
+import configparser
+import logging
+import argparse
+import random
+import string
+import binascii
+import json
+import time
+import uuid
+
+
+from couchdb.client import Database
+from socket import error as socket_error
+from leap.soledad.common.couch import CouchDatabase
+
+
+SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf'
+PREFIX = '/tmp/soledad_test'
+LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s'
+RETRIES = 3 # number of times to retry uploading a document of a certain
+ # size after a failure
+
+
+# configure logger
+logger = logging.getLogger(__name__)
+
+
+def config_log(level):
+ logging.basicConfig(format=LOG_FORMAT, level=level)
+
+
+def log_to_file(filename):
+ handler = logging.FileHandler(filename, mode='a')
+ handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT))
+ logger.addHandler(handler)
+
+
+# create test dir
+if not os.path.exists(PREFIX):
+ os.mkdir(PREFIX)
+
+
+def get_couch_url(config_file=SOLEDAD_CONFIG_FILE):
+ config = configparser.ConfigParser()
+ config.read(config_file)
+ return config['soledad-server']['couch_url']
+
+
+# generate or load an uploadable doc with the given size in MB
+def get_content(size):
+ fname = os.path.join(PREFIX, 'content-%d.json' % size)
+ if os.path.exists(fname):
+ logger.debug('Loading content with %d MB...' % size)
+ with open(fname, 'r') as f:
+ return f.read()
+ else:
+ length = int(size * 1024 ** 2)
+ logger.debug('Generating body with %d MB...' % size)
+ content = binascii.hexlify(os.urandom(length))[:length]
+ with open(fname, 'w') as f:
+ f.write(content)
+ return content
+
+
+def delete_doc(db):
+ doc = db.get('largedoc')
+ db.delete(doc)
+
+
+def upload(db, size, couch_db):
+ # try many times to be sure that size is infeasible
+ for i in range(RETRIES):
+ # wait until server is up to upload
+ while True:
+ try:
+ 'largedoc' in couch_db
+ break
+ except socket_error:
+ logger.debug('Waiting for server to come up...')
+ time.sleep(1)
+ # attempt to upload
+ try:
+ logger.debug(
+ 'Trying to upload %d MB document (attempt %d/%d)...' %
+ (size, (i+1), RETRIES))
+ content = get_content(size)
+ logger.debug('Starting upload of %d bytes.' % len(content))
+ doc = db.create_doc({'data': content}, doc_id='largedoc')
+ delete_doc(couch_db)
+ logger.debug('Success uploading %d MB doc.' % size)
+ return True
+ except Exception as e:
+ logger.debug('Failed to upload %d MB doc: %s' % (size, str(e)))
+ return False
+
+
+def find_max_upload_size(db_uri):
+ db = CouchDatabase.open_database(db_uri, False)
+ couch_db = Database(db_uri)
+ logger.debug('Database URI: %s' % db_uri)
+ # delete any leftover from a previous run
+ if 'largedoc' in couch_db:
+ delete_doc(couch_db)
+ # phase 1: increase upload size exponentially
+ logger.info('Starting phase 1: increasing size exponentially.')
+ size = 1
+ #import ipdb; ipdb.set_trace()
+ while True:
+ if upload(db, size, couch_db):
+ size *= 2
+ else:
+ break
+
+ # phase 2: binary search for maximum value
+ unable = size
+ able = size / 2
+ logger.info('Starting phase 2: binary search for maximum value.')
+ while unable - able > 1:
+ size = able + ((unable - able) / 2)
+ if upload(db, size, couch_db):
+ able = size
+ else:
+ unable = size
+ return able
+
+
+if __name__ == '__main__':
+ # parse command line
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '-d', action='store_true', dest='debug',
+ help='print debugging information')
+ parser.add_argument(
+ '-l', dest='logfile',
+ help='log output to file')
+ parser.add_argument(
+ 'db_uri', help='the couch database URI to test')
+ args = parser.parse_args()
+
+ # log to file
+ if args.logfile is not None:
+ log_to_file(args.logfile)
+
+ # set loglevel
+ if args.debug is True:
+ config_log(logging.DEBUG)
+ else:
+ config_log(logging.INFO)
+
+ # run test and report
+ logger.info('Will test using db at %s.' % args.db_uri)
+ maxsize = find_max_upload_size(args.db_uri)
+ logger.info('Max upload size is %d MB.' % maxsize)
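
find_max_upload_size() combines an exponential ramp-up with a binary search between the last size that uploaded successfully and the first that failed. Stripped of the couch plumbing, the strategy looks like this (sketch; can_upload stands in for the upload() call above):

    def find_max(can_upload):
        size = 1
        while can_upload(size):         # phase 1: double until a size fails
            size *= 2
        unable, able = size, size / 2   # bracket: `able` worked, `unable` did not
        while unable - able > 1:        # phase 2: binary search inside the bracket
            mid = able + (unable - able) / 2
            if can_upload(mid):
                able = mid
            else:
                unable = mid
        return able
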
diff --git a/scripts/profiling/doc_put_memory_usage/get-mem.py b/scripts/profiling/doc_put_memory_usage/get-mem.py
new file mode 100755
index 00000000..d64875fc
--- /dev/null
+++ b/scripts/profiling/doc_put_memory_usage/get-mem.py
@@ -0,0 +1,16 @@
+#!/usr/bin/python
+
+
+import psutil
+import time
+
+
+delta = 50 * 60
+start = time.time()
+
+while True:
+ now = time.time()
+ print "%s %s" % (now - start, psutil.phymem_usage().used)
+ time.sleep(0.1)
+ if now > start + delta:
+ break
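
phymem_usage() reflects the psutil API current at the time of this commit; later psutil releases removed it in favor of virtual_memory(), which exposes the same .used counter in bytes. An equivalent call under the newer API would be:

    import psutil
    used_bytes = psutil.virtual_memory().used
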
diff --git a/scripts/profiling/doc_put_memory_usage/plot-mem.py b/scripts/profiling/doc_put_memory_usage/plot-mem.py
new file mode 100755
index 00000000..e24679a2
--- /dev/null
+++ b/scripts/profiling/doc_put_memory_usage/plot-mem.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+
+
+from matplotlib import pyplot as plt
+
+
+files = [
+ ('local', 'couchdb-json', 'b'),
+ ('local', 'bigcouch-json', 'r'),
+ ('local', 'couchdb-multipart', 'g'),
+ ('local', 'bigcouch-multipart', 'm'),
+]
+
+
+# config the plot
+plt.xlabel('time')
+plt.ylabel('memory usage')
+plt.title('bigcouch versus couch memory usage')
+
+
+for fi in files:
+
+ machine = fi[0]
+ database = fi[1]
+ color = fi[2]
+ filename = '%s-%s.txt' % (machine, database)
+
+ x = []
+ y = []
+
+ xmax = None
+ xmin = None
+ ymax = None
+ ymin = None
+
+ # read data from file
+ with open(filename, 'r') as f:
+ line = f.readline()
+ while line is not None:
+ time, mem = tuple(line.strip().split(' '))
+ mem = float(mem) / (10**9)
+ x.append(float(time))
+ y.append(mem)
+ if ymax == None or mem > ymax:
+ ymax = mem
+ xmax = time
+ if ymin == None or mem < ymin:
+ ymin = mem
+ xmin = time
+ line = f.readline()
+ if line == '':
+ break
+
+ kwargs = {
+ 'linewidth': 1.0,
+ 'linestyle': '-',
+ # 'marker': '.',
+ 'color': color,
+ }
+ plt.plot(x, y, label=database, **kwargs)
+
+ #plt.axes().get_xaxis().set_ticks(x)
+ #plt.axes().get_xaxis().set_ticklabels(x)
+
+ # annotate max and min values
+ #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax))
+ #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin))
+
+
+plt.grid()
+plt.legend()
+plt.show()
+