summaryrefslogtreecommitdiff
path: root/scripts/profiling/storage
diff options
context:
space:
mode:
authordrebs <drebs@leap.se>2014-09-08 17:12:45 -0300
committerdrebs <drebs@leap.se>2014-09-10 10:13:58 -0300
commit2f1ee76a7169abc100efdf706f12a0abf6032f04 (patch)
tree2b4413b685a06abbf939cb270e5f0d3e7c29e154 /scripts/profiling/storage
parent3ab68fd26bae17c82dbbb0c0171933b8a7540c73 (diff)
Add benchmarking scripts.
Diffstat (limited to 'scripts/profiling/storage')
-rw-r--r--scripts/profiling/storage/benchmark-storage.py104
-rw-r--r--scripts/profiling/storage/benchmark_storage_utils.py4
l---------scripts/profiling/storage/client_side_db.py1
-rwxr-xr-xscripts/profiling/storage/plot.py94
-rw-r--r--scripts/profiling/storage/profile-format.py29
-rwxr-xr-xscripts/profiling/storage/profile-storage.py107
l---------scripts/profiling/storage/util.py1
7 files changed, 340 insertions, 0 deletions
diff --git a/scripts/profiling/storage/benchmark-storage.py b/scripts/profiling/storage/benchmark-storage.py
new file mode 100644
index 00000000..79ee3270
--- /dev/null
+++ b/scripts/profiling/storage/benchmark-storage.py
@@ -0,0 +1,104 @@
+#!/usr/bin/python
+
+# scenarios:
+#   1. soledad instantiation time.
+#      a. for nonexistent db.
+#      b. for existing db.
+#   2. soledad doc storage/retrieval.
+#      a. 1 KB document.
+#      b. 10 KB.
+#      c. 100 KB.
+#      d. 1 MB.
+
+
+import logging
+import getpass
+import tempfile
+import argparse
+import shutil
+import timeit
+
+
+from util import ValidateUserHandle
+
+# benchmarking args
+REPEAT_NUMBER = 1000
+DOC_SIZE = 1024
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+def parse_args():
+    """
+    Parse the command line and fill in the values that were not given.
+
+    The passphrase is asked for interactively when ``-p`` is absent, and a
+    fresh temporary directory is created when ``-b`` is absent.
+
+    :return: the tuple (username, provider, passphrase, basedir, logfile).
+    """
+    parser = argparse.ArgumentParser()
+    # args.username and args.provider (read below) are presumably set by the
+    # ValidateUserHandle action from this single positional argument.
+    parser.add_argument(
+        'user@provider', action=ValidateUserHandle, help='the user handle')
+    parser.add_argument(
+        '-b', dest='basedir', required=False, default=None,
+        help='soledad base directory')
+    parser.add_argument(
+        '-p', dest='passphrase', required=False, default=None,
+        help='the user passphrase')
+    parser.add_argument(
+        '-l', dest='logfile', required=False,
+        default='/tmp/benchmark-storage.log',
+        help='the file to which the benchmark logs are written')
+    args = parser.parse_args()
+
+    # get the password, prompting only when it was not given with -p
+    passphrase = args.passphrase
+    if passphrase is None:
+        passphrase = getpass.getpass(
+            'Password for %s@%s: ' % (args.username, args.provider))
+
+    # get the basedir, falling back to a new temporary directory
+    basedir = args.basedir
+    if basedir is None:
+        basedir = tempfile.mkdtemp()
+    logger.info('Using %s as base directory.', basedir)
+
+    return args.username, args.provider, passphrase, basedir, args.logfile
+
+
+if __name__ == '__main__':
+    # NOTE(review): logfile is parsed but nothing below writes to it.
+    username, provider, passphrase, basedir, logfile = parse_args()
+    create_results = []
+    getall_results = []
+    # each factor scales DOC_SIZE, so documents range from 1 KB to 1000 KB
+    for factor in [1, 200, 400, 600, 800, 1000]:
+        doc_size = DOC_SIZE * factor
+        # every document size gets its own, fresh soledad directory
+        tempdir = tempfile.mkdtemp(dir=basedir)
+        # %r embeds the values as Python literals, so a passphrase (or path)
+        # that contains quotes or backslashes cannot break or alter the
+        # generated setup code.
+        setup_common = """
+import os
+from client_side_db import get_soledad_instance
+sol = get_soledad_instance(%r, %r, %r, %r)
+""" % (username, provider, passphrase, tempdir)
+
+        # time the creation of REPEAT_NUMBER docs with the same content
+        setup_create = setup_common + """
+content = {'data': os.urandom(%d/2).encode('hex')}
+""" % doc_size
+        elapsed = timeit.timeit(
+            'sol.create_doc(content);',
+            setup=setup_create, number=REPEAT_NUMBER)
+        create_results.append((doc_size, elapsed))
+        print "CREATE: %d %f" % (doc_size, elapsed)
+
+        # time one pass fetching every doc found in the same directory
+        setup_get = setup_common + """
+doc_ids = [doc.doc_id for doc in sol.get_all_docs()[1]]
+"""
+        elapsed = timeit.timeit(
+            "[sol.get_doc(doc_id) for doc_id in doc_ids]",
+            setup=setup_get, number=1)
+        getall_results.append((doc_size, elapsed))
+        print "GET_ALL: %d %f" % (doc_size, elapsed)
+        shutil.rmtree(tempdir)
+
+    # final report: two "size time" tables, each preceded by a '#' header
+    print "# size, time for creation of %d docs" % REPEAT_NUMBER
+    for size, elapsed in create_results:
+        print size, elapsed
+    print "# size, time for retrieval of %d docs" % REPEAT_NUMBER
+    for size, elapsed in getall_results:
+        print size, elapsed
+    # NOTE(review): this also removes a base directory supplied with -b.
+    shutil.rmtree(basedir)
+
diff --git a/scripts/profiling/storage/benchmark_storage_utils.py b/scripts/profiling/storage/benchmark_storage_utils.py
new file mode 100644
index 00000000..fa8bb658
--- /dev/null
+++ b/scripts/profiling/storage/benchmark_storage_utils.py
@@ -0,0 +1,4 @@
+from client_side_db import get_soledad_instance
+
+def benchmark_fun(sol, content):
+    """Create one document holding ``content`` through ``sol``."""
+    sol.create_doc(content)
diff --git a/scripts/profiling/storage/client_side_db.py b/scripts/profiling/storage/client_side_db.py
new file mode 120000
index 00000000..9e49a7f0
--- /dev/null
+++ b/scripts/profiling/storage/client_side_db.py
@@ -0,0 +1 @@
+../../db_access/client_side_db.py \ No newline at end of file
diff --git a/scripts/profiling/storage/plot.py b/scripts/profiling/storage/plot.py
new file mode 100755
index 00000000..280b9375
--- /dev/null
+++ b/scripts/profiling/storage/plot.py
@@ -0,0 +1,94 @@
+#!/usr/bin/python
+
+
+# Create a plot of the results of running the ./benchmark-storage.py script.
+
+
+import argparse
+from matplotlib import pyplot as plt
+
+from sets import Set
+
+
+def _read_sections(filename):
+    """
+    Return the data tables found in ``filename``.
+
+    A table is a run of consecutive "<size> <time>" lines; any other line
+    (a '#' header, a blank line, other output) ends the current table.
+
+    :return: a list of tables, each a list of (size, seconds) tuples.
+    """
+    sections = []
+    current = None
+    with open(filename, 'r') as f:
+        for line in f:
+            try:
+                size, seconds = line.split()
+                row = (int(size), float(seconds))
+            except ValueError:
+                # not a data row, so the next row starts a new table
+                current = None
+                continue
+            if current is None:
+                current = []
+                sections.append(current)
+            current.append(row)
+    return sections
+
+
+def plot(filename, subtitle=''):
+    """
+    Plot the results written by the ./benchmark-storage.py script.
+
+    The first table in the file is taken as the creation times and the
+    second as the retrieval times. Each time is divided by its own document
+    size in KB before plotting.
+
+    :param filename: the file holding the benchmark results.
+    :param subtitle: optional text appended to the plot title.
+    :raise ValueError: if the file holds fewer than two data tables.
+    """
+    # config the plot
+    plt.xlabel('doc size (KB)')
+    plt.ylabel('operation time (s)')
+    title = 'soledad 1000 docs creation/retrieval times'
+    if subtitle != '':
+        title += ' - %s' % subtitle
+    plt.title(title)
+
+    # read data from file
+    sections = _read_sections(filename)
+    if len(sections) < 2:
+        raise ValueError(
+            'expected creation and retrieval tables in %s, found %d'
+            % (filename, len(sections)))
+
+    series = (
+        (sections[0], 'creation time per KB', 'r'),
+        (sections[1], 'retrieval time per KB', 'b'),
+    )
+    largest_kb = 0
+    for rows, label, color in series:
+        # float division keeps sizes below 1 KB from becoming a zero divisor
+        sizes_kb = [size / 1024.0 for size, _ in rows]
+        # normalize by doc size
+        per_kb = [
+            seconds / kb for (_, seconds), kb in zip(rows, sizes_kb)]
+        largest_kb = max(largest_kb, max(sizes_kb))
+        plt.plot(
+            sizes_kb,
+            per_kb,
+            label=label,
+            linewidth=1.0,
+            marker='.',
+            color=color,
+            linestyle='-.')
+
+    # leave a 10% margin to the right of the largest document size
+    plt.xlim(0, 1.1 * largest_kb)
+    plt.grid()
+    plt.legend()
+    plt.show()
+
+
+if __name__ == '__main__':
+    # build the command line interface: one data file, optional subtitle
+    cli = argparse.ArgumentParser()
+    cli.add_argument('datafile', help='the data file to plot')
+    cli.add_argument(
+        '-s',
+        dest='subtitle',
+        required=False,
+        default='',
+        help='a subtitle for the plot')
+    options = cli.parse_args()
+    plot(options.datafile, options.subtitle)
diff --git a/scripts/profiling/storage/profile-format.py b/scripts/profiling/storage/profile-format.py
new file mode 100644
index 00000000..262a52ab
--- /dev/null
+++ b/scripts/profiling/storage/profile-format.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+
+import argparse
+import pstats
+
+
+def parse_args():
+    """
+    Read the stats files to format from the command line.
+
+    :return: the list of paths given with ``-f`` (at least one is required).
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        '-f',
+        dest='statsfiles',
+        action='append',
+        required=True,
+        help='a stats file')
+    return cli.parse_args().statsfiles
+
+
+def format_stats(statsfiles):
+    """
+    Print each stats file twice: sorted by 'time', then by 'cumulative'.
+
+    :param statsfiles: paths of the profile stats files to print.
+    """
+    for path in statsfiles:
+        stats = pstats.Stats(path)
+        stats.strip_dirs()
+        for sort_key in ('time', 'cumulative'):
+            stats.sort_stats(sort_key)
+            stats.print_stats()
+
+
+if __name__ == '__main__':
+    # format every stats file named on the command line
+    format_stats(parse_args())
diff --git a/scripts/profiling/storage/profile-storage.py b/scripts/profiling/storage/profile-storage.py
new file mode 100755
index 00000000..305e6d5a
--- /dev/null
+++ b/scripts/profiling/storage/profile-storage.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+
+import os
+import logging
+import getpass
+import tempfile
+import argparse
+import cProfile
+import shutil
+import pstats
+import StringIO
+import datetime
+
+
+from client_side_db import get_soledad_instance
+from util import ValidateUserHandle
+
+# profiling args
+NUM_DOCS = 1
+DOC_SIZE = 1024**2
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+def parse_args():
+    """
+    Parse the command line and fill in the values that were not given.
+
+    The passphrase is asked for interactively when ``-p`` is absent, and a
+    fresh temporary directory is created when ``-b`` is absent.
+
+    :return: the tuple (username, provider, passphrase, basedir, logdir).
+    """
+    parser = argparse.ArgumentParser()
+    # args.username and args.provider (read below) are presumably set by the
+    # ValidateUserHandle action from this single positional argument.
+    parser.add_argument(
+        'user@provider', action=ValidateUserHandle, help='the user handle')
+    parser.add_argument(
+        '-b', dest='basedir', required=False, default=None,
+        help='soledad base directory')
+    parser.add_argument(
+        '-p', dest='passphrase', required=False, default=None,
+        help='the user passphrase')
+    parser.add_argument(
+        '-d', dest='logdir', required=False, default='/tmp/',
+        help='the directory to which the profile stats are written')
+    args = parser.parse_args()
+
+    # get the password, prompting only when it was not given with -p
+    passphrase = args.passphrase
+    if passphrase is None:
+        passphrase = getpass.getpass(
+            'Password for %s@%s: ' % (args.username, args.provider))
+
+    # get the basedir, falling back to a new temporary directory
+    basedir = args.basedir
+    if basedir is None:
+        basedir = tempfile.mkdtemp()
+    logger.info('Using %s as base directory.', basedir)
+
+    return args.username, args.provider, passphrase, basedir, args.logdir
+
+# ids of the docs made by create_docs(); get_all_docs() fetches them again
+created_docs = []
+
+
+def create_docs(sol, content):
+    """
+    Create NUM_DOCS documents holding ``content`` and remember their ids.
+
+    :param sol: the object whose create_doc() method stores the documents.
+    :param content: the content given to every created document.
+    """
+    for _ in xrange(NUM_DOCS):
+        created_docs.append(sol.create_doc(content).doc_id)
+
+def get_all_docs(sol):
+    """Fetch, one by one, every document recorded in ``created_docs``."""
+    for known_id in created_docs:
+        sol.get_doc(known_id)
+
+def _profile_call(stats_file, func, *args):
+    """
+    Profile ``func(*args)``, print its stats and dump them to ``stats_file``.
+
+    The printed stats are sorted by cumulative time.
+    """
+    pr = cProfile.Profile()
+    pr.runcall(func, *args)
+    s = StringIO.StringIO()
+    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
+    ps.print_stats()
+    ps.dump_stats(stats_file)
+    print s.getvalue()
+
+
+def do_profile(logdir, sol):
+    """
+    Profile document creation and retrieval, one stats file for each.
+
+    The files are written to ``logdir`` as
+    ``profile_<timestamp>_creation.stats`` and
+    ``profile_<timestamp>_retrieval.stats``.
+
+    :param logdir: the directory in which the stats files are written.
+    :param sol: the object passed on to create_docs() and get_all_docs().
+    """
+    # %M is minutes; the time part must not use %m, which is the month
+    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+    fname_prefix = os.path.join(logdir, "profile_%s" % timestamp)
+
+    # profile create docs; hex-encoding doubles the length of the random
+    # bytes, so the payload is DOC_SIZE characters long
+    content = {'data': os.urandom(DOC_SIZE/2).encode('hex')}
+    _profile_call(
+        "%s_creation.stats" % fname_prefix, create_docs, sol, content)
+
+    # profile get all docs
+    _profile_call(
+        "%s_retrieval.stats" % fname_prefix, get_all_docs, sol)
+
+
+if __name__ == '__main__':
+    # unpack the command line, open soledad, profile it, then clean up
+    cli_values = parse_args()
+    username, provider, passphrase, basedir, logdir = cli_values
+    sol = get_soledad_instance(username, provider, passphrase, basedir)
+    do_profile(logdir, sol)
+    shutil.rmtree(basedir)
+
diff --git a/scripts/profiling/storage/util.py b/scripts/profiling/storage/util.py
new file mode 120000
index 00000000..7f16d684
--- /dev/null
+++ b/scripts/profiling/storage/util.py
@@ -0,0 +1 @@
+../util.py \ No newline at end of file