Add benchmarking scripts.

author: drebs <drebs@leap.se> 2014-09-08 17:12:45 -0300
committer: drebs <drebs@leap.se> 2014-09-10 10:13:58 -0300
commit: 2f1ee76a7169abc100efdf706f12a0abf6032f04 (patch)
tree: 2b4413b685a06abbf939cb270e5f0d3e7c29e154 /scripts/profiling/storage
parent: 3ab68fd26bae17c82dbbb0c0171933b8a7540c73 (diff)
7 files changed, 340 insertions, 0 deletions
diff --git a/scripts/profiling/storage/benchmark-storage.py b/scripts/profiling/storage/benchmark-storage.py
new file mode 100644
index 00000000..79ee3270
--- /dev/null
+++ b/scripts/profiling/storage/benchmark-storage.py
@@ -0,0 +1,104 @@
+#!/usr/bin/python
+
+# scenarios:
+#   1. soledad instantiation time.
+#     a. for a nonexistent db.
+#     b. for existing db.
+#   2. soledad doc storage/retrieval.
+#     a. 1 KB document.
+#     b. 10 KB.
+#     c. 100 KB.
+#     d. 1 MB.
+
+
+import logging
+import getpass
+import tempfile
+import argparse
+import shutil
+import timeit
+
+
+from util import ValidateUserHandle
+
+# benchmarking args
+REPEAT_NUMBER = 1000  # how many docs are created for each doc size
+DOC_SIZE = 1024  # base doc size; scaled by a per-run multiplier
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+def parse_args():
+    """Return (username, provider, passphrase, basedir, logfile) from argv."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'user@provider', action=ValidateUserHandle, help='the user handle')
+    parser.add_argument(
+        '-b', dest='basedir', required=False, default=None,
+        help='soledad base directory')
+    parser.add_argument(
+        '-p', dest='passphrase', required=False, default=None,
+        help='the user passphrase')
+    parser.add_argument(
+        '-l', dest='logfile', required=False,
+        default='/tmp/benchmark-storage.log',
+        help='the file to which write the benchmark logs')
+    args = parser.parse_args()
+    # username/provider presumably come from ValidateUserHandle -- confirm
+    passphrase = args.passphrase
+    if passphrase is None:
+        passphrase = getpass.getpass(
+            'Password for %s@%s: ' % (args.username, args.provider))
+    basedir = args.basedir
+    if basedir is None:
+        # no -b given: work inside a fresh temporary directory
+        basedir = tempfile.mkdtemp()
+    logger.info('Using %s as base directory.', basedir)
+    return args.username, args.provider, passphrase, basedir, args.logfile
+
+
+if __name__ == '__main__':
+    username, provider, passphrase, basedir, logfile = parse_args()
+    create_results = []
+    getall_results = []
+    # i scales DOC_SIZE, so each round benchmarks docs of DOC_SIZE * i
+    for i in [1, 200, 400, 600, 800, 1000]:
+        # every doc size gets its own directory under basedir
+        tempdir = tempfile.mkdtemp(dir=basedir)
+        # %r embeds the values as quoted python literals, so a passphrase
+        # holding quotes or backslashes cannot break the setup code
+        setup_common = """
+import os
+from client_side_db import get_soledad_instance
+sol = get_soledad_instance(%r, %r, %r, %r)
+        """ % (username, provider, passphrase, tempdir)
+        setup_create = setup_common + """
+content = {'data': os.urandom(%d/2).encode('hex')}
+""" % (DOC_SIZE * i)
+        # total time taken by REPEAT_NUMBER consecutive create_doc calls
+        stmt = 'sol.create_doc(content);'
+        time = timeit.timeit(stmt, setup=setup_create, number=REPEAT_NUMBER)
+        create_results.append((DOC_SIZE*i, time))
+        print "CREATE: %d %f" % (DOC_SIZE*i, time)
+        setup_get = setup_common + """
+doc_ids = [doc.doc_id for doc in sol.get_all_docs()[1]]
+"""
+        # a single pass that fetches every doc listed by get_all_docs
+        stmt = "[sol.get_doc(doc_id) for doc_id in doc_ids]"
+        time = timeit.timeit(stmt, setup=setup_get, number=1)
+        getall_results.append((DOC_SIZE*i, time))
+        print "GET_ALL: %d %f" % (DOC_SIZE*i, time)
+        shutil.rmtree(tempdir)
+    print "# size, time for creation of %d docs" % REPEAT_NUMBER
+    for size, time in create_results:
+        print size, time
+    print "# size, time for retrieval of %d docs" % REPEAT_NUMBER
+    for size, time in getall_results:
+        print size, time
+    # NOTE(review): this also removes a basedir passed explicitly with -b
+    shutil.rmtree(basedir)
+
diff --git a/scripts/profiling/storage/benchmark_storage_utils.py b/scripts/profiling/storage/benchmark_storage_utils.py
new file mode 100644
index 00000000..fa8bb658
--- /dev/null
+++ b/scripts/profiling/storage/benchmark_storage_utils.py
@@ -0,0 +1,4 @@
+from client_side_db import get_soledad_instance
+
+def benchmark_fun(sol, content):
+    sol.create_doc(content)  # the value returned by create_doc is dropped
diff --git a/scripts/profiling/storage/client_side_db.py b/scripts/profiling/storage/client_side_db.py
new file mode 120000
index 00000000..9e49a7f0
--- /dev/null
+++ b/scripts/profiling/storage/client_side_db.py
@@ -0,0 +1 @@
+../../db_access/client_side_db.py
+\ No newline at end of file
diff --git a/scripts/profiling/storage/plot.py b/scripts/profiling/storage/plot.py
new file mode 100755
index 00000000..280b9375
--- /dev/null
+++ b/scripts/profiling/storage/plot.py
@@ -0,0 +1,94 @@
+#!/usr/bin/python
+
+
+# Create a plot of the results of running the ./benchmark-storage.py script.
+
+
+import argparse
+from matplotlib import pyplot as plt
+
+from sets import Set
+
+
+def plot(filename, subtitle=''):
+    """Plot per-KB creation and retrieval times from a benchmark data file.
+
+    The file must hold two sections, creation times first and retrieval
+    times second. Each section is one header line followed by six
+    "<doc size in bytes> <time in seconds>" lines, which is the layout
+    of the summary printed at the end of ./benchmark-storage.py.
+
+    Times are divided by the doc size, so the curves show seconds per KB.
+    The figure is displayed with plt.show() and nothing is returned.
+
+    :param filename: path of the data file to read.
+    :param subtitle: optional text appended to the plot title.
+    """
+    # number of data rows that follow each section header
+    rows_per_section = 6
+
+    def read_section(f):
+        """Skip the section header and return [(size, seconds), ...]."""
+        f.readline()
+        section = []
+        for _ in range(rows_per_section):
+            size, seconds = f.readline().split()
+            section.append((int(size), float(seconds)))
+        return section
+
+    def per_kb(section):
+        """Return (sizes in KB, seconds per KB) for a section."""
+        # float division keeps the size of a doc smaller than 1 KB from
+        # being truncated to zero and then used as a divisor
+        kbs = [size / 1024.0 for size, _ in section]
+        times = [seconds / kb for (_, seconds), kb in zip(section, kbs)]
+        return kbs, times
+
+    # config the plot
+    plt.xlabel('doc size (KB)')
+    plt.ylabel('operation time (s)')
+    title = 'soledad 1000 docs creation/retrieval times'
+    if subtitle != '':
+        # keep a space on both sides of the dash that joins the two parts
+        title += ' - %s' % subtitle
+    plt.title(title)
+
+    # read data from file
+    with open(filename, 'r') as f:
+        created = read_section(f)
+        retrieved = read_section(f)
+
+    # each series is drawn against the doc sizes of its own section, so
+    # a time can never be paired with the size of another row
+    series = [
+        (per_kb(created), 'creation time per KB', 'r', '-.'),
+        (per_kb(retrieved), 'retrieval time per KB', 'b', '-.'),
+    ]
+    for (xs, ys), label, color, linestyle in series:
+        plt.plot(
+            xs,
+            ys,
+            label=label,
+            linewidth=1.0,
+            marker='.',
+            color=color,
+            linestyle=linestyle)
+
+    # hard-coded x range: fits doc sizes of up to 1000 KB with some margin
+    plt.xlim(0, 1100)
+    plt.grid()
+    plt.legend()
+    plt.show()
+
+
+if __name__ == '__main__':
+    # read the data file name and the optional subtitle from the command
+    # line, then draw the plot
+    cli = argparse.ArgumentParser()
+    cli.add_argument('datafile', help='the data file to plot')
+    cli.add_argument(
+        '-s', dest='subtitle', required=False, default='',
+        help='a subtitle for the plot')
+    options = cli.parse_args()
+    datafile, subtitle = options.datafile, options.subtitle
+    plot(datafile, subtitle)
diff --git a/scripts/profiling/storage/profile-format.py b/scripts/profiling/storage/profile-format.py
new file mode 100644
index 00000000..262a52ab
--- /dev/null
+++ b/scripts/profiling/storage/profile-format.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+
+import argparse
+import pstats
+
+
+def parse_args():
+    """Return the list of stats files given through repeated -f options."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        '-f', dest='statsfiles', action='append', required=True,
+        help='a stats file')
+    options = cli.parse_args()
+    return options.statsfiles
+
+
+def format_stats(statsfiles):
+    """Print every stats file twice, under two different sort orders."""
+    for path in statsfiles:
+        # directory names are dropped from the file paths in the report
+        stats = pstats.Stats(path).strip_dirs()
+        # first ordered by internal time, then by cumulative time
+        for order in ('time', 'cumulative'):
+            stats.sort_stats(order).print_stats()
+
+
+if __name__ == '__main__':
+    # format every stats file named on the command line
+    format_stats(parse_args())
diff --git a/scripts/profiling/storage/profile-storage.py b/scripts/profiling/storage/profile-storage.py
new file mode 100755
index 00000000..305e6d5a
--- /dev/null
+++ b/scripts/profiling/storage/profile-storage.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+
+import os
+import logging
+import getpass
+import tempfile
+import argparse
+import cProfile
+import shutil
+import pstats
+import StringIO
+import datetime
+
+
+from client_side_db import get_soledad_instance
+from util import ValidateUserHandle
+
+# profiling args
+NUM_DOCS = 1  # docs created (and later fetched) in one profiling run
+DOC_SIZE = 1024**2  # length of the hex payload stored in each doc
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+def parse_args():
+    """Return (username, provider, passphrase, basedir, logdir) from argv."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'user@provider', action=ValidateUserHandle, help='the user handle')
+    parser.add_argument(
+        '-b', dest='basedir', required=False, default=None,
+        help='soledad base directory')
+    parser.add_argument(
+        '-p', dest='passphrase', required=False, default=None,
+        help='the user passphrase')
+    parser.add_argument(
+        '-d', dest='logdir', required=False, default='/tmp/',
+        help='the directory to which write the profile stats')
+    args = parser.parse_args()
+    # username/provider presumably come from ValidateUserHandle -- confirm
+    passphrase = args.passphrase
+    if passphrase is None:
+        # no -p given: prompt for the passphrase instead
+        passphrase = getpass.getpass(
+            'Password for %s@%s: ' % (args.username, args.provider))
+    basedir = args.basedir
+    if basedir is None:
+        # no -b given: work inside a fresh temporary directory
+        basedir = tempfile.mkdtemp()
+    logger.info('Using %s as base directory.', basedir)
+    return args.username, args.provider, passphrase, basedir, args.logdir
+
+created_docs = []  # doc ids appended by create_docs(), read by get_all_docs()
+
+def create_docs(sol, content):
+    """Store NUM_DOCS docs holding content and record their ids."""
+    for _ in xrange(NUM_DOCS):
+        created_docs.append(sol.create_doc(content).doc_id)
+        
+def get_all_docs(sol):  # fetches every doc whose id is in created_docs
+    for stored_id in created_docs:
+        sol.get_doc(stored_id)
+
+def do_profile(logdir, sol):
+    """Profile doc creation and retrieval, printing and saving the stats.
+
+    Two files named profile_<timestamp>_creation.stats and
+    profile_<timestamp>_retrieval.stats are written to logdir.
+
+    :param logdir: directory in which the stats files are written.
+    :param sol: the soledad instance whose calls are profiled.
+    """
+    # %M is the minute of the hour (%m would be the month number)
+    stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+    fname_prefix = os.path.join(logdir, "profile_%s" % stamp)
+
+    def run(suffix, fun, *args):
+        # profile one call, print its report and dump the raw stats
+        pr = cProfile.Profile()
+        pr.runcall(fun, *args)
+        s = StringIO.StringIO()
+        ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
+        ps.print_stats()
+        ps.dump_stats("%s_%s.stats" % (fname_prefix, suffix))
+        print s.getvalue()
+
+    # half of DOC_SIZE random bytes, hex-encoded into DOC_SIZE characters
+    content = {'data': os.urandom(DOC_SIZE/2).encode('hex')}
+    # creation goes first: retrieval reads the ids create_docs() recorded
+    run('creation', create_docs, sol, content)
+    run('retrieval', get_all_docs, sol)
+
+
+if __name__ == '__main__':
+    # collect the settings and build the soledad instance to be profiled
+    username, provider, passphrase, basedir, logdir = parse_args()
+    sol = get_soledad_instance(
+        username, provider, passphrase, basedir)
+    do_profile(logdir, sol)
+    # NOTE(review): basedir is removed even when it was given with -b
+    # instead of being created by parse_args() -- confirm this is wanted
+    shutil.rmtree(basedir)
+
diff --git a/scripts/profiling/storage/util.py b/scripts/profiling/storage/util.py
new file mode 120000
index 00000000..7f16d684
--- /dev/null
+++ b/scripts/profiling/storage/util.py
@@ -0,0 +1 @@
+../util.py
+\ No newline at end of file
author	drebs <drebs@leap.se>	2014-09-08 17:12:45 -0300
committer	drebs <drebs@leap.se>	2014-09-10 10:13:58 -0300
commit	2f1ee76a7169abc100efdf706f12a0abf6032f04 (patch)
tree	2b4413b685a06abbf939cb270e5f0d3e7c29e154 /scripts/profiling/storage
parent	3ab68fd26bae17c82dbbb0c0171933b8a7540c73 (diff)