From 2f1ee76a7169abc100efdf706f12a0abf6032f04 Mon Sep 17 00:00:00 2001 From: drebs Date: Mon, 8 Sep 2014 17:12:45 -0300 Subject: Add benchmarking scripts. --- scripts/profiling/storage/benchmark-storage.py | 104 ++++++++++++++++++++ .../profiling/storage/benchmark_storage_utils.py | 4 + scripts/profiling/storage/client_side_db.py | 1 + scripts/profiling/storage/plot.py | 94 ++++++++++++++++++ scripts/profiling/storage/profile-format.py | 29 ++++++ scripts/profiling/storage/profile-storage.py | 107 +++++++++++++++++++++ scripts/profiling/storage/util.py | 1 + 7 files changed, 340 insertions(+) create mode 100644 scripts/profiling/storage/benchmark-storage.py create mode 100644 scripts/profiling/storage/benchmark_storage_utils.py create mode 120000 scripts/profiling/storage/client_side_db.py create mode 100755 scripts/profiling/storage/plot.py create mode 100644 scripts/profiling/storage/profile-format.py create mode 100755 scripts/profiling/storage/profile-storage.py create mode 120000 scripts/profiling/storage/util.py (limited to 'scripts/profiling/storage') diff --git a/scripts/profiling/storage/benchmark-storage.py b/scripts/profiling/storage/benchmark-storage.py new file mode 100644 index 00000000..79ee3270 --- /dev/null +++ b/scripts/profiling/storage/benchmark-storage.py @@ -0,0 +1,104 @@ +#!/usr/bin/python + +# scenarios: +# 1. soledad instantiation time. +# a. for unexisting db. +# b. for existing db. +# 2. soledad doc storage/retrieval. +# a. 1 KB document. +# b 10 KB. +# c. 100 KB. +# d. 1 MB. + + +import logging +import getpass +import tempfile +import argparse +import shutil +import timeit + + +from util import ValidateUserHandle + +# benchmarking args +REPEAT_NUMBER = 1000 +DOC_SIZE = 1024 + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +def parse_args(): + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + 'user@provider', action=ValidateUserHandle, help='the user handle') + parser.add_argument( + '-b', dest='basedir', required=False, default=None, + help='soledad base directory') + parser.add_argument( + '-p', dest='passphrase', required=False, default=None, + help='the user passphrase') + parser.add_argument( + '-l', dest='logfile', required=False, default='/tmp/benchhmark-storage.log', + help='the file to which write the benchmark logs') + args = parser.parse_args() + # get the password + passphrase = args.passphrase + if passphrase is None: + passphrase = getpass.getpass( + 'Password for %s@%s: ' % (args.username, args.provider)) + # get the basedir + basedir = args.basedir + if basedir is None: + basedir = tempfile.mkdtemp() + logger.info('Using %s as base directory.' % basedir) + + return args.username, args.provider, passphrase, basedir, args.logfile + + +if __name__ == '__main__': + username, provider, passphrase, basedir, logfile = parse_args() + create_results = [] + getall_results = [] + for i in [1, 200, 400, 600, 800, 1000]: + tempdir = tempfile.mkdtemp(dir=basedir) + setup_common = """ +import os +#from benchmark_storage_utils import benchmark_fun +#from benchmark_storage_utils import get_soledad_instance +from client_side_db import get_soledad_instance +sol = get_soledad_instance('%s', '%s', '%s', '%s') + """ % (username, provider, passphrase, tempdir) + + setup_create = setup_common + """ +content = {'data': os.urandom(%d/2).encode('hex')} +""" % (DOC_SIZE * i) + time = timeit.timeit( + 'sol.create_doc(content);', + setup=setup_create, number=REPEAT_NUMBER) + create_results.append((DOC_SIZE*i, time)) + print "CREATE: %d %f" % (DOC_SIZE*i, time) + + setup_get = setup_common + """ +doc_ids = [doc.doc_id for doc in sol.get_all_docs()[1]] +""" + + time = timeit.timeit( + "[sol.get_doc(doc_id) for doc_id in doc_ids]", + setup=setup_get, number=1) + getall_results.append((DOC_SIZE*i, time)) + print "GET_ALL: %d %f" % (DOC_SIZE*i, time) + shutil.rmtree(tempdir) + print "# size, time for creation of %d docs" % REPEAT_NUMBER + for size, time in create_results: + print size, time + print "# size, time for retrieval of %d docs" % REPEAT_NUMBER + for size, time in getall_results: + print size, time + shutil.rmtree(basedir) + diff --git a/scripts/profiling/storage/benchmark_storage_utils.py b/scripts/profiling/storage/benchmark_storage_utils.py new file mode 100644 index 00000000..fa8bb658 --- /dev/null +++ b/scripts/profiling/storage/benchmark_storage_utils.py @@ -0,0 +1,4 @@ +from client_side_db import get_soledad_instance + +def benchmark_fun(sol, content): + sol.create_doc(content) diff --git a/scripts/profiling/storage/client_side_db.py b/scripts/profiling/storage/client_side_db.py new file mode 120000 index 00000000..9e49a7f0 --- /dev/null +++ b/scripts/profiling/storage/client_side_db.py @@ -0,0 +1 @@ +../../db_access/client_side_db.py \ No newline at end of file diff --git a/scripts/profiling/storage/plot.py b/scripts/profiling/storage/plot.py new file mode 100755 index 00000000..280b9375 --- /dev/null +++ b/scripts/profiling/storage/plot.py @@ -0,0 +1,94 @@ +#!/usr/bin/python + + +# Create a plot of the results of running the ./benchmark-storage.py script. + + +import argparse +from matplotlib import pyplot as plt + +from sets import Set + + +def plot(filename, subtitle=''): + + # config the plot + plt.xlabel('doc size (KB)') + plt.ylabel('operation time (s)') + title = 'soledad 1000 docs creation/retrieval times' + if subtitle != '': + title += '- %s' % subtitle + plt.title(title) + + x = Set() + ycreate = [] + yget = [] + + ys = [] + #ys.append((ycreate, 'creation time', 'r', '-')) + #ys.append((yget, 'retrieval time', 'b', '-')) + + # read data from file + with open(filename, 'r') as f: + f.readline() + for i in xrange(6): + size, y = f.readline().strip().split(' ') + x.add(int(size)) + ycreate.append(float(y)) + + f.readline() + for i in xrange(6): + size, y = f.readline().strip().split(' ') + x.add(int(size)) + yget.append(float(y)) + + # get doc size in KB + x = list(x) + x.sort() + x = map(lambda val: val / 1024, x) + + # get normalized results per KB + nycreate = [] + nyget = [] + for i in xrange(len(x)): + nycreate.append(ycreate[i]/x[i]) + nyget.append(yget[i]/x[i]) + + ys.append((nycreate, 'creation time per KB', 'r', '-.')) + ys.append((nyget, 'retrieval time per KB', 'b', '-.')) + + for y in ys: + kwargs = { + 'linewidth': 1.0, + 'marker': '.', + 'color': y[2], + 'linestyle': y[3], + } + # normalize by doc size + plt.plot( + x, + y[0], + label=y[1], **kwargs) + + #plt.axes().get_xaxis().set_ticks(x) + #plt.axes().get_xaxis().set_ticklabels(x) + + # annotate max and min values + plt.xlim(0, 1100) + #plt.ylim(0, 350) + plt.grid() + plt.legend() + plt.show() + + +if __name__ == '__main__': + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + 'datafile', + help='the data file to plot') + parser.add_argument( + '-s', dest='subtitle', required=False, default='', + help='a subtitle for the plot') + args = parser.parse_args() + plot(args.datafile, args.subtitle) diff --git a/scripts/profiling/storage/profile-format.py b/scripts/profiling/storage/profile-format.py new file mode 100644 index 00000000..262a52ab --- /dev/null +++ b/scripts/profiling/storage/profile-format.py @@ -0,0 +1,29 @@ +#!/usr/bin/python + +import argparse +import pstats + + +def parse_args(): + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + '-f', dest='statsfiles', action='append', required=True, + help='a stats file') + args = parser.parse_args() + return args.statsfiles + + +def format_stats(statsfiles): + for f in statsfiles: + ps = pstats.Stats(f) + ps.strip_dirs() + ps.sort_stats('time') + ps.print_stats() + ps.sort_stats('cumulative') + ps.print_stats() + + +if __name__ == '__main__': + statsfiles = parse_args() + format_stats(statsfiles) diff --git a/scripts/profiling/storage/profile-storage.py b/scripts/profiling/storage/profile-storage.py new file mode 100755 index 00000000..305e6d5a --- /dev/null +++ b/scripts/profiling/storage/profile-storage.py @@ -0,0 +1,107 @@ +#!/usr/bin/python + +import os +import logging +import getpass +import tempfile +import argparse +import cProfile +import shutil +import pstats +import StringIO +import datetime + + +from client_side_db import get_soledad_instance +from util import ValidateUserHandle + +# profiling args +NUM_DOCS = 1 +DOC_SIZE = 1024**2 + + +# create a logger +logger = logging.getLogger(__name__) +LOG_FORMAT = '%(asctime)s %(message)s' +logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + +def parse_args(): + # parse command line + parser = argparse.ArgumentParser() + parser.add_argument( + 'user@provider', action=ValidateUserHandle, help='the user handle') + parser.add_argument( + '-b', dest='basedir', required=False, default=None, + help='soledad base directory') + parser.add_argument( + '-p', dest='passphrase', required=False, default=None, + help='the user passphrase') + parser.add_argument( + '-d', dest='logdir', required=False, default='/tmp/', + help='the direcroty to which write the profile stats') + args = parser.parse_args() + # get the password + passphrase = args.passphrase + if passphrase is None: + passphrase = getpass.getpass( + 'Password for %s@%s: ' % (args.username, args.provider)) + # get the basedir + basedir = args.basedir + if basedir is None: + basedir = tempfile.mkdtemp() + logger.info('Using %s as base directory.' % basedir) + + return args.username, args.provider, passphrase, basedir, args.logdir + +created_docs = [] + +def create_docs(sol, content): + for i in xrange(NUM_DOCS): + doc = sol.create_doc(content) + created_docs.append(doc.doc_id) + +def get_all_docs(sol): + for doc_id in created_docs: + sol.get_doc(doc_id) + +def do_profile(logdir, sol): + fname_prefix = os.path.join( + logdir, + "profile_%s" \ + % datetime.datetime.now().strftime('%Y-%m-%d_%H-%m-%S')) + + # profile create docs + content = {'data': os.urandom(DOC_SIZE/2).encode('hex')} + pr = cProfile.Profile() + pr.runcall( + create_docs, + sol, content) + s = StringIO.StringIO() + ps = pstats.Stats(pr, stream=s).sort_stats('cumulative') + ps.print_stats() + ps.dump_stats("%s_creation.stats" % fname_prefix) + print s.getvalue() + + # profile get all docs + pr = cProfile.Profile() + pr.runcall( + get_all_docs, + sol) + s = StringIO.StringIO() + ps = pstats.Stats(pr, stream=s).sort_stats('cumulative') + ps.dump_stats("%s_retrieval.stats" % fname_prefix) + ps.print_stats() + print s.getvalue() + + +if __name__ == '__main__': + username, provider, passphrase, basedir, logdir = parse_args() + sol = get_soledad_instance( + username, + provider, + passphrase, + basedir) + do_profile(logdir, sol) + shutil.rmtree(basedir) + diff --git a/scripts/profiling/storage/util.py b/scripts/profiling/storage/util.py new file mode 120000 index 00000000..7f16d684 --- /dev/null +++ b/scripts/profiling/storage/util.py @@ -0,0 +1 @@ +../util.py \ No newline at end of file -- cgit v1.2.3