diff options
Diffstat (limited to 'scripts/profiling/storage')
| -rw-r--r-- | scripts/profiling/storage/benchmark-storage.py | 104 | ||||
| -rw-r--r-- | scripts/profiling/storage/benchmark_storage_utils.py | 4 | ||||
| l--------- | scripts/profiling/storage/client_side_db.py | 1 | ||||
| -rwxr-xr-x | scripts/profiling/storage/plot.py | 94 | ||||
| -rw-r--r-- | scripts/profiling/storage/profile-format.py | 29 | ||||
| -rwxr-xr-x | scripts/profiling/storage/profile-storage.py | 107 | ||||
| l--------- | scripts/profiling/storage/util.py | 1 | 
7 files changed, 340 insertions, 0 deletions
diff --git a/scripts/profiling/storage/benchmark-storage.py b/scripts/profiling/storage/benchmark-storage.py
new file mode 100644
index 00000000..79ee3270
--- /dev/null
+++ b/scripts/profiling/storage/benchmark-storage.py
@@ -0,0 +1,120 @@
+#!/usr/bin/python
+
+# Benchmark soledad document creation and retrieval times.
+#
+# scenarios:
+#   1. soledad instantiation time.
+#     a. for non-existing db.
+#     b. for existing db.
+#   2. soledad doc storage/retrieval.
+#     a. 1 KB document.
+#     b. 10 KB.
+#     c. 100 KB.
+#     d. 1 MB.
+
+
+import logging
+import getpass
+import tempfile
+import argparse
+import shutil
+import timeit
+
+
+from util import ValidateUserHandle
+
+# benchmarking args
+REPEAT_NUMBER = 1000
+DOC_SIZE = 1024
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+def parse_args():
+    """
+    Parse the command line, asking for missing values when needed.
+
+    The passphrase is prompted for if not given with -p, and a temporary
+    directory is created if no base directory is given with -b.
+
+    :return: a (username, provider, passphrase, basedir, logfile) tuple.
+    """
+    # parse command line
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'user@provider', action=ValidateUserHandle, help='the user handle')
+    parser.add_argument(
+        '-b', dest='basedir', required=False, default=None,
+        help='soledad base directory')
+    parser.add_argument(
+        '-p', dest='passphrase', required=False, default=None,
+        help='the user passphrase')
+    parser.add_argument(
+        '-l', dest='logfile', required=False,
+        default='/tmp/benchmark-storage.log',
+        help='the file to which write the benchmark logs')
+    args = parser.parse_args()
+    # get the password
+    passphrase = args.passphrase
+    if passphrase is None:
+        passphrase = getpass.getpass(
+            'Password for %s@%s: ' % (args.username, args.provider))
+    # get the basedir
+    basedir = args.basedir
+    if basedir is None:
+        basedir = tempfile.mkdtemp()
+    logger.info('Using %s as base directory.' % basedir)
+
+    return args.username, args.provider, passphrase, basedir, args.logfile
+
+
+if __name__ == '__main__':
+    username, provider, passphrase, basedir, logfile = parse_args()
+    create_results = []
+    getall_results = []
+    for i in [1, 200, 400, 600, 800, 1000]:
+        tempdir = tempfile.mkdtemp(dir=basedir)
+        # %r emits quoted literals, so quotes or backslashes in the
+        # passphrase or in the paths cannot break the generated setup code.
+        setup_common = """
+import os
+#from benchmark_storage_utils import benchmark_fun
+#from benchmark_storage_utils import get_soledad_instance
+from client_side_db import get_soledad_instance
+sol = get_soledad_instance(%r, %r, %r, %r)
+        """ % (username, provider, passphrase, tempdir)
+
+        setup_create = setup_common + """
+content = {'data': os.urandom(%d/2).encode('hex')}
+""" % (DOC_SIZE * i)
+        time = timeit.timeit(
+            'sol.create_doc(content);',
+            setup=setup_create, number=REPEAT_NUMBER)
+        create_results.append((DOC_SIZE*i, time))
+        print "CREATE: %d %f" % (DOC_SIZE*i, time)
+
+        # setup builds a new instance over the same tempdir and lists its
+        # docs (presumably the ones created above); retrieval is timed once.
+        setup_get = setup_common + """
+doc_ids = [doc.doc_id for doc in sol.get_all_docs()[1]]
+"""
+
+        time = timeit.timeit(
+            "[sol.get_doc(doc_id) for doc_id in doc_ids]",
+            setup=setup_get, number=1)
+        getall_results.append((DOC_SIZE*i, time))
+        print "GET_ALL: %d %f" % (DOC_SIZE*i, time)
+        shutil.rmtree(tempdir)
+    print "# size, time for creation of %d docs" % REPEAT_NUMBER
+    for size, time in create_results:
+        print size, time
+    print "# size, time for retrieval of %d docs" % REPEAT_NUMBER
+    for size, time in getall_results:
+        print size, time
+    # NOTE(review): this also removes a base directory given with -b.
+    shutil.rmtree(basedir)
+
diff --git a/scripts/profiling/storage/benchmark_storage_utils.py b/scripts/profiling/storage/benchmark_storage_utils.py
new file mode 100644
index 00000000..fa8bb658
--- /dev/null
+++ b/scripts/profiling/storage/benchmark_storage_utils.py
@@ -0,0 +1,6 @@
+from client_side_db import get_soledad_instance
+
+
+def benchmark_fun(sol, content):
+    """Create one document with the given content in the given soledad."""
+    sol.create_doc(content)
diff --git a/scripts/profiling/storage/client_side_db.py b/scripts/profiling/storage/client_side_db.py
new file mode 120000
index 00000000..9e49a7f0
--- /dev/null
+++ b/scripts/profiling/storage/client_side_db.py
@@ -0,0 +1 @@
+../../db_access/client_side_db.py
\ No newline at end of file
diff --git a/scripts/profiling/storage/plot.py b/scripts/profiling/storage/plot.py
new file mode 100755
index 00000000..280b9375
--- /dev/null
+++ b/scripts/profiling/storage/plot.py
@@ -0,0 +1,104 @@
+#!/usr/bin/python
+
+
+# Create a plot of the results of running the ./benchmark-storage.py script.
+
+
+import argparse
+from matplotlib import pyplot as plt
+
+from sets import Set
+
+
+def plot(filename, subtitle=''):
+    """
+    Plot creation and retrieval times per KB of document size.
+
+    The data file must hold a header line followed by 6 "size time" lines
+    with creation times, then another header line followed by 6
+    "size time" lines with retrieval times. Sizes are taken to be in bytes.
+
+    :param filename: the path of the data file to plot.
+    :param subtitle: an optional text appended to the plot title.
+    """
+
+    # config the plot
+    plt.xlabel('doc size (KB)')
+    plt.ylabel('operation time (s)')
+    title = 'soledad 1000 docs creation/retrieval times'
+    if subtitle != '':
+        title += ' - %s' % subtitle
+    plt.title(title)
+
+    x = Set()
+    ycreate = []
+    yget = []
+
+    ys = []
+    #ys.append((ycreate, 'creation time', 'r', '-'))
+    #ys.append((yget, 'retrieval time', 'b', '-'))
+
+    # read data from file
+    with open(filename, 'r') as f:
+        f.readline()
+        for i in xrange(6):
+            size, y = f.readline().strip().split(' ')
+            x.add(int(size))
+            ycreate.append(float(y))
+
+        f.readline()
+        for i in xrange(6):
+            size, y = f.readline().strip().split(' ')
+            x.add(int(size))
+            yget.append(float(y))
+
+    # get doc size in KB
+    x = list(x)
+    x.sort()
+    x = map(lambda val: val / 1024, x)
+
+    # get normalized results per KB
+    nycreate = []
+    nyget = []
+    for i in xrange(len(x)):
+        nycreate.append(ycreate[i]/x[i])
+        nyget.append(yget[i]/x[i])
+
+    ys.append((nycreate, 'creation time per KB', 'r', '-.'))
+    ys.append((nyget, 'retrieval time per KB', 'b', '-.'))
+
+    for y in ys:
+        kwargs = {
+            'linewidth': 1.0,
+            'marker': '.',
+            'color': y[2],
+            'linestyle': y[3],
+        }
+        # the values were already normalized by doc size above
+        plt.plot(
+            x,
+            y[0],
+            label=y[1], **kwargs)
+
+    #plt.axes().get_xaxis().set_ticks(x)
+    #plt.axes().get_xaxis().set_ticklabels(x)
+
+    # set the x axis limits, then add grid and legend and show the plot
+    plt.xlim(0, 1100)
+    #plt.ylim(0, 350)
+    plt.grid()
+    plt.legend()
+    plt.show()
+
+
+if __name__ == '__main__':
+    # parse command line
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'datafile',
+        help='the data file to plot')
+    parser.add_argument(
+        '-s', dest='subtitle', required=False, default='',
+        help='a subtitle for the plot')
+    args = parser.parse_args()
+    plot(args.datafile, args.subtitle)
diff --git a/scripts/profiling/storage/profile-format.py b/scripts/profiling/storage/profile-format.py
new file mode 100644
index 00000000..262a52ab
--- /dev/null
+++ b/scripts/profiling/storage/profile-format.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python
+
+import argparse
+import pstats
+
+
+def parse_args():
+    """Return the list of stats files given with -f on the command line."""
+    # parse command line
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-f', dest='statsfiles', action='append', required=True,
+        help='a stats file')
+    args = parser.parse_args()
+    return args.statsfiles
+
+
+def format_stats(statsfiles):
+    """
+    Print each stats file sorted by internal and then by cumulative time.
+
+    :param statsfiles: a list of paths of stats files loadable by pstats.
+    """
+    for f in statsfiles:
+        ps = pstats.Stats(f)
+        ps.strip_dirs()
+        ps.sort_stats('time')
+        ps.print_stats()
+        ps.sort_stats('cumulative')
+        ps.print_stats()
+
+
+if __name__ == '__main__':
+    statsfiles = parse_args()
+    format_stats(statsfiles)
diff --git a/scripts/profiling/storage/profile-storage.py b/scripts/profiling/storage/profile-storage.py
new file mode 100755
index 00000000..305e6d5a
--- /dev/null
+++ b/scripts/profiling/storage/profile-storage.py
@@ -0,0 +1,131 @@
+#!/usr/bin/python
+
+import os
+import logging
+import getpass
+import tempfile
+import argparse
+import cProfile
+import shutil
+import pstats
+import StringIO
+import datetime
+
+
+from client_side_db import get_soledad_instance
+from util import ValidateUserHandle
+
+# profiling args
+NUM_DOCS = 1
+DOC_SIZE = 1024**2
+
+
+# create a logger
+logger = logging.getLogger(__name__)
+LOG_FORMAT = '%(asctime)s %(message)s'
+logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+
+def parse_args():
+    """
+    Parse the command line, asking for missing values when needed.
+
+    The passphrase is prompted for if not given with -p, and a temporary
+    directory is created if no base directory is given with -b.
+
+    :return: a (username, provider, passphrase, basedir, logdir) tuple.
+    """
+    # parse command line
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'user@provider', action=ValidateUserHandle, help='the user handle')
+    parser.add_argument(
+        '-b', dest='basedir', required=False, default=None,
+        help='soledad base directory')
+    parser.add_argument(
+        '-p', dest='passphrase', required=False, default=None,
+        help='the user passphrase')
+    parser.add_argument(
+        '-d', dest='logdir', required=False, default='/tmp/',
+        help='the directory to which write the profile stats')
+    args = parser.parse_args()
+    # get the password
+    passphrase = args.passphrase
+    if passphrase is None:
+        passphrase = getpass.getpass(
+            'Password for %s@%s: ' % (args.username, args.provider))
+    # get the basedir
+    basedir = args.basedir
+    if basedir is None:
+        basedir = tempfile.mkdtemp()
+    logger.info('Using %s as base directory.' % basedir)
+
+    return args.username, args.provider, passphrase, basedir, args.logdir
+
+
+# ids of the docs created by create_docs(), later read by get_all_docs()
+created_docs = []
+
+
+def create_docs(sol, content):
+    """Create NUM_DOCS docs with the given content, recording their ids."""
+    for i in xrange(NUM_DOCS):
+        doc = sol.create_doc(content)
+        created_docs.append(doc.doc_id)
+
+
+def get_all_docs(sol):
+    """Retrieve every doc whose id was recorded by create_docs()."""
+    for doc_id in created_docs:
+        sol.get_doc(doc_id)
+
+
+def do_profile(logdir, sol):
+    """
+    Profile creation and retrieval of docs, printing and dumping the stats.
+
+    Stats are printed to stdout and dumped to timestamped
+    "profile_*_creation.stats" and "profile_*_retrieval.stats" files.
+
+    :param logdir: the directory to which write the stats files.
+    :param sol: the soledad instance to be profiled.
+    """
+    # %M is minutes; %m (month) would repeat the month in the time part
+    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+    fname_prefix = os.path.join(logdir, "profile_%s" % timestamp)
+
+    # profile create docs
+    content = {'data': os.urandom(DOC_SIZE/2).encode('hex')}
+    pr = cProfile.Profile()
+    pr.runcall(
+        create_docs,
+        sol, content)
+    s = StringIO.StringIO()
+    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
+    ps.print_stats()
+    ps.dump_stats("%s_creation.stats" % fname_prefix)
+    print s.getvalue()
+
+    # profile get all docs
+    pr = cProfile.Profile()
+    pr.runcall(
+        get_all_docs,
+        sol)
+    s = StringIO.StringIO()
+    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
+    ps.dump_stats("%s_retrieval.stats" % fname_prefix)
+    ps.print_stats()
+    print s.getvalue()
+
+
+if __name__ == '__main__':
+    username, provider, passphrase, basedir, logdir = parse_args()
+    sol = get_soledad_instance(
+        username,
+        provider,
+        passphrase,
+        basedir)
+    do_profile(logdir, sol)
+    # NOTE(review): this also removes a base directory given with -b.
+    shutil.rmtree(basedir)
+
diff --git a/scripts/profiling/storage/util.py b/scripts/profiling/storage/util.py
new file mode 120000
index 00000000..7f16d684
--- /dev/null
+++ b/scripts/profiling/storage/util.py
@@ -0,0 +1 @@
+../util.py
\ No newline at end of file
