-rw-r--r--  scripts/db_access/client_side_db.py (renamed from scripts/client_side_db.py)                       |   0
-rw-r--r--  scripts/db_access/server_side_db.py (renamed from scripts/server_side_db.py)                       |   0
-rwxr-xr-x  scripts/doc_put_memory_usage/find_max_upload_size.py (renamed from scripts/find_max_upload_size.py) | 116
-rwxr-xr-x  scripts/doc_put_memory_usage/get-mem.py                                                            |  16
-rwxr-xr-x  scripts/doc_put_memory_usage/plot-mem.py                                                           |  73

5 files changed, 147 insertions(+), 58 deletions(-)
diff --git a/scripts/client_side_db.py b/scripts/db_access/client_side_db.py
index 15980f5d..15980f5d 100644
--- a/scripts/client_side_db.py
+++ b/scripts/db_access/client_side_db.py
diff --git a/scripts/server_side_db.py b/scripts/db_access/server_side_db.py
index 18641a0f..18641a0f 100644
--- a/scripts/server_side_db.py
+++ b/scripts/db_access/server_side_db.py
diff --git a/scripts/find_max_upload_size.py b/scripts/doc_put_memory_usage/find_max_upload_size.py
index 8abeee78..02c68015 100755
--- a/scripts/find_max_upload_size.py
+++ b/scripts/doc_put_memory_usage/find_max_upload_size.py
@@ -15,18 +15,26 @@
 import os
 import configparser
-import couchdb
 import logging
 import argparse
 import random
 import string
 import binascii
 import json
+import time
+import uuid
+
+
+from couchdb.client import Database
+from socket import error as socket_error
+from leap.soledad.common.couch import CouchDatabase
 
 SOLEDAD_CONFIG_FILE = '/etc/leap/soledad-server.conf'
 PREFIX = '/tmp/soledad_test'
 LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s'
+RETRIES = 3  # number of times to retry uploading a document of a certain
+             # size after a failure
 
 
 # configure logger
 
@@ -55,27 +63,19 @@ def get_couch_url(config_file=SOLEDAD_CONFIG_FILE):
 
 
 # generate or load an uploadable doc with the given size in mb
-def gen_body(size):
-    if os.path.exists(
-            os.path.join(PREFIX, 'body-%d.json' % size)):
-        logger.debug('Loading body with %d MB...' % size)
-        with open(os.path.join(PREFIX, 'body-%d.json' % size), 'r') as f:
-            return json.loads(f.read())
+def get_content(size):
+    fname = os.path.join(PREFIX, 'content-%d.json' % size)
+    if os.path.exists(fname):
+        logger.debug('Loading content with %d MB...' % size)
+        with open(fname, 'r') as f:
+            return f.read()
     else:
         length = int(size * 1024 ** 2)
-        hexdata = binascii.hexlify(os.urandom(length))[:length]
-        body = {
-            'couch_rev': None,
-            'u1db_rev': '1',
-            'content': hexdata,
-            'trans_id': '1',
-            'conflicts': None,
-            'update_conflicts': False,
-        }
         logger.debug('Generating body with %d MB...' % size)
-        with open(os.path.join(PREFIX, 'body-%d.json' % size), 'w+') as f:
-            f.write(json.dumps(body))
-        return body
+        content = binascii.hexlify(os.urandom(length))[:length]
+        with open(fname, 'w') as f:
+            f.write(content)
+        return content
 
 
 def delete_doc(db):
@@ -83,57 +83,57 @@ def delete_doc(db):
     db.delete(doc)
 
 
-def upload(db, size):
-    ddoc_path = ['_design', 'docs', '_update', 'put', 'largedoc']
-    resource = db.resource(*ddoc_path)
-    body = gen_body(size)
-    try:
-        logger.debug('Uploading %d MB body...' % size)
-        response = resource.put_json(
-            body=body,
-            headers={'content-type': 'application/json'})
-        # the document might have been updated in between, so we check for
-        # the return message
-        msg = response[2].read()
-        if msg == 'ok':
-            delete_doc(db)
+def upload(db, size, couch_db):
+    # try many times to be sure that size is infeasible
+    for i in range(RETRIES):
+        # wait until server is up to upload
+        while True:
+            try:
+                'largedoc' in couch_db
+                break
+            except socket_error:
+                logger.debug('Waiting for server to come up...')
+                time.sleep(1)
+        # attempt to upload
+        try:
+            logger.debug(
+                'Trying to upload %d MB document (attempt %d/%d)...' %
+                (size, (i+1), RETRIES))
+            content = get_content(size)
+            logger.debug('Starting upload of %d bytes.' % len(content))
+            doc = db.create_doc({'data': content}, doc_id='largedoc')
+            delete_doc(couch_db)
             logger.debug('Success uploading %d MB doc.' % size)
             return True
-        else:
-            # should not happen
-            logger.error('Unexpected error uploading %d MB doc: %s' % (size, msg))
-            return False
-    except Exception as e:
-        logger.debug('Failed to upload %d MB doc: %s' % (size, str(e)))
-        return False
-
-
-def find_max_upload_size(dbname):
-    couch_url = get_couch_url()
-    db_url = '%s/%s' % (couch_url, dbname)
-    logger.debug('Couch URL: %s' % db_url)
-    # get a 'raw' couch handler
-    server = couchdb.client.Server(couch_url)
-    db = server[dbname]
+        except Exception as e:
+            logger.debug('Failed to upload %d MB doc: %s' % (size, str(e)))
+    return False
+
+
+def find_max_upload_size(db_uri):
+    db = CouchDatabase.open_database(db_uri, False)
+    couch_db = Database(db_uri)
+    logger.debug('Database URI: %s' % db_uri)
     # delete eventual leftover from last run
-    largedoc = db.get('largedoc')
-    if largedoc is not None:
-        db.delete(largedoc)
+    if 'largedoc' in couch_db:
+        delete_doc(couch_db)
     # phase 1: increase upload size exponentially
     logger.info('Starting phase 1: increasing size exponentially.')
     size = 1
+    #import ipdb; ipdb.set_trace()
     while True:
-        if upload(db, size):
+        if upload(db, size, couch_db):
             size *= 2
         else:
             break
+
     # phase 2: binary search for maximum value
     unable = size
     able = size / 2
     logger.info('Starting phase 2: binary search for maximum value.')
     while unable - able > 1:
         size = able + ((unable - able) / 2)
-        if upload(db, size):
+        if upload(db, size, couch_db):
             able = size
         else:
             unable = size
@@ -150,12 +150,12 @@ if __name__ == '__main__':
         '-l', dest='logfile',
         help='log output to file')
     parser.add_argument(
-        'dbname', help='the name of the database to test in')
+        'db_uri', help='the couch database URI to test')
     args = parser.parse_args()
 
     # log to file
     if args.logfile is not None:
-        add_file_handler(args.logfile)
+        log_to_file(args.logfile)
 
     # set loglevel
     if args.debug is True:
@@ -164,6 +164,6 @@
         config_log(logging.INFO)
 
     # run test and report
-    logger.info('Will test using db %s.' % args.dbname)
-    maxsize = find_max_upload_size(args.dbname)
+    logger.info('Will test using db at %s.' % args.db_uri)
+    maxsize = find_max_upload_size(args.db_uri)
     logger.info('Max upload size is %d MB.' % maxsize)
diff --git a/scripts/doc_put_memory_usage/get-mem.py b/scripts/doc_put_memory_usage/get-mem.py
new file mode 100755
index 00000000..d64875fc
--- /dev/null
+++ b/scripts/doc_put_memory_usage/get-mem.py
@@ -0,0 +1,16 @@
+#!/usr/bin/python
+
+
+import psutil
+import time
+
+
+delta = 50 * 60
+start = time.time()
+
+while True:
+    now = time.time()
+    print "%s %s" % (now - start, psutil.phymem_usage().used)
+    time.sleep(0.1)
+    if now > start + delta:
+        break
diff --git a/scripts/doc_put_memory_usage/plot-mem.py b/scripts/doc_put_memory_usage/plot-mem.py
new file mode 100755
index 00000000..e24679a2
--- /dev/null
+++ b/scripts/doc_put_memory_usage/plot-mem.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+
+
+from matplotlib import pyplot as plt
+
+
+files = [
+    ('local', 'couchdb-json', 'b'),
+    ('local', 'bigcouch-json', 'r'),
+    ('local', 'couchdb-multipart', 'g'),
+    ('local', 'bigcouch-multipart', 'm'),
+]
+
+
+# config the plot
+plt.xlabel('time')
+plt.ylabel('memory usage')
+plt.title('bigcouch versus couch memory usage')
+
+
+for fi in files:
+
+    machine = fi[0]
+    database = fi[1]
+    color = fi[2]
+    filename = '%s-%s.txt' % (machine, database)
+
+    x = []
+    y = []
+
+    xmax = None
+    xmin = None
+    ymax = None
+    ymin = None
+
+    # read data from file
+    with open(filename, 'r') as f:
+        line = f.readline()
+        while line is not None:
+            time, mem = tuple(line.strip().split(' '))
+            mem = float(mem) / (10**9)
+            x.append(float(time))
+            y.append(mem)
+            if ymax == None or mem > ymax:
+                ymax = mem
+                xmax = time
+            if ymin == None or mem < ymin:
+                ymin = mem
+                xmin = time
+            line = f.readline()
+            if line == '':
+                break
+
+    kwargs = {
+        'linewidth': 1.0,
+        'linestyle': '-',
+    #    'marker': '.',
+        'color': color,
+    }
+    plt.plot(x, y, label=database, **kwargs)
+
+    #plt.axes().get_xaxis().set_ticks(x)
+    #plt.axes().get_xaxis().set_ticklabels(x)
+
+    # annotate max and min values
+    #plt.axes().annotate("%.2f GB" % ymax, xy=(xmax, ymax))
+    #plt.axes().annotate("%.2f GB" % ymin, xy=(xmin, ymin))
+
+
+plt.grid()
+plt.legend()
+plt.show()
+
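
A note on the search strategy in find_max_upload_size.py above: it brackets the maximum feasible document size by doubling until an upload fails (phase 1), then binary-searches between the last success and the first failure (phase 2). A minimal standalone sketch of the same idea, where try_size is a hypothetical stand-in for the script's upload():

def find_max_feasible(try_size):
    # Phase 1: double the size until the first failure; the largest
    # feasible size then lies in [size / 2, size).
    size = 1
    while try_size(size):
        size *= 2
    # Phase 2: binary search between last success and first failure.
    unable, able = size, size // 2
    while unable - able > 1:
        mid = able + (unable - able) // 2
        if try_size(mid):
            able = mid
        else:
            unable = mid
    return able

In the real script each probe is additionally retried RETRIES times, so a transient server failure (e.g. the server restarting after a large PUT) is not mistaken for an infeasible size.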

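Also worth noting: get-mem.py as committed targets Python 2 (print statement) and calls psutil.phymem_usage(), which later psutil releases removed in favor of psutil.virtual_memory(). A rough Python 3 equivalent of the sampler, assuming used physical memory is the intended metric:

#!/usr/bin/env python3
# Emit "<seconds elapsed> <bytes of physical memory in use>" every 0.1 s
# for 50 minutes, one sample per line.
import time

import psutil

DELTA = 50 * 60  # total sampling window, in seconds
start = time.time()

while True:
    now = time.time()
    print(now - start, psutil.virtual_memory().used)
    time.sleep(0.1)
    if now > start + DELTA:
        break

The two space-separated columns match what plot-mem.py's parser expects on each input line.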