diff options
Diffstat (limited to 'blob-multiprocess')
17 files changed, 0 insertions, 635 deletions
diff --git a/blob-multiprocess/README.txt b/blob-multiprocess/README.txt deleted file mode 100644 index 31caa29..0000000 --- a/blob-multiprocess/README.txt +++ /dev/null @@ -1,92 +0,0 @@ -Blobs Server Multiprocessing Improvement Assessment -=================================================== - -The code in this directory has the purpose of showing whether multiprocessing -improves the Soledad Blobs Server or not. - -It uses a haproxy instance listening on local port 8000 to route between -different processes listening on ports 8001-8008. The number of processes -depends on the testing scenario. - -Then, it runs a set of tests using 1, 2, 4 and 8 blobs server processes and -stores the results in a text file. Those results can then be graphed and -compared visually. - - -Dependencies ------------- - -To run the testing script, you will need to have: - -- A python virtualenv with Soledad Server installed on it. -- A working haproxy installation. -- curl - -To generate the graph, you will need: - -- numpy -- matplotlib/pyplot - - -Description of files --------------------- - -. -├── blobs-in-parallel.png - A graph generated from ./results.txt -├── blobs-server.py       - A simple blobs server. -├── graph.py              - The graphing script. -├── haproxy               - A directory with haproxy config. -├── makefile              - Rules for starting up pieces of the test. -├── multiproc.py          - A service that spawns multiple blobs servers. -├── README                - This file. -├── request.py            - A stressing blobs client. -├── results               - A directory with some results stored. -├── results.txt           - The file that ./graph.py reads from. -└── run-test.sh           - The script to run tests and produce output. - - -Actions -------- - -The following set of actions are tested in each scenario, and compose the -X axis of the final graph: - -  - baseline: a simple GET / to the root of the webserver, returning an empty -    page. Nothing can be faster than that. - -  - list: a simple GET /blobs/some-user which lists the current blobs. In the -    present case, the result will always be an empty set of blobs. - -  - put: a PUT /blobs/some-user/some-blob-id. The size of the blob depends on -    the scenarios configured in ./run-test.sh. - -  - get: a PUT /blobs/some-user/some-blob-id followed by a GET to the same -    blob. - -  - flag: a PUT /blobs/some-user/some-blob-id followed by a POST to the same -    blob setting one flag. - -  - delete: a PUT /blobs/some-user/some-blob-id followed by a DELETE of the -    same blob. - -When testing, each action is run 5 times (so the numbers in ./results.txt are -the accumulated time of the 5 runs) and the mean of the time taken is used when -graphing. - -Usage ------ - -Configure the scenarios you want tests to be run by editing ./run-test.sh, and -then run the script. It will: - -  - iterate through desired number of processes and startup haproxy and -    multiproc blobs server for each scenario. - -  - iterate through all the actions tested (baseline, put, put+get, etc). - -  - iterate through amount x size blobs scenarios. - -  - delete the contents of /tmp/blobs/* and run tests for each scenario. - -Check the makefile for rules to help debugging (i.e. `make server` and `make -get`). diff --git a/blob-multiprocess/blobs-server.py b/blob-multiprocess/blobs-server.py deleted file mode 100644 index 74d5e62..0000000 --- a/blob-multiprocess/blobs-server.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python - -import errno -import os - -from argparse import ArgumentParser -from sys import stdout - -from twisted.internet import reactor -from twisted.python import log -from twisted.web.resource import Resource -from twisted.web.server import Site - -from leap.soledad.server._blobs import BlobsResource - - -def mkdir_p(path): -    try: -        os.makedirs(path) -    except OSError as exc:  # Python >2.5 -        if exc.errno == errno.EEXIST and os.path.isdir(path): -            pass -        else: -            raise - - -def parse_args(): -    parser = ArgumentParser() -    parser.add_argument('dir', type=str, -                        help="The directory to store blobs.") -    parser.add_argument('port', type=int, -                        help="The port in which to listen.") -    args = parser.parse_args() -    return args - - -class DummyResource(Resource): - -    def render_GET(self, request): -        return '' - - -def start_server(dir, port): -    resource = Resource() -    resource.putChild("", DummyResource()) -    resource.putChild("blobs", BlobsResource("filesystem", dir)) -    site = Site(resource) -    reactor.listenTCP(port, site) - - -def main(dir, port): -    mkdir_p(dir) -    log.startLogging(stdout) -    start_server(dir, port) -    reactor.run() - - -if __name__ == "__main__": -    args = parse_args() -    main(args.dir, args.port) diff --git a/blob-multiprocess/graph.py b/blob-multiprocess/graph.py deleted file mode 100755 index f1f832e..0000000 --- a/blob-multiprocess/graph.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python - -import numpy as np -import matplotlib.pyplot as plt - -from mpltools import style - -style.use('ggplot') - -graphs = [ -    'baseline', -    'list', -    'put', -    'get', -    'flag', -    'delete', -] - -labels = [ -    'baseline', -    'list', -    'put', -    'put+get', -    'put+flag', -    'put+delete', -] - - -def get_data(): -    data = {} -    with open('results.txt') as f: -        for line in f.readlines(): -            procs, action, amount, size, mean = line.split() -            if int(amount) != 1000: -                continue -            if procs not in data: -                data[procs] = {} -            data[procs][action] = float(mean) -    return data - - -def plot_data(data): - -    N = 6 - -    ind = np.arange(N)  # the x locations for the groups -    width = 0.20       # the width of the bars - -    fig, ax = plt.subplots() -    vals = [1000. / data['1'][action] for action in graphs] -    rects1 = ax.bar(ind, vals, width) - -    vals = [1000. / data['2'][action] for action in graphs] -    rects2 = ax.bar(ind + width, vals, width) - -    vals = [1000. / data['4'][action] for action in graphs] -    rects3 = ax.bar(ind + (2 * width), vals, width) - -    vals = [1000. / data['8'][action] for action in graphs] -    rects4 = ax.bar(ind + (3 * width), vals, width) - -    # add some text for labels, title and axes ticks -    ax.set_ylabel('Requests per second') -    ax.set_title('How multiprocessing affects Blobs Server') -    ax.set_xticks(ind + width) -    ax.set_xticklabels(tuple(labels)) - -    ax.legend( -        (rects1[0], rects2[0], rects3[0], rects4[0]), -        ('1 process', '2 processes', '4 processes', '8 processes')) -    ax.grid() - -    plt.savefig('blobs-in-parallel.png') -    plt.show() - - -if __name__ == '__main__': -    data = get_data() -    plot_data(data) diff --git a/blob-multiprocess/haproxy/roundrobin-1.cfg b/blob-multiprocess/haproxy/roundrobin-1.cfg deleted file mode 100644 index 47ebf8a..0000000 --- a/blob-multiprocess/haproxy/roundrobin-1.cfg +++ /dev/null @@ -1,10 +0,0 @@ -backend multiprocessing-blobs -  balance roundrobin -  server processor1 127.0.0.1:8001 -  timeout connect 5000 -  timeout server  50000 - -frontend blobs-server -  bind *:8000 -  default_backend multiprocessing-blobs -  timeout client  50000 diff --git a/blob-multiprocess/haproxy/roundrobin-2.cfg b/blob-multiprocess/haproxy/roundrobin-2.cfg deleted file mode 100644 index 98b2e2a..0000000 --- a/blob-multiprocess/haproxy/roundrobin-2.cfg +++ /dev/null @@ -1,11 +0,0 @@ -backend multiprocessing-blobs -  balance roundrobin -  server processor1 127.0.0.1:8001 -  server processor2 127.0.0.1:8002 -  timeout connect 5000 -  timeout server  50000 - -frontend blobs-server -  bind *:8000 -  default_backend multiprocessing-blobs -  timeout client  50000 diff --git a/blob-multiprocess/haproxy/roundrobin-3.cfg b/blob-multiprocess/haproxy/roundrobin-3.cfg deleted file mode 100644 index f964e5f..0000000 --- a/blob-multiprocess/haproxy/roundrobin-3.cfg +++ /dev/null @@ -1,12 +0,0 @@ -backend multiprocessing-blobs -  balance roundrobin -  server processor1 127.0.0.1:8001 -  server processor2 127.0.0.1:8002 -  server processor3 127.0.0.1:8003 -  timeout connect 5000 -  timeout server  50000 - -frontend blobs-server -  bind *:8000 -  default_backend multiprocessing-blobs -  timeout client  50000 diff --git a/blob-multiprocess/haproxy/roundrobin-4.cfg b/blob-multiprocess/haproxy/roundrobin-4.cfg deleted file mode 100644 index c1b6c1e..0000000 --- a/blob-multiprocess/haproxy/roundrobin-4.cfg +++ /dev/null @@ -1,13 +0,0 @@ -backend multiprocessing-blobs -  balance roundrobin -  server processor1 127.0.0.1:8001 -  server processor2 127.0.0.1:8002 -  server processor3 127.0.0.1:8003 -  server processor4 127.0.0.1:8004 -  timeout connect 5000 -  timeout server  50000 - -frontend blobs-server -  bind *:8000 -  default_backend multiprocessing-blobs -  timeout client  50000 diff --git a/blob-multiprocess/haproxy/roundrobin-8.cfg b/blob-multiprocess/haproxy/roundrobin-8.cfg deleted file mode 100644 index 390087c..0000000 --- a/blob-multiprocess/haproxy/roundrobin-8.cfg +++ /dev/null @@ -1,17 +0,0 @@ -backend multiprocessing-blobs -  balance roundrobin -  server processor1 127.0.0.1:8001 -  server processor2 127.0.0.1:8002 -  server processor3 127.0.0.1:8003 -  server processor4 127.0.0.1:8004 -  server processor5 127.0.0.1:8005 -  server processor6 127.0.0.1:8006 -  server processor7 127.0.0.1:8007 -  server processor8 127.0.0.1:8008 -  timeout connect 5000 -  timeout server  50000 - -frontend blobs-server -  bind *:8000 -  default_backend multiprocessing-blobs -  timeout client  50000 diff --git a/blob-multiprocess/makefile b/blob-multiprocess/makefile deleted file mode 100644 index 4a8cbaa..0000000 --- a/blob-multiprocess/makefile +++ /dev/null @@ -1,44 +0,0 @@ -DIR    = /tmp/blobs -PORT   = 8000 -URI    = http://127.0.0.1:8000/blobs/user -UUID   = $(shell uuidgen) -PROCS ?= 4 - -all: multiproc - -server: killall -	python blobs-server.py $(DIR) $(PORT) - -multiproc: -	python multiproc.py --procs $(PROCS) - -roundrobin: killall -	/usr/sbin/haproxy -D -f haproxy/roundrobin-$(PROCS).cfg - -killall: -	-killall -9 haproxy - -data: -	dd if=/dev/urandom of=/tmp/data bs=1024 count=100 - -list: -	curl -X GET $(URI)/ - -put: -	curl -X PUT $(URI)/$(UUID) --data-binary @/tmp/data - -get: -	UUID=$(UUID); \ -	curl -X PUT $(URI)/$${UUID} --data-binary @/tmp/data; \ -	curl -X GET $(URI)/$${UUID} > /dev/null - -delete: -	UUID=$(UUID); \ -	curl -X PUT $(URI)/$${UUID} --data-binary @/tmp/data; \ -	curl -X DELETE $(URI)/$${UUID} - -put-ab: -	ab -c 10 -n 1000 -T /tmp/data $(URI)/$(UUID) - - -.PHONY: server multiproc roundrobin killall diff --git a/blob-multiprocess/multiproc.py b/blob-multiprocess/multiproc.py deleted file mode 100644 index c367e04..0000000 --- a/blob-multiprocess/multiproc.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python - -from argparse import ArgumentParser -from twisted.internet import reactor -from twisted.internet.protocol import ProcessProtocol -from twisted.python.failure import Failure - - -class BlobsServerProtocol(ProcessProtocol): - -    def outReceived(self, data): -        if not isinstance(data, Failure): -            data = data.strip() -        if data: -            print(data) - -    def errorReceived(self, data): -        if not isinstance(data, Failure): -            data = data.strip() -        if data: -            print(data) - -    def processEnded(self, data): -        if not isinstance(data, Failure): -            data = data.strip() -        if data: -            print(data) - -    # def processExited(self, data): -    #     print(data) - - -def parse_args(): -    parser = ArgumentParser() -    parser.add_argument('--procs', type=int, default=4, -                        help="the number of processes to spawn") -    args = parser.parse_args() -    return args - - -def spawn_servers(procs): -    protocol = BlobsServerProtocol() -    children = [] -    python = '/home/drebs/.virtualenvs/apps/bin/python' -    for port in range(8001, 8001 + procs): -        args = [python, './blobs-server.py', '/tmp/blobs', str(port)] -        child = reactor.spawnProcess(protocol, python, args) -        children.append(child) - - -def main(): -    args = parse_args() -    spawn_servers(args.procs) -    reactor.run() - - -if __name__ == "__main__": -    main() diff --git a/blob-multiprocess/request.py b/blob-multiprocess/request.py deleted file mode 100755 index b73fb79..0000000 --- a/blob-multiprocess/request.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python - -from io import BytesIO -import time -import treq -from argparse import ArgumentParser -from twisted.internet import reactor, task, defer -from twisted.web.client import readBody, HTTPConnectionPool -from urlparse import urljoin -from uuid import uuid4 - - -BASE_URI = 'http://127.0.0.1:8000/' -BLOBS_URI = urljoin(BASE_URI, -                    'blobs/{}/'.format(time.strftime('%Y-%m-%d_%H-%M-%s'))) -CONCURRENT = 10 - -pool = HTTPConnectionPool(reactor) - - -def parse_args(): -    parser = ArgumentParser() -    parser.add_argument('amount', type=int, help="the amount of blobs") -    parser.add_argument('size', type=int, help="size in blocks of 1024 bytes") -    parser.add_argument('--put', action='store_true', -                        help="noop") -    parser.add_argument('--baseline', action='store_true', -                        help="GET /") -    parser.add_argument('--list', action='store_true', -                        help="GET /blobs/") -    parser.add_argument('--get', action='store_true', -                        help="PUT + GET /blobs/someuser/someid") -    parser.add_argument('--flag', action='store_true', -                        help="PUT + POST /blobs/someuser/someid") -    parser.add_argument('--delete', action='store_true', -                        help="PUT + DELETE /blobs/someuser/someid") -    args = parser.parse_args() -    return args - - -def _finished(_, amount, size): -    print("Finished putting {} blobs of size {}K.".format(amount, size)) -    reactor.stop() - - -def _error(failure): -    print("Failed: %r" % failure) -    reactor.stop() - - -def main(generator): -    cooperator = task.Cooperator() -    cooptask = cooperator.cooperate(generator) -    d = cooptask.whenDone() -    return d - - -def requests_generator(args): -    data = "a" * args.size * 1024 - -    def _get(_, uri): -        d = treq.get(uri, pool=pool) -        d.addCallback(lambda response: readBody(response)) -        return d - -    def _flag(_, uri): -        flags = BytesIO('["PROCESSING"]') -        d = treq.post(uri, data=flags, pool=pool) -        return d - -    def _delete(_, uri): -        d = treq.delete(uri, pool=pool) -        return d - -    deferreds = [] -    for i in xrange(args.amount): -        if args.baseline: -            d = treq.get(BASE_URI, pool=pool) - -        elif args.list: -            d = treq.get(BLOBS_URI, pool=pool) - -        else: -            uri = urljoin(BLOBS_URI, uuid4().hex) -            d = treq.put(uri, data=data, pool=pool) -            if args.get: -                d.addCallback(_get, uri) -            if args.flag: -                d.addCallback(_flag, uri) -            if args.delete: -                d.addCallback(_delete, uri) - -        deferreds.append(d) -        yield None - -    yield defer.gatherResults(deferreds) - - -if __name__ == "__main__": -    args = parse_args() -    generator = requests_generator(args) -    d = main(generator) -    d.addCallback(_finished, args.amount, args.size) -    d.addErrback(_error) -    reactor.run() diff --git a/blob-multiprocess/results/2017-01-04_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png b/blob-multiprocess/results/2017-01-04_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png Binary files differdeleted file mode 100644 index db3fd96..0000000 --- a/blob-multiprocess/results/2017-01-04_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png +++ /dev/null diff --git a/blob-multiprocess/results/2017-01-04_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt b/blob-multiprocess/results/2017-01-04_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt deleted file mode 100644 index 80d16ae..0000000 --- a/blob-multiprocess/results/2017-01-04_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt +++ /dev/null @@ -1,24 +0,0 @@ -1 baseline 1000 10 1.70170302391 -1 list 1000 10 1.86631264687 -1 put 1000 10 2.04504818916 -1 get 1000 10 5.2823679924 -1 flag 1000 10 3.88802418709 -1 delete 1000 10 3.07739658356 -2 baseline 1000 10 1.69806017876 -2 list 1000 10 3.11238617897 -2 put 1000 10 2.83830742836 -2 get 1000 10 3.40557880402 -2 flag 1000 10 3.2692761898 -2 delete 1000 10 3.34825959206 -4 baseline 1000 10 2.49910435677 -4 list 1000 10 2.48401441574 -4 put 1000 10 2.3237077713 -4 get 1000 10 3.88459482193 -4 flag 1000 10 3.63829479218 -4 delete 1000 10 3.17096538544 -8 baseline 1000 10 2.12282576561 -8 list 1000 10 2.49229278564 -8 put 1000 10 2.69512839317 -8 get 1000 10 3.47697739601 -8 flag 1000 10 3.35881881714 -8 delete 1000 10 3.29797801971 diff --git a/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png b/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png Binary files differdeleted file mode 100644 index 139b9cc..0000000 --- a/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png +++ /dev/null diff --git a/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt b/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt deleted file mode 100644 index 29dc773..0000000 --- a/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt +++ /dev/null @@ -1,24 +0,0 @@ -1 baseline 1000 10 1.65788397789 -1 list 1000 10 1.76658239365 -1 put 1000 10 2.16625041962 -1 get 1000 10 4.93044900894 -1 flag 1000 10 3.89305019379 -1 delete 1000 10 2.97089977264 -2 baseline 1000 10 1.60334076881 -2 list 1000 10 2.71356620789 -2 put 1000 10 3.23587818146 -2 get 1000 10 3.38332500458 -2 flag 1000 10 3.25708303452 -2 delete 1000 10 3.03945021629 -4 baseline 1000 10 2.60279417038 -4 list 1000 10 2.11859984398 -4 put 1000 10 2.2915845871 -4 get 1000 10 3.48964958191 -4 flag 1000 10 3.69795999527 -4 delete 1000 10 3.31933698654 -8 baseline 1000 10 2.12685017586 -8 list 1000 10 2.35639958382 -8 put 1000 10 2.58642120361 -8 get 1000 10 3.74429321289 -8 flag 1000 10 3.53779459 -8 delete 1000 10 3.292395401 diff --git a/blob-multiprocess/results/blobs-in-parallel-1.png b/blob-multiprocess/results/blobs-in-parallel-1.png Binary files differdeleted file mode 100644 index c23a635..0000000 --- a/blob-multiprocess/results/blobs-in-parallel-1.png +++ /dev/null diff --git a/blob-multiprocess/run-test.sh b/blob-multiprocess/run-test.sh deleted file mode 100755 index eab6aaa..0000000 --- a/blob-multiprocess/run-test.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -# Run Multiprocessing Test -# ======================== -# - -# This script measures the time of several interactions with the Blobs Server -# and outputs them to a text file. -# -# The different test scenarios are: -#   - 1, 2, 4, and 8 server processes. -#   - several client actions (baseline, list, put, get, flag, delete) -#   -  -# -# Client actions -# -------------- - -# Baseline: is a GET / to a dummy server that returns an empty reply. Nothing -# can be faster than this. - -# List: is a GET /blobs/username, which lists the (empty) set of blobs stored -# in the server. - -set -e - - -kill_multiproc() { -  pids=$(ps aux | grep python | grep "\(multiproc\|blobs-server\)" \ -         | grep -v grep | sed -e "s/\s\+/ /g" | cut -d' ' -f 2) -  if [ ! -z "${pids}" ]; then -    for pid in ${pids}; do -      kill -9 ${pid} -    done -  fi -} - - -start_multiproc() { -  procs=${1} -  kill_multiproc -  make multiproc PROCS=${procs} > /dev/null & -  sleep 3 -  make roundrobin PROCS=${procs} -  sleep 1 -} - - -get_best() { -  statement=$* -  result=$(python -m timeit -n 1 -r 5 -s "import os" "os.system('${statement}')") -  best=$(echo $result | sed -e s/.\*best\ of\ 5:\ // -e s/per\ loop//) -  echo $best -} - - -get_mean() { -  statement=$* -  python -c "import timeit; t = timeit.timeit('import os; os.system(\'./${statement} > /dev/null\');', number=5); print t / 5" -} - - -request() { -  action=${1} -  procs=${2} -  amount=${3} -  size=${4} -  best=$(get_mean ./request.py --${action} ${amount} ${size}) -  echo "${procs} ${action} ${amount} ${size} ${best}" -} - - -run_test() { -  for procs in 1 2 4 8; do -    start_multiproc ${procs} -    for action in baseline list put get flag delete; do -      #for amountsize in "10 1000" "100 100" "1000 10"; do -      for amountsize in "1000 10"; do -        rm -rf /tmp/blobs/* -        request ${action} ${procs} ${amountsize} >> results.txt -      done -    done -    kill_multiproc -  done -} - -run_test  | 
