diff options
Diffstat (limited to 'blob-multiprocess')
17 files changed, 0 insertions, 635 deletions
diff --git a/blob-multiprocess/README.txt b/blob-multiprocess/README.txt deleted file mode 100644 index 31caa29..0000000 --- a/blob-multiprocess/README.txt +++ /dev/null @@ -1,92 +0,0 @@ -Blobs Server Multiprocessing Improvement Assessment -=================================================== - -The code in this directory has the purpose of showing whether multiprocessing -improves the Soledad Blobs Server or not. - -It uses a haproxy instance listening on local port 8000 to route between -different processes listening on ports 8001-8008. The number of processes -depends on the testing scenario. - -Then, it runs a set of tests using 1, 2, 4 and 8 blobs server processes and -stores the results in a text file. Those results can then be graphed and -compared visually. - - -Dependencies ------------- - -To run the testing script, you will need to have: - -- A python virtualenv with Soledad Server installed on it. -- A working haproxy installation. -- curl - -To generate the graph, you will need: - -- numpy -- matplotlib/pyplot - - -Description of files --------------------- - -. -├── blobs-in-parallel.png - A graph generated from ./results.txt -├── blobs-server.py - A simple blobs server. -├── graph.py - The graphing script. -├── haproxy - A directory with haproxy config. -├── makefile - Rules for starting up pieces of the test. -├── multiproc.py - A service that spawns multiple blobs servers. -├── README - This file. -├── request.py - A stressing blobs client. -├── results - A directory with some results stored. -├── results.txt - The file that ./graph.py reads from. -└── run-test.sh - The script to run tests and produce output. - - -Actions -------- - -The following set of actions are tested in each scenario, and compose the -X axis of the final graph: - - - baseline: a simple GET / to the root of the webserver, returning an empty - page. Nothing can be faster than that. - - - list: a simple GET /blobs/some-user which lists the current blobs. 
In the - present case, the result will always be an empty set of blobs. - - - put: a PUT /blobs/some-user/some-blob-id. The size of the blob depends on - the scenarios configured in ./run-test.sh. - - - get: a PUT /blobs/some-user/some-blob-id followed by a GET to the same - blob. - - - flag: a PUT /blobs/some-user/some-blob-id followed by a POST to the same - blob setting one flag. - - - delete: a PUT /blobs/some-user/some-blob-id followed by a DELETE of the - same blob. - -When testing, each action is run 5 times and the mean time of the 5 runs is -written to ./results.txt (see get_mean in ./run-test.sh); that mean is used -when graphing. - -Usage ------ - -Configure the scenarios you want to test by editing ./run-test.sh, and -then run the script. It will: - - - iterate through the desired number of processes and start up haproxy and - the multiproc blobs server for each scenario. - - - iterate through all the actions tested (baseline, put, put+get, etc). - - - iterate through amount x size blobs scenarios. - - - delete the contents of /tmp/blobs/* and run tests for each scenario. - -Check the makefile for rules to help debugging (e.g. `make server` and `make -get`).
def mkdir_p(path):
    """Create ``path`` with any missing parents, like ``mkdir -p``.

    An ``OSError`` is re-raised unless it reports that ``path`` already
    exists and ``path`` is a directory.
    """
    try:
        os.makedirs(path)
    except OSError as err:  # Python >2.5
        already_a_directory = (
            err.errno == errno.EEXIST and os.path.isdir(path))
        if not already_a_directory:
            raise


def parse_args():
    """Parse the two positional arguments: blobs directory and port."""
    cli = ArgumentParser()
    cli.add_argument('dir', type=str,
                     help="The directory to store blobs.")
    cli.add_argument('port', type=int,
                     help="The port in which to listen.")
    return cli.parse_args()


class DummyResource(Resource):
    """Resource answering every GET with an empty body."""

    def render_GET(self, request):
        return ''


def start_server(dir, port):
    """Listen on ``port`` with the dummy root and the blobs resource.

    The empty child path is served by ``DummyResource`` and ``blobs`` by a
    ``BlobsResource`` created with ``"filesystem"`` and ``dir``.
    """
    root = Resource()
    root.putChild("", DummyResource())
    root.putChild("blobs", BlobsResource("filesystem", dir))
    reactor.listenTCP(port, Site(root))


def main(dir, port):
    """Create the blobs directory, log to stdout and run the reactor."""
    mkdir_p(dir)
    log.startLogging(stdout)
    start_server(dir, port)
    reactor.run()


if __name__ == "__main__":
    args = parse_args()
    main(args.dir, args.port)
def get_data(path='results.txt', amount=1000):
    """Load mean timings from a results file.

    Each non-blank line must hold five whitespace-separated fields:
    ``procs action amount size mean``.

    :param path: file to read; defaults to ``results.txt`` in the current
                 working directory.
    :param amount: only lines whose ``amount`` field equals this value are
                   kept; defaults to 1000.
    :return: a dict mapping ``procs`` (as a string) to a dict mapping
             ``action`` to the mean as a float.
    :raise ValueError: if a non-blank line does not have five fields or
                       its numeric fields cannot be parsed.
    """
    data = {}
    with open(path) as f:
        for line in f:
            if not line.strip():
                # A blank line (e.g. a trailing newline) carries no result;
                # without this guard the unpacking below would fail.
                continue
            procs, action, line_amount, _size, mean = line.split()
            if int(line_amount) != amount:
                continue
            data.setdefault(procs, {})[action] = float(mean)
    return data


def plot_data(data):
    """Draw one group of bars per action and one bar per process count.

    Bar heights are ``1000. / mean`` for every action listed in the
    module-level ``graphs``. The figure is saved to
    ``blobs-in-parallel.png`` and then shown.

    :param data: mapping as returned by ``get_data``; it must contain the
                 keys ``'1'``, ``'2'``, ``'4'`` and ``'8'``.
    """
    process_counts = ('1', '2', '4', '8')

    ind = np.arange(len(graphs))  # the x locations for the groups
    width = 0.20                  # the width of the bars

    fig, ax = plt.subplots()
    bars = []
    for position, procs in enumerate(process_counts):
        vals = [1000. / data[procs][action] for action in graphs]
        # Each process count is shifted one bar width to the right.
        bars.append(ax.bar(ind + position * width, vals, width))

    # add some text for labels, title and axes ticks
    ax.set_ylabel('Requests per second')
    ax.set_title('How multiprocessing affects Blobs Server')
    ax.set_xticks(ind + width)
    ax.set_xticklabels(tuple(labels))

    ax.legend(
        tuple(rects[0] for rects in bars),
        tuple('1 process' if procs == '1' else '%s processes' % procs
              for procs in process_counts))
    ax.grid()

    plt.savefig('blobs-in-parallel.png')
    plt.show()


if __name__ == '__main__':
    data = get_data()
    plot_data(data)
- server processor1 127.0.0.1:8001 - server processor2 127.0.0.1:8002 - timeout connect 5000 - timeout server 50000 - -frontend blobs-server - bind *:8000 - default_backend multiprocessing-blobs - timeout client 50000 diff --git a/blob-multiprocess/haproxy/roundrobin-3.cfg b/blob-multiprocess/haproxy/roundrobin-3.cfg deleted file mode 100644 index f964e5f..0000000 --- a/blob-multiprocess/haproxy/roundrobin-3.cfg +++ /dev/null @@ -1,12 +0,0 @@ -backend multiprocessing-blobs - balance roundrobin - server processor1 127.0.0.1:8001 - server processor2 127.0.0.1:8002 - server processor3 127.0.0.1:8003 - timeout connect 5000 - timeout server 50000 - -frontend blobs-server - bind *:8000 - default_backend multiprocessing-blobs - timeout client 50000 diff --git a/blob-multiprocess/haproxy/roundrobin-4.cfg b/blob-multiprocess/haproxy/roundrobin-4.cfg deleted file mode 100644 index c1b6c1e..0000000 --- a/blob-multiprocess/haproxy/roundrobin-4.cfg +++ /dev/null @@ -1,13 +0,0 @@ -backend multiprocessing-blobs - balance roundrobin - server processor1 127.0.0.1:8001 - server processor2 127.0.0.1:8002 - server processor3 127.0.0.1:8003 - server processor4 127.0.0.1:8004 - timeout connect 5000 - timeout server 50000 - -frontend blobs-server - bind *:8000 - default_backend multiprocessing-blobs - timeout client 50000 diff --git a/blob-multiprocess/haproxy/roundrobin-8.cfg b/blob-multiprocess/haproxy/roundrobin-8.cfg deleted file mode 100644 index 390087c..0000000 --- a/blob-multiprocess/haproxy/roundrobin-8.cfg +++ /dev/null @@ -1,17 +0,0 @@ -backend multiprocessing-blobs - balance roundrobin - server processor1 127.0.0.1:8001 - server processor2 127.0.0.1:8002 - server processor3 127.0.0.1:8003 - server processor4 127.0.0.1:8004 - server processor5 127.0.0.1:8005 - server processor6 127.0.0.1:8006 - server processor7 127.0.0.1:8007 - server processor8 127.0.0.1:8008 - timeout connect 5000 - timeout server 50000 - -frontend blobs-server - bind *:8000 - default_backend 
class BlobsServerProtocol(ProcessProtocol):
    """Relay what a spawned blobs server reports by printing it."""

    @staticmethod
    def _echo(data):
        """Print ``data`` if non-empty, stripping it unless it is a Failure."""
        if not isinstance(data, Failure):
            data = data.strip()
        if data:
            print(data)

    def outReceived(self, data):
        """Print a chunk the child wrote to its standard output."""
        self._echo(data)

    def errReceived(self, data):
        """Print a chunk the child wrote to its standard error.

        ``ProcessProtocol`` names this callback ``errReceived``. The method
        used to be spelled ``errorReceived``, a name twisted does not call,
        so the children's stderr was never printed.
        """
        self._echo(data)

    # Backward-compatible alias for code that used the old spelling.
    errorReceived = errReceived

    def processEnded(self, data):
        """Print the value twisted passes when the child process has ended.

        Per the ``ProcessProtocol`` API this is a ``Failure`` describing
        why the process ended, so it is printed without stripping.
        """
        self._echo(data)
def parse_args():
    """Parse the command line; ``--procs`` is an int defaulting to 4."""
    parser = ArgumentParser()
    parser.add_argument('--procs', type=int, default=4,
                        help="the number of processes to spawn")
    return parser.parse_args()


def spawn_servers(procs, python=None, first_port=8001,
                  blobs_dir='/tmp/blobs'):
    """Spawn ``procs`` blobs servers on consecutive ports.

    Each child runs ``./blobs-server.py <blobs_dir> <port>`` with ports
    ``first_port`` to ``first_port + procs - 1``.

    :param procs: number of server processes to spawn; must be at least 1.
    :param python: interpreter used to run the servers. Defaults to
                   ``sys.executable``, the interpreter running this script,
                   instead of the path under one developer's home
                   directory that used to be hard-coded here.
    :param first_port: port of the first server; defaults to 8001.
    :param blobs_dir: directory handed to every server; defaults to
                      ``/tmp/blobs``.
    :return: the list of objects returned by ``reactor.spawnProcess``.
    :raise ValueError: if ``procs`` is smaller than 1.
    """
    import sys

    if procs < 1:
        # Spawning nothing would leave the reactor running with no server.
        raise ValueError("procs must be at least 1, got %r" % (procs,))
    if python is None:
        python = sys.executable

    children = []
    for port in range(first_port, first_port + procs):
        args = [python, './blobs-server.py', blobs_dir, str(port)]
        # One protocol per child rather than a single shared instance, so
        # per-process state set on the protocol by twisted is not shared.
        child = reactor.spawnProcess(BlobsServerProtocol(), python, args)
        children.append(child)
    return children


def main():
    """Spawn the requested number of servers and run the reactor."""
    args = parse_args()
    spawn_servers(args.procs)
    reactor.run()


if __name__ == "__main__":
    main()
def _finished(_, amount, size):
    """Report the finished run and stop the reactor."""
    message = "Finished putting {} blobs of size {}K.".format(amount, size)
    print(message)
    reactor.stop()


def _error(failure):
    """Report a failed run and stop the reactor."""
    print("Failed: %r" % failure)
    reactor.stop()


def main(generator):
    """Run ``generator`` cooperatively; return a deferred for completion."""
    return task.Cooperator().cooperate(generator).whenDone()


def requests_generator(args):
    """Yield once per issued request, then yield the gathered results.

    Depending on ``args`` every iteration issues a GET to ``BASE_URI``
    (``baseline``), a GET to ``BLOBS_URI`` (``list``), or a PUT of
    ``args.size`` * 1024 bytes to a fresh blob URI. The PUT is optionally
    followed by a GET, a POST of one flag and/or a DELETE of that blob.
    """
    payload = "a" * args.size * 1024

    def _get(_, uri):
        fetching = treq.get(uri, pool=pool)
        fetching.addCallback(readBody)
        return fetching

    def _flag(_, uri):
        return treq.post(uri, data=BytesIO('["PROCESSING"]'), pool=pool)

    def _delete(_, uri):
        return treq.delete(uri, pool=pool)

    # Follow-up steps, in the order they are chained after the PUT.
    followups = (
        (args.get, _get),
        (args.flag, _flag),
        (args.delete, _delete),
    )

    pending = []
    for _count in xrange(args.amount):
        if args.baseline:
            d = treq.get(BASE_URI, pool=pool)
        elif args.list:
            d = treq.get(BLOBS_URI, pool=pool)
        else:
            uri = urljoin(BLOBS_URI, uuid4().hex)
            d = treq.put(uri, data=payload, pool=pool)
            for wanted, step in followups:
                if wanted:
                    d.addCallback(step, uri)

        pending.append(d)
        yield None

    yield defer.gatherResults(pending)


if __name__ == "__main__":
    args = parse_args()
    generator = requests_generator(args)
    d = main(generator)
    d.addCallback(_finished, args.amount, args.size)
    d.addErrback(_error)
    reactor.run()
b/blob-multiprocess/results/2017-01-04_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt deleted file mode 100644 index 80d16ae..0000000 --- a/blob-multiprocess/results/2017-01-04_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt +++ /dev/null @@ -1,24 +0,0 @@ -1 baseline 1000 10 1.70170302391 -1 list 1000 10 1.86631264687 -1 put 1000 10 2.04504818916 -1 get 1000 10 5.2823679924 -1 flag 1000 10 3.88802418709 -1 delete 1000 10 3.07739658356 -2 baseline 1000 10 1.69806017876 -2 list 1000 10 3.11238617897 -2 put 1000 10 2.83830742836 -2 get 1000 10 3.40557880402 -2 flag 1000 10 3.2692761898 -2 delete 1000 10 3.34825959206 -4 baseline 1000 10 2.49910435677 -4 list 1000 10 2.48401441574 -4 put 1000 10 2.3237077713 -4 get 1000 10 3.88459482193 -4 flag 1000 10 3.63829479218 -4 delete 1000 10 3.17096538544 -8 baseline 1000 10 2.12282576561 -8 list 1000 10 2.49229278564 -8 put 1000 10 2.69512839317 -8 get 1000 10 3.47697739601 -8 flag 1000 10 3.35881881714 -8 delete 1000 10 3.29797801971 diff --git a/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png b/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png Binary files differdeleted file mode 100644 index 139b9cc..0000000 --- a/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.png +++ /dev/null diff --git a/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt b/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt deleted file mode 100644 index 29dc773..0000000 --- a/blob-multiprocess/results/2017-01-05_Intel-Core-i7-4600U@2.10GHz_8GB-RAM_SATA-II-SSD-LITEONIT-LGT-256M6G.txt +++ /dev/null @@ -1,24 +0,0 @@ -1 baseline 1000 10 1.65788397789 -1 list 1000 10 1.76658239365 -1 put 1000 10 2.16625041962 -1 get 1000 
10 4.93044900894 -1 flag 1000 10 3.89305019379 -1 delete 1000 10 2.97089977264 -2 baseline 1000 10 1.60334076881 -2 list 1000 10 2.71356620789 -2 put 1000 10 3.23587818146 -2 get 1000 10 3.38332500458 -2 flag 1000 10 3.25708303452 -2 delete 1000 10 3.03945021629 -4 baseline 1000 10 2.60279417038 -4 list 1000 10 2.11859984398 -4 put 1000 10 2.2915845871 -4 get 1000 10 3.48964958191 -4 flag 1000 10 3.69795999527 -4 delete 1000 10 3.31933698654 -8 baseline 1000 10 2.12685017586 -8 list 1000 10 2.35639958382 -8 put 1000 10 2.58642120361 -8 get 1000 10 3.74429321289 -8 flag 1000 10 3.53779459 -8 delete 1000 10 3.292395401 diff --git a/blob-multiprocess/results/blobs-in-parallel-1.png b/blob-multiprocess/results/blobs-in-parallel-1.png Binary files differdeleted file mode 100644 index c23a635..0000000 --- a/blob-multiprocess/results/blobs-in-parallel-1.png +++ /dev/null diff --git a/blob-multiprocess/run-test.sh b/blob-multiprocess/run-test.sh deleted file mode 100755 index eab6aaa..0000000 --- a/blob-multiprocess/run-test.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -# Run Multiprocessing Test -# ======================== -# - -# This script measures the time of several interactions with the Blobs Server -# and outputs them to a text file. -# -# The different test scenarios are: -# - 1, 2, 4, and 8 server processes. -# - several client actions (baseline, list, put, get, flag, delete) -# - -# -# Client actions -# -------------- - -# Baseline: is a GET / to a dummy server that returns an empty reply. Nothing -# can be faster than this. - -# List: is a GET /blobs/username, which lists the (empty) set of blobs stored -# in the server. - -set -e - - -kill_multiproc() { - pids=$(ps aux | grep python | grep "\(multiproc\|blobs-server\)" \ - | grep -v grep | sed -e "s/\s\+/ /g" | cut -d' ' -f 2) - if [ ! 
# Kill (SIGKILL) every python process whose command line mentions multiproc
# or blobs-server. Looping over an empty pid list is a no-op.
kill_multiproc() {
  pids=$(ps aux | grep python | grep "\(multiproc\|blobs-server\)" \
         | grep -v grep | sed -e "s/\s\+/ /g" | cut -d' ' -f 2)
  for pid in ${pids}; do
    kill -9 ${pid}
  done
}


# Restart the blobs servers and haproxy for the number of processes in $1.
start_multiproc() {
  procs=${1}
  kill_multiproc
  # The servers run in the background; wait before starting haproxy.
  make multiproc PROCS=${procs} > /dev/null &
  sleep 3
  make roundrobin PROCS=${procs}
  sleep 1
}


# Print the "best of 5" figure reported by python's timeit for the command.
get_best() {
  statement=$*
  result=$(python -m timeit -n 1 -r 5 -s "import os" "os.system('${statement}')")
  best=$(echo $result | sed -e s/.\*best\ of\ 5:\ // -e s/per\ loop//)
  echo $best
}


# Print the mean wall-clock time of 5 runs of the given command.
get_mean() {
  statement=$*
  python -c "import timeit; t = timeit.timeit('import os; os.system(\'./${statement} > /dev/null\');', number=5); print t / 5"
}


# Time one scenario: $1 action, $2 processes, $3 amount, $4 size.
# Prints "<procs> <action> <amount> <size> <mean>".
request() {
  action=${1}
  procs=${2}
  amount=${3}
  size=${4}
  # Despite its name, this holds the mean computed by get_mean.
  best=$(get_mean ./request.py --${action} ${amount} ${size})
  echo "${procs} ${action} ${amount} ${size} ${best}"
}


# Run every action against 1, 2, 4 and 8 server processes and append the
# timings to results.txt.
run_test() {
  for procs in 1 2 4 8; do
    start_multiproc ${procs}
    for action in baseline list put get flag delete; do
      # Other scenarios that can be enabled: "10 1000" "100 100" "1000 10".
      for amountsize in "1000 10"; do
        # Start every scenario from an empty blobs directory.
        rm -rf /tmp/blobs/*
        request ${action} ${procs} ${amountsize} >> results.txt
      done
    done
    kill_multiproc
  done
}

run_test