From 93250414c6ef77b69a0aca008ef28c778c48afe4 Mon Sep 17 00:00:00 2001 From: drebs Date: Wed, 13 Sep 2017 12:09:41 -0300 Subject: [test] add graph for sqlite blobs backend benchmark test --- scripts/benchmark/legacy-vs-blobs/README.rst | 4 +- scripts/benchmark/sqlite-blobs-backend/README.rst | 35 ++++ .../sqlite-blobs-backend/blobs-sqlite-backend.png | Bin 0 -> 26563 bytes .../benchmark/sqlite-blobs-backend/data/get.json | 187 +++++++++++++++++++++ .../benchmark/sqlite-blobs-backend/data/put.json | 187 +++++++++++++++++++++ .../benchmark/sqlite-blobs-backend/gen-graph.py | 81 +++++++++ scripts/benchmark/sqlite-blobs-backend/makefile | 14 ++ .../sqlite-blobs-backend/requirements.pip | 2 + 8 files changed, 508 insertions(+), 2 deletions(-) create mode 100644 scripts/benchmark/sqlite-blobs-backend/README.rst create mode 100644 scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png create mode 100644 scripts/benchmark/sqlite-blobs-backend/data/get.json create mode 100644 scripts/benchmark/sqlite-blobs-backend/data/put.json create mode 100755 scripts/benchmark/sqlite-blobs-backend/gen-graph.py create mode 100644 scripts/benchmark/sqlite-blobs-backend/makefile create mode 100644 scripts/benchmark/sqlite-blobs-backend/requirements.pip diff --git a/scripts/benchmark/legacy-vs-blobs/README.rst b/scripts/benchmark/legacy-vs-blobs/README.rst index 3eedb3a4..ca502b8a 100644 --- a/scripts/benchmark/legacy-vs-blobs/README.rst +++ b/scripts/benchmark/legacy-vs-blobs/README.rst @@ -1,5 +1,5 @@ -Ploggin Legacy vs Blobs results -=============================== +Plotting Legacy vs Blobs results +================================ This script produces a bar graph comparing different implementations of the mail pipeline (legacy, blobs, blobs with session cache, blobs with session diff --git a/scripts/benchmark/sqlite-blobs-backend/README.rst b/scripts/benchmark/sqlite-blobs-backend/README.rst new file mode 100644 index 00000000..2d7a257f --- /dev/null +++ 
b/scripts/benchmark/sqlite-blobs-backend/README.rst @@ -0,0 +1,35 @@ +Plotting SQLite Blobs Backend storage/retrieval speed +===================================================== + +This script produces a bar graph comparing storage and retrieval times for +different amounts/sizes of incoming data (1 x 10M, 10 x 1M, 100 x 100K, 1000 +x 10K). + +Data acquisition +---------------- + +Data was gathered through different runs of the code in +`soledad/tests/benchmarks/test_sqlite_blobs_backend.py`. As these tests are run +twice by default, you should remove the second call to pytest in `tox.ini` that +would cause a second run of the tests for measuring memory. This script only +plots time, for now. + +Once you have done the above, chdir into `soledad/tests` and do something like this: + + tox -e benchmarks -- -m sqlite_blobs_backend_get --benchmark-autosave + tox -e benchmarks -- -m sqlite_blobs_backend_put --benchmark-autosave + +Each run of tox as above produces a JSON file in `soledad/tests/.benchmarks` +with stats. Copy the first file into `data/get.json` and the second file into +`data/put.json` (where `data/` is a subdir in the same directory as this readme +file). + +Plotting +-------- + +Once you have all your data in the `data/` directory and the +script is correctly tuned for your data, running `make` should be enough to +create a virtual environment with the dependencies and plot the data. + +By default, the script will generate a file called `./blobs-sqlite-backend.png` +in the current directory with the plot of the data. 
diff --git a/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png b/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png new file mode 100644 index 00000000..c989ad3e Binary files /dev/null and b/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png differ diff --git a/scripts/benchmark/sqlite-blobs-backend/data/get.json b/scripts/benchmark/sqlite-blobs-backend/data/get.json new file mode 100644 index 00000000..8c2f5ed0 --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/data/get.json @@ -0,0 +1,187 @@ +{ + "commit_info": { + "project": "soledad", + "id": "81200dec8ac063a1ad77a5991c2bc7223e4445af", + "time": "2017-09-13T11:11:15-03:00", + "author_time": "2017-09-11T14:38:36-03:00", + "dirty": true, + "branch": "8942" + }, + "version": "3.1.1", + "benchmarks": [ + { + "extra_info": { + "doc": "", + "cpu_percent": 152.0 + }, + "params": null, + "name": "test_sqlite_blobs_backend_get_100_100k", + "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_100_100k", + "group": null, + "options": { + "max_time": 1.0, + "min_time": 5e-06, + "min_rounds": 5, + "warmup": false, + "disable_gc": false, + "timer": "time" + }, + "stats": { + "q1": 0.03407096862792969, + "iterations": 1, + "q3": 0.03496694564819336, + "stddev_outliers": 3, + "ld15iqr": 0.03357100486755371, + "total": 0.6347811222076416, + "iqr_outliers": 3, + "min": 0.03357100486755371, + "median": 0.03432202339172363, + "rounds": 18, + "hd15iqr": 0.03844714164733887, + "iqr": 0.0008959770202636719, + "outliers": "3;3", + "mean": 0.03526561790042453, + "max": 0.041728973388671875, + "stddev": 0.002284116271186255, + "ops": 28.3562307861324 + }, + "param": null + }, + { + "extra_info": { + "doc": "", + "cpu_percent": 79.6 + }, + "params": null, + "name": "test_sqlite_blobs_backend_get_1_10000k", + "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_1_10000k", + "group": null, + "options": { + "max_time": 
1.0, + "min_time": 5e-06, + "min_rounds": 5, + "warmup": false, + "disable_gc": false, + "timer": "time" + }, + "stats": { + "q1": 0.04414188861846924, + "iterations": 1, + "q3": 0.04668295383453369, + "stddev_outliers": 3, + "ld15iqr": 0.04377603530883789, + "total": 0.36537861824035645, + "iqr_outliers": 0, + "min": 0.04377603530883789, + "median": 0.0463564395904541, + "rounds": 8, + "hd15iqr": 0.04724001884460449, + "iqr": 0.002541065216064453, + "outliers": "3;0", + "mean": 0.045672327280044556, + "max": 0.04724001884460449, + "stddev": 0.001407690906727707, + "ops": 21.89509621150675 + }, + "param": null + }, + { + "extra_info": { + "doc": "", + "cpu_percent": 151.1 + }, + "params": null, + "name": "test_sqlite_blobs_backend_get_1000_10k", + "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_1000_10k", + "group": null, + "options": { + "max_time": 1.0, + "min_time": 5e-06, + "min_rounds": 5, + "warmup": false, + "disable_gc": false, + "timer": "time" + }, + "stats": { + "q1": 0.1577029824256897, + "iterations": 1, + "q3": 0.2019520401954651, + "stddev_outliers": 2, + "ld15iqr": 0.15537214279174805, + "total": 0.8811802864074707, + "iqr_outliers": 0, + "min": 0.15537214279174805, + "median": 0.16169404983520508, + "rounds": 5, + "hd15iqr": 0.20454716682434082, + "iqr": 0.04424905776977539, + "outliers": "2;0", + "mean": 0.17623605728149414, + "max": 0.20454716682434082, + "stddev": 0.0243984691038373, + "ops": 5.674207738333273 + }, + "param": null + }, + { + "extra_info": { + "doc": "", + "cpu_percent": 167.9 + }, + "params": null, + "name": "test_sqlite_blobs_backend_get_10_1000k", + "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_10_1000k", + "group": null, + "options": { + "max_time": 1.0, + "min_time": 5e-06, + "min_rounds": 5, + "warmup": false, + "disable_gc": false, + "timer": "time" + }, + "stats": { + "q1": 0.0267794132232666, + "iterations": 1, + "q3": 
0.027324676513671875, + "stddev_outliers": 2, + "ld15iqr": 0.026620864868164062, + "total": 0.434096097946167, + "iqr_outliers": 1, + "min": 0.026620864868164062, + "median": 0.02689993381500244, + "rounds": 16, + "hd15iqr": 0.028914213180541992, + "iqr": 0.0005452632904052734, + "outliers": "2;1", + "mean": 0.027131006121635437, + "max": 0.028914213180541992, + "stddev": 0.0005876147817628755, + "ops": 36.85819816326519 + }, + "param": null + } + ], + "machine_info": { + "processor": "", + "machine": "x86_64", + "python_implementation_version": "2.7.13", + "python_compiler": "GCC 6.3.0 20170118", + "python_implementation": "CPython", + "python_build": [ + "default", + "Jan 19 2017 14:48:08" + ], + "python_version": "2.7.13", + "cpu": { + "hardware": "unknown", + "brand": "Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz", + "vendor_id": "GenuineIntel" + }, + "release": "4.9.0-3-amd64", + "system": "Linux", + "node": "pajeh", + "host": "pajeh" + }, + "datetime": "2017-09-13T14:35:32.735207" +} \ No newline at end of file diff --git a/scripts/benchmark/sqlite-blobs-backend/data/put.json b/scripts/benchmark/sqlite-blobs-backend/data/put.json new file mode 100644 index 00000000..f56681fc --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/data/put.json @@ -0,0 +1,187 @@ +{ + "machine_info": { + "python_version": "2.7.13", + "machine": "x86_64", + "python_build": [ + "default", + "Jan 19 2017 14:48:08" + ], + "node": "pajeh", + "processor": "", + "python_implementation": "CPython", + "system": "Linux", + "cpu": { + "hardware": "unknown", + "brand": "Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz", + "vendor_id": "GenuineIntel" + }, + "host": "pajeh", + "release": "4.9.0-3-amd64", + "python_compiler": "GCC 6.3.0 20170118", + "python_implementation_version": "2.7.13" + }, + "commit_info": { + "author_time": "2017-09-11T14:38:36-03:00", + "time": "2017-09-13T11:11:15-03:00", + "project": "soledad", + "dirty": true, + "id": "81200dec8ac063a1ad77a5991c2bc7223e4445af", + "branch": 
"8942" + }, + "version": "3.1.1", + "benchmarks": [ + { + "params": null, + "group": null, + "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_1_10000k", + "extra_info": { + "cpu_percent": 78.4, + "doc": "" + }, + "name": "test_sqlite_blobs_backend_put_1_10000k", + "options": { + "disable_gc": false, + "min_rounds": 5, + "timer": "time", + "min_time": 5e-06, + "max_time": 1.0, + "warmup": false + }, + "stats": { + "max": 0.40472412109375, + "q3": 0.4024088382720947, + "q1": 0.3844095468521118, + "median": 0.39255285263061523, + "total": 1.9671721458435059, + "stddev": 0.009608947098416514, + "outliers": "2;0", + "iqr": 0.01799929141998291, + "iqr_outliers": 0, + "min": 0.38356804847717285, + "iterations": 1, + "stddev_outliers": 2, + "rounds": 5, + "hd15iqr": 0.40472412109375, + "ops": 2.5417196001705507, + "mean": 0.3934344291687012, + "ld15iqr": 0.38356804847717285 + }, + "param": null + }, + { + "params": null, + "group": null, + "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_1000_10k", + "extra_info": { + "cpu_percent": 113.7, + "doc": "" + }, + "name": "test_sqlite_blobs_backend_put_1000_10k", + "options": { + "disable_gc": false, + "min_rounds": 5, + "timer": "time", + "min_time": 5e-06, + "max_time": 1.0, + "warmup": false + }, + "stats": { + "max": 1.197934865951538, + "q3": 1.1804951429367065, + "q1": 1.051497757434845, + "median": 1.1336700916290283, + "total": 5.600029945373535, + "stddev": 0.07091823162518827, + "outliers": "2;0", + "iqr": 0.12899738550186157, + "iqr_outliers": 0, + "min": 1.0376191139221191, + "iterations": 1, + "stddev_outliers": 2, + "rounds": 5, + "hd15iqr": 1.197934865951538, + "ops": 0.892852368428985, + "mean": 1.120005989074707, + "ld15iqr": 1.0376191139221191 + }, + "param": null + }, + { + "params": null, + "group": null, + "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_100_100k", + "extra_info": { + 
"cpu_percent": 107.1, + "doc": "" + }, + "name": "test_sqlite_blobs_backend_put_100_100k", + "options": { + "disable_gc": false, + "min_rounds": 5, + "timer": "time", + "min_time": 5e-06, + "max_time": 1.0, + "warmup": false + }, + "stats": { + "max": 0.4825630187988281, + "q3": 0.4800873398780823, + "q1": 0.32923150062561035, + "median": 0.3520970344543457, + "total": 1.9610421657562256, + "stddev": 0.08220973334366445, + "outliers": "2;0", + "iqr": 0.15085583925247192, + "iqr_outliers": 0, + "min": 0.31221699714660645, + "iterations": 1, + "stddev_outliers": 2, + "rounds": 5, + "hd15iqr": 0.4825630187988281, + "ops": 2.5496647075265098, + "mean": 0.3922084331512451, + "ld15iqr": 0.31221699714660645 + }, + "param": null + }, + { + "params": null, + "group": null, + "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_10_1000k", + "extra_info": { + "cpu_percent": 89.3, + "doc": "" + }, + "name": "test_sqlite_blobs_backend_put_10_1000k", + "options": { + "disable_gc": false, + "min_rounds": 5, + "timer": "time", + "min_time": 5e-06, + "max_time": 1.0, + "warmup": false + }, + "stats": { + "max": 0.2860860824584961, + "q3": 0.28166937828063965, + "q1": 0.2644330859184265, + "median": 0.27631092071533203, + "total": 1.3479201793670654, + "stddev": 0.022973488461055777, + "outliers": "1;1", + "iqr": 0.017236292362213135, + "iqr_outliers": 1, + "min": 0.22912287712097168, + "iterations": 1, + "stddev_outliers": 1, + "rounds": 5, + "hd15iqr": 0.2860860824584961, + "ops": 3.709418463004107, + "mean": 0.26958403587341306, + "ld15iqr": 0.2762031555175781 + }, + "param": null + } + ], + "datetime": "2017-09-13T14:36:01.333043" +} \ No newline at end of file diff --git a/scripts/benchmark/sqlite-blobs-backend/gen-graph.py b/scripts/benchmark/sqlite-blobs-backend/gen-graph.py new file mode 100755 index 00000000..2bb948f1 --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/gen-graph.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Plot 
# Plot bars comparing retrieval (get) and storage (put) times of the SQLite
# blobs backend for different amounts/sizes of blobs.
#
# This script can be improved to account for arbitrary number of data sets, but
# it is not doing it right now.

import json
import re

import matplotlib.pyplot as plt
import numpy as np

# make a prettier graph, using the 'ggplot' style sheet bundled with
# matplotlib (>= 1.4) instead of the separate `mpltools` package.
plt.style.use('ggplot')

OUTPUT_FILENAME = 'blobs-sqlite-backend.png'

# Benchmarks to plot, as "<amount>_<size>" labels, in the order in which they
# are laid out along the x axis.
graphs = [
    '1_10000k',
    '10_1000k',
    '100_100k',
    '1000_10k',
]

# Everything in a benchmark name that comes before its first digit, e.g. the
# "test_sqlite_blobs_backend_get_" part of
# "test_sqlite_blobs_backend_get_100_100k". Matching on "first digit" rather
# than on "first '1'" keeps labels such as "20_500k" intact.
_NAME_PREFIX = re.compile(r'^\D+')


# the JSON structure returned by the following function is ugly, but the
# original JSONs are even uglier, so this is here just to make the life of the
# script easier.
#
# We want to have something like:
#
#   data[get/put][amount_size] = <"stats" dict of that benchmark>

def get_data():
    """Load the benchmark stats from ``data/get.json`` and ``data/put.json``.

    The files are looked up relative to the current working directory.

    Returns:
        A dict with the keys ``'get'`` and ``'put'``. Each value maps the
        "<amount>_<size>" label of a benchmark (its name with the leading
        non-digit prefix removed) to that benchmark's ``stats`` dict.

    Raises:
        IOError/OSError: if one of the data files cannot be opened.
        ValueError: if one of the data files is not valid JSON.
        KeyError: if a file lacks the ``benchmarks``, ``name`` or ``stats``
            entries.
    """
    data = {}
    for fname in ['get', 'put']:
        with open('data/%s.json' % fname) as f:
            benchmarks = json.load(f)['benchmarks']
        data[fname] = {
            _NAME_PREFIX.sub('', item['name']): item['stats']
            for item in benchmarks
        }
    return data


def plot_data(data):
    """Plot the mean get/put time of every benchmark listed in ``graphs``.

    Draws one pair of bars (retrieval, storage) per entry of ``graphs`` and
    saves the figure to ``OUTPUT_FILENAME``.

    Args:
        data: the structure returned by ``get_data()``; only the ``'mean'``
            value of each stats dict is used.

    Raises:
        KeyError: if ``data`` has no stats for one of the labels in
            ``graphs``.
    """
    get_means = [data['get'][graph]['mean'] for graph in graphs]
    put_means = [data['put'][graph]['mean'] for graph in graphs]

    # one group of bars per entry in `graphs` (was hard-coded to 4)
    ind = np.arange(len(graphs))  # the x locations for the groups
    width = 0.40                  # the width of the bars

    fig, ax = plt.subplots()
    rects1 = ax.bar(ind, get_means, width)
    rects2 = ax.bar(ind + width, put_means, width)

    # add some text for labels, title and axes ticks
    ax.set_ylabel('Time for operation (s)')
    ax.set_xlabel('Amount and size of blobs')
    ax.set_title('Blobs storage and retrieval time')
    # put each tick midway between the x positions of the two bars of a group
    ax.set_xticks(ind + (0.5 * width))
    ax.set_xticklabels([name.replace('_', ' x ') for name in graphs])

    ax.legend(
        (rects1[0], rects2[0]),
        ('retrieval time', 'storage time'))

    fig.savefig(OUTPUT_FILENAME)


if __name__ == '__main__':
    plot_data(get_data())
b/scripts/benchmark/sqlite-blobs-backend/makefile new file mode 100644 index 00000000..f8187e9a --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/makefile @@ -0,0 +1,14 @@ +VIRTUALENV_NAME = plot +VIRTUALENV_ROOT = ~/.virtualenvs/$(VIRTUALENV_NAME) + +all: graph + +env: + if [ ! -d $(VIRTUALENV_ROOT) ]; then \ + mkdir -p $$(dirname $(VIRTUALENV_ROOT)); \ + virtualenv $(VIRTUALENV_ROOT); \ + $(VIRTUALENV_ROOT)/bin/pip install -r requirements.pip; \ + fi + +graph: env + $(VIRTUALENV_ROOT)/bin/python ./gen-graph.py diff --git a/scripts/benchmark/sqlite-blobs-backend/requirements.pip b/scripts/benchmark/sqlite-blobs-backend/requirements.pip new file mode 100644 index 00000000..db5d81e0 --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/requirements.pip @@ -0,0 +1,2 @@ +matplotlib +numpy -- cgit v1.2.3