[test] add graph for sqlite blobs backend benchmark test

author: drebs <drebs@riseup.net> 2017-09-13 12:09:41 -0300
committer: drebs <drebs@riseup.net> 2017-09-14 12:14:45 -0300
commit: 93250414c6ef77b69a0aca008ef28c778c48afe4 (patch)
tree: 1a394394ef1b5c0fae392474e95bc529708cf5eb /scripts
parent: dd7b2e414adf2c84873debcd7b7d526859036bc2 (diff)
8 files changed, 508 insertions, 2 deletions
diff --git a/scripts/benchmark/legacy-vs-blobs/README.rst b/scripts/benchmark/legacy-vs-blobs/README.rst
index 3eedb3a4..ca502b8a 100644
--- a/scripts/benchmark/legacy-vs-blobs/README.rst
+++ b/scripts/benchmark/legacy-vs-blobs/README.rst
@@ -1,5 +1,5 @@
-Ploggin Legacy vs Blobs results
-===============================
+Plotting Legacy vs Blobs results
+================================
 
 This script produces a bar graph comparing different implementations of the
 mail pipeline (legacy, blobs, blobs with session cache, blobs with session
diff --git a/scripts/benchmark/sqlite-blobs-backend/README.rst b/scripts/benchmark/sqlite-blobs-backend/README.rst
new file mode 100644
index 00000000..2d7a257f
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/README.rst
@@ -0,0 +1,35 @@
+Plotting SQLite Blobs Backend storage/retrieval speed
+=====================================================
+
+This script produces a bar graph comparing storage and retrieval times for
+different amounts and sizes of incoming data (1 x 10M, 10 x 1M, 100 x 100K,
+1000 x 10K).
+
+Data acquisition
+----------------
+
+Data was gathered through different runs of the code in
+`soledad/tests/benchmarks/test_sqlite_blobs_backend.py`. As these tests are run
+twice by default, you should remove the second call to pytest in `tox.ini` that
+would cause a second run of the tests for measuring memory. This script only
+plots time, for now.
+
+Once you have done the above, chdir into `soledad/tests` and run something like::
+
+  tox -e benchmarks -- -m sqlite_blobs_backend_get --benchmark-autosave
+  tox -e benchmarks -- -m sqlite_blobs_backend_put --benchmark-autosave
+
+Each run of tox as above produces a JSON file in `soledad/tests/.benchmarks`
+with stats. Copy the first file into `data/get.json` and the second file into
+`data/put.json` (where `data/` is a subdir in the same directory of this readme
+file).
+
+Plotting
+--------
+
+Once you have your data in the `data/` directory and the script is correctly
+tuned for your data, running `make` should be enough to create a virtualenv
+with the dependencies and plot the data.
+
+By default, the script will generate a file called `./blobs-sqlite-backend.png`
+in the current directory with the plot of the data.
diff --git a/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png b/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png
new file mode 100644
index 00000000..c989ad3e
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png
diff --git a/scripts/benchmark/sqlite-blobs-backend/data/get.json b/scripts/benchmark/sqlite-blobs-backend/data/get.json
new file mode 100644
index 00000000..8c2f5ed0
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/data/get.json
@@ -0,0 +1,187 @@
+{
+    "commit_info": {
+        "project": "soledad", 
+        "id": "81200dec8ac063a1ad77a5991c2bc7223e4445af", 
+        "time": "2017-09-13T11:11:15-03:00", 
+        "author_time": "2017-09-11T14:38:36-03:00", 
+        "dirty": true, 
+        "branch": "8942"
+    }, 
+    "version": "3.1.1", 
+    "benchmarks": [
+        {
+            "extra_info": {
+                "doc": "", 
+                "cpu_percent": 152.0
+            }, 
+            "params": null, 
+            "name": "test_sqlite_blobs_backend_get_100_100k", 
+            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_100_100k", 
+            "group": null, 
+            "options": {
+                "max_time": 1.0, 
+                "min_time": 5e-06, 
+                "min_rounds": 5, 
+                "warmup": false, 
+                "disable_gc": false, 
+                "timer": "time"
+            }, 
+            "stats": {
+                "q1": 0.03407096862792969, 
+                "iterations": 1, 
+                "q3": 0.03496694564819336, 
+                "stddev_outliers": 3, 
+                "ld15iqr": 0.03357100486755371, 
+                "total": 0.6347811222076416, 
+                "iqr_outliers": 3, 
+                "min": 0.03357100486755371, 
+                "median": 0.03432202339172363, 
+                "rounds": 18, 
+                "hd15iqr": 0.03844714164733887, 
+                "iqr": 0.0008959770202636719, 
+                "outliers": "3;3", 
+                "mean": 0.03526561790042453, 
+                "max": 0.041728973388671875, 
+                "stddev": 0.002284116271186255, 
+                "ops": 28.3562307861324
+            }, 
+            "param": null
+        }, 
+        {
+            "extra_info": {
+                "doc": "", 
+                "cpu_percent": 79.6
+            }, 
+            "params": null, 
+            "name": "test_sqlite_blobs_backend_get_1_10000k", 
+            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_1_10000k", 
+            "group": null, 
+            "options": {
+                "max_time": 1.0, 
+                "min_time": 5e-06, 
+                "min_rounds": 5, 
+                "warmup": false, 
+                "disable_gc": false, 
+                "timer": "time"
+            }, 
+            "stats": {
+                "q1": 0.04414188861846924, 
+                "iterations": 1, 
+                "q3": 0.04668295383453369, 
+                "stddev_outliers": 3, 
+                "ld15iqr": 0.04377603530883789, 
+                "total": 0.36537861824035645, 
+                "iqr_outliers": 0, 
+                "min": 0.04377603530883789, 
+                "median": 0.0463564395904541, 
+                "rounds": 8, 
+                "hd15iqr": 0.04724001884460449, 
+                "iqr": 0.002541065216064453, 
+                "outliers": "3;0", 
+                "mean": 0.045672327280044556, 
+                "max": 0.04724001884460449, 
+                "stddev": 0.001407690906727707, 
+                "ops": 21.89509621150675
+            }, 
+            "param": null
+        }, 
+        {
+            "extra_info": {
+                "doc": "", 
+                "cpu_percent": 151.1
+            }, 
+            "params": null, 
+            "name": "test_sqlite_blobs_backend_get_1000_10k", 
+            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_1000_10k", 
+            "group": null, 
+            "options": {
+                "max_time": 1.0, 
+                "min_time": 5e-06, 
+                "min_rounds": 5, 
+                "warmup": false, 
+                "disable_gc": false, 
+                "timer": "time"
+            }, 
+            "stats": {
+                "q1": 0.1577029824256897, 
+                "iterations": 1, 
+                "q3": 0.2019520401954651, 
+                "stddev_outliers": 2, 
+                "ld15iqr": 0.15537214279174805, 
+                "total": 0.8811802864074707, 
+                "iqr_outliers": 0, 
+                "min": 0.15537214279174805, 
+                "median": 0.16169404983520508, 
+                "rounds": 5, 
+                "hd15iqr": 0.20454716682434082, 
+                "iqr": 0.04424905776977539, 
+                "outliers": "2;0", 
+                "mean": 0.17623605728149414, 
+                "max": 0.20454716682434082, 
+                "stddev": 0.0243984691038373, 
+                "ops": 5.674207738333273
+            }, 
+            "param": null
+        }, 
+        {
+            "extra_info": {
+                "doc": "", 
+                "cpu_percent": 167.9
+            }, 
+            "params": null, 
+            "name": "test_sqlite_blobs_backend_get_10_1000k", 
+            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_10_1000k", 
+            "group": null, 
+            "options": {
+                "max_time": 1.0, 
+                "min_time": 5e-06, 
+                "min_rounds": 5, 
+                "warmup": false, 
+                "disable_gc": false, 
+                "timer": "time"
+            }, 
+            "stats": {
+                "q1": 0.0267794132232666, 
+                "iterations": 1, 
+                "q3": 0.027324676513671875, 
+                "stddev_outliers": 2, 
+                "ld15iqr": 0.026620864868164062, 
+                "total": 0.434096097946167, 
+                "iqr_outliers": 1, 
+                "min": 0.026620864868164062, 
+                "median": 0.02689993381500244, 
+                "rounds": 16, 
+                "hd15iqr": 0.028914213180541992, 
+                "iqr": 0.0005452632904052734, 
+                "outliers": "2;1", 
+                "mean": 0.027131006121635437, 
+                "max": 0.028914213180541992, 
+                "stddev": 0.0005876147817628755, 
+                "ops": 36.85819816326519
+            }, 
+            "param": null
+        }
+    ], 
+    "machine_info": {
+        "processor": "", 
+        "machine": "x86_64", 
+        "python_implementation_version": "2.7.13", 
+        "python_compiler": "GCC 6.3.0 20170118", 
+        "python_implementation": "CPython", 
+        "python_build": [
+            "default", 
+            "Jan 19 2017 14:48:08"
+        ], 
+        "python_version": "2.7.13", 
+        "cpu": {
+            "hardware": "unknown", 
+            "brand": "Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz", 
+            "vendor_id": "GenuineIntel"
+        }, 
+        "release": "4.9.0-3-amd64", 
+        "system": "Linux", 
+        "node": "pajeh", 
+        "host": "pajeh"
+    }, 
+    "datetime": "2017-09-13T14:35:32.735207"
+}
+\ No newline at end of file
diff --git a/scripts/benchmark/sqlite-blobs-backend/data/put.json b/scripts/benchmark/sqlite-blobs-backend/data/put.json
new file mode 100644
index 00000000..f56681fc
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/data/put.json
@@ -0,0 +1,187 @@
+{
+    "machine_info": {
+        "python_version": "2.7.13", 
+        "machine": "x86_64", 
+        "python_build": [
+            "default", 
+            "Jan 19 2017 14:48:08"
+        ], 
+        "node": "pajeh", 
+        "processor": "", 
+        "python_implementation": "CPython", 
+        "system": "Linux", 
+        "cpu": {
+            "hardware": "unknown", 
+            "brand": "Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz", 
+            "vendor_id": "GenuineIntel"
+        }, 
+        "host": "pajeh", 
+        "release": "4.9.0-3-amd64", 
+        "python_compiler": "GCC 6.3.0 20170118", 
+        "python_implementation_version": "2.7.13"
+    }, 
+    "commit_info": {
+        "author_time": "2017-09-11T14:38:36-03:00", 
+        "time": "2017-09-13T11:11:15-03:00", 
+        "project": "soledad", 
+        "dirty": true, 
+        "id": "81200dec8ac063a1ad77a5991c2bc7223e4445af", 
+        "branch": "8942"
+    }, 
+    "version": "3.1.1", 
+    "benchmarks": [
+        {
+            "params": null, 
+            "group": null, 
+            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_1_10000k", 
+            "extra_info": {
+                "cpu_percent": 78.4, 
+                "doc": ""
+            }, 
+            "name": "test_sqlite_blobs_backend_put_1_10000k", 
+            "options": {
+                "disable_gc": false, 
+                "min_rounds": 5, 
+                "timer": "time", 
+                "min_time": 5e-06, 
+                "max_time": 1.0, 
+                "warmup": false
+            }, 
+            "stats": {
+                "max": 0.40472412109375, 
+                "q3": 0.4024088382720947, 
+                "q1": 0.3844095468521118, 
+                "median": 0.39255285263061523, 
+                "total": 1.9671721458435059, 
+                "stddev": 0.009608947098416514, 
+                "outliers": "2;0", 
+                "iqr": 0.01799929141998291, 
+                "iqr_outliers": 0, 
+                "min": 0.38356804847717285, 
+                "iterations": 1, 
+                "stddev_outliers": 2, 
+                "rounds": 5, 
+                "hd15iqr": 0.40472412109375, 
+                "ops": 2.5417196001705507, 
+                "mean": 0.3934344291687012, 
+                "ld15iqr": 0.38356804847717285
+            }, 
+            "param": null
+        }, 
+        {
+            "params": null, 
+            "group": null, 
+            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_1000_10k", 
+            "extra_info": {
+                "cpu_percent": 113.7, 
+                "doc": ""
+            }, 
+            "name": "test_sqlite_blobs_backend_put_1000_10k", 
+            "options": {
+                "disable_gc": false, 
+                "min_rounds": 5, 
+                "timer": "time", 
+                "min_time": 5e-06, 
+                "max_time": 1.0, 
+                "warmup": false
+            }, 
+            "stats": {
+                "max": 1.197934865951538, 
+                "q3": 1.1804951429367065, 
+                "q1": 1.051497757434845, 
+                "median": 1.1336700916290283, 
+                "total": 5.600029945373535, 
+                "stddev": 0.07091823162518827, 
+                "outliers": "2;0", 
+                "iqr": 0.12899738550186157, 
+                "iqr_outliers": 0, 
+                "min": 1.0376191139221191, 
+                "iterations": 1, 
+                "stddev_outliers": 2, 
+                "rounds": 5, 
+                "hd15iqr": 1.197934865951538, 
+                "ops": 0.892852368428985, 
+                "mean": 1.120005989074707, 
+                "ld15iqr": 1.0376191139221191
+            }, 
+            "param": null
+        }, 
+        {
+            "params": null, 
+            "group": null, 
+            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_100_100k", 
+            "extra_info": {
+                "cpu_percent": 107.1, 
+                "doc": ""
+            }, 
+            "name": "test_sqlite_blobs_backend_put_100_100k", 
+            "options": {
+                "disable_gc": false, 
+                "min_rounds": 5, 
+                "timer": "time", 
+                "min_time": 5e-06, 
+                "max_time": 1.0, 
+                "warmup": false
+            }, 
+            "stats": {
+                "max": 0.4825630187988281, 
+                "q3": 0.4800873398780823, 
+                "q1": 0.32923150062561035, 
+                "median": 0.3520970344543457, 
+                "total": 1.9610421657562256, 
+                "stddev": 0.08220973334366445, 
+                "outliers": "2;0", 
+                "iqr": 0.15085583925247192, 
+                "iqr_outliers": 0, 
+                "min": 0.31221699714660645, 
+                "iterations": 1, 
+                "stddev_outliers": 2, 
+                "rounds": 5, 
+                "hd15iqr": 0.4825630187988281, 
+                "ops": 2.5496647075265098, 
+                "mean": 0.3922084331512451, 
+                "ld15iqr": 0.31221699714660645
+            }, 
+            "param": null
+        }, 
+        {
+            "params": null, 
+            "group": null, 
+            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_10_1000k", 
+            "extra_info": {
+                "cpu_percent": 89.3, 
+                "doc": ""
+            }, 
+            "name": "test_sqlite_blobs_backend_put_10_1000k", 
+            "options": {
+                "disable_gc": false, 
+                "min_rounds": 5, 
+                "timer": "time", 
+                "min_time": 5e-06, 
+                "max_time": 1.0, 
+                "warmup": false
+            }, 
+            "stats": {
+                "max": 0.2860860824584961, 
+                "q3": 0.28166937828063965, 
+                "q1": 0.2644330859184265, 
+                "median": 0.27631092071533203, 
+                "total": 1.3479201793670654, 
+                "stddev": 0.022973488461055777, 
+                "outliers": "1;1", 
+                "iqr": 0.017236292362213135, 
+                "iqr_outliers": 1, 
+                "min": 0.22912287712097168, 
+                "iterations": 1, 
+                "stddev_outliers": 1, 
+                "rounds": 5, 
+                "hd15iqr": 0.2860860824584961, 
+                "ops": 3.709418463004107, 
+                "mean": 0.26958403587341306, 
+                "ld15iqr": 0.2762031555175781
+            }, 
+            "param": null
+        }
+    ], 
+    "datetime": "2017-09-13T14:36:01.333043"
+}
+\ No newline at end of file
diff --git a/scripts/benchmark/sqlite-blobs-backend/gen-graph.py b/scripts/benchmark/sqlite-blobs-backend/gen-graph.py
new file mode 100755
index 00000000..2bb948f1
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/gen-graph.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+# Plot bars comparing storage and retrieval times of the SQLite blobs backend.
+#
+# This script can be improved to account for arbitrary number of data sets, but
+# it is not doing it right now.
+
+import json
+import matplotlib.pyplot as plt
+import numpy as np
+import re
+
+# make a prettier graph
+from mpltools import style
+style.use('ggplot')
+
+OUTPUT_FILENAME = 'blobs-sqlite-backend.png'
+
+graphs = [
+    '1_10000k',
+    '10_1000k',
+    '100_100k',
+    '1000_10k',
+]
+
+
+# the JSON structure returned by the following function is ugly, but the
+# original JSONs are even uglier, so this is here just to make the life of the
+# script easier.
+#
+# We want to have something like:
+#
+#   data[get/put][amount_size] = <stats>
+
+def get_data():
+    data = {}
+    for fname in ['get', 'put']:
+        data[fname] = {}
+        with open('data/%s.json' % fname) as f:
+            d = json.loads(f.read())
+            benchmarks = d['benchmarks']
+            for item in benchmarks:
+                name = re.sub('^[^1]+', '', item['name'])
+                data[fname][name] = item['stats']
+    return data
+
+
+def plot_data(data):
+
+    N = 4
+
+    get_means = tuple([data['get'][graph]['mean'] for graph in graphs])
+    put_means = tuple([data['put'][graph]['mean'] for graph in graphs])
+
+    ind = np.arange(N)  # the x locations for the groups
+    width = 0.40        # the width of the bars
+
+    fig, ax = plt.subplots()
+    rects1 = ax.bar(ind, get_means, width)
+    rects2 = ax.bar(ind + width, put_means, width)
+
+    # add some text for labels, title and axes ticks
+    ax.set_ylabel('Time for operation (s)')
+    ax.set_xlabel('Amount and size of blobs')
+    ax.set_title('Blobs storage and retrieval time')
+    ax.set_xticks(ind + (0.5 * width))
+    ax.set_xticklabels(
+        tuple(map(lambda name: name.replace('_', ' x '), graphs)))
+
+    ax.legend(
+        (rects1[0], rects2[0]),
+        ('retrieval time', 'storage time'))
+    # ax.grid()
+
+    plt.savefig(OUTPUT_FILENAME)
+    # plt.show()
+
+
+if __name__ == '__main__':
+    data = get_data()
+    plot_data(data)
diff --git a/scripts/benchmark/sqlite-blobs-backend/makefile b/scripts/benchmark/sqlite-blobs-backend/makefile
new file mode 100644
index 00000000..f8187e9a
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/makefile
@@ -0,0 +1,14 @@
+VIRTUALENV_NAME = plot
+VIRTUALENV_ROOT = ~/.virtualenvs/$(VIRTUALENV_NAME)
+
+# none of the targets below produce a file with their own name
+.PHONY: all env graph
+
+all: graph
+
+# Create the virtualenv and install the plotting dependencies, unless the
+# virtualenv directory already exists. Steps are chained with && so that a
+# failure stops the recipe, and pip needs -r to read requirements.pip as a
+# requirements file (without it pip looks for a package with that name).
+env:
+	if [ ! -d $(VIRTUALENV_ROOT) ]; then \
+	  mkdir -p $$(dirname $(VIRTUALENV_ROOT)) && \
+	  virtualenv $(VIRTUALENV_ROOT) && \
+	  $(VIRTUALENV_ROOT)/bin/pip install -r requirements.pip; \
+	fi
+
+# Render the plot using the virtualenv's python.
+graph: env
+	$(VIRTUALENV_ROOT)/bin/python ./gen-graph.py
diff --git a/scripts/benchmark/sqlite-blobs-backend/requirements.pip b/scripts/benchmark/sqlite-blobs-backend/requirements.pip
new file mode 100644
index 00000000..db5d81e0
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/requirements.pip
@@ -0,0 +1,2 @@
+matplotlib
+numpy
+# provides the 'ggplot' style imported by gen-graph.py
+mpltools
author	drebs <drebs@riseup.net>	2017-09-13 12:09:41 -0300
committer	drebs <drebs@riseup.net>	2017-09-14 12:14:45 -0300
commit	93250414c6ef77b69a0aca008ef28c778c48afe4 (patch)
tree	1a394394ef1b5c0fae392474e95bc529708cf5eb /scripts
parent	dd7b2e414adf2c84873debcd7b7d526859036bc2 (diff)