summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/benchmark/legacy-vs-blobs/README.rst4
-rw-r--r--scripts/benchmark/sqlite-blobs-backend/README.rst35
-rw-r--r--scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.pngbin0 -> 26563 bytes
-rw-r--r--scripts/benchmark/sqlite-blobs-backend/data/get.json187
-rw-r--r--scripts/benchmark/sqlite-blobs-backend/data/put.json187
-rwxr-xr-xscripts/benchmark/sqlite-blobs-backend/gen-graph.py81
-rw-r--r--scripts/benchmark/sqlite-blobs-backend/makefile14
-rw-r--r--scripts/benchmark/sqlite-blobs-backend/requirements.pip2
8 files changed, 508 insertions, 2 deletions
diff --git a/scripts/benchmark/legacy-vs-blobs/README.rst b/scripts/benchmark/legacy-vs-blobs/README.rst
index 3eedb3a4..ca502b8a 100644
--- a/scripts/benchmark/legacy-vs-blobs/README.rst
+++ b/scripts/benchmark/legacy-vs-blobs/README.rst
@@ -1,5 +1,5 @@
-Ploggin Legacy vs Blobs results
-===============================
+Plotting Legacy vs Blobs results
+================================
This script produces a bar graph comparing different implementations of the
mail pipeline (legacy, blobs, blobs with session cache, blobs with session
diff --git a/scripts/benchmark/sqlite-blobs-backend/README.rst b/scripts/benchmark/sqlite-blobs-backend/README.rst
new file mode 100644
index 00000000..2d7a257f
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/README.rst
@@ -0,0 +1,35 @@
+Plotting SQLite Blobs Backend storage/retrieval speed
+=====================================================
+
+This script produces a bar graph comparing storage and retrieval times for
+different amounts and sizes of incoming data (1 x 10M, 10 x 1M, 100 x 100K,
+1000 x 10K).
+
+Data acquisition
+----------------
+
+Data was gathered through different runs of the code in
+`soledad/tests/benchmarks/test_sqlite_blobs_backend.py`. As these tests are run
+twice by default, you should remove the second call to pytest in `tox.ini` that
+would cause a second run of the tests for measuring memory. This script only
+plots time, for now.
+
+Once you have done the above, chdir into `soledad/tests` and do something like
+this::
+
+ tox -e benchmarks -- -m sqlite_blobs_backend_get --benchmark-autosave
+ tox -e benchmarks -- -m sqlite_blobs_backend_put --benchmark-autosave
+
+Each run of tox as above produces a JSON file in `soledad/tests/.benchmarks`
+with stats. Copy the file from the first run into `data/get.json` and the file
+from the second run into `data/put.json` (where `data/` is a subdirectory of
+the directory containing this readme file).
+
+Plotting
+--------
+
+Once you have your data in the `data/` directory and the script is correctly
+tuned for your data, running `make` should be enough to create a virtual
+environment with the dependencies and plot the data.
+
+By default, the script will generate a file called `./blobs-sqlite-backend.png`
+in the current directory with the plot of the data.
diff --git a/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png b/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png
new file mode 100644
index 00000000..c989ad3e
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png
Binary files differ
diff --git a/scripts/benchmark/sqlite-blobs-backend/data/get.json b/scripts/benchmark/sqlite-blobs-backend/data/get.json
new file mode 100644
index 00000000..8c2f5ed0
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/data/get.json
@@ -0,0 +1,187 @@
+{
+ "commit_info": {
+ "project": "soledad",
+ "id": "81200dec8ac063a1ad77a5991c2bc7223e4445af",
+ "time": "2017-09-13T11:11:15-03:00",
+ "author_time": "2017-09-11T14:38:36-03:00",
+ "dirty": true,
+ "branch": "8942"
+ },
+ "version": "3.1.1",
+ "benchmarks": [
+ {
+ "extra_info": {
+ "doc": "",
+ "cpu_percent": 152.0
+ },
+ "params": null,
+ "name": "test_sqlite_blobs_backend_get_100_100k",
+ "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_100_100k",
+ "group": null,
+ "options": {
+ "max_time": 1.0,
+ "min_time": 5e-06,
+ "min_rounds": 5,
+ "warmup": false,
+ "disable_gc": false,
+ "timer": "time"
+ },
+ "stats": {
+ "q1": 0.03407096862792969,
+ "iterations": 1,
+ "q3": 0.03496694564819336,
+ "stddev_outliers": 3,
+ "ld15iqr": 0.03357100486755371,
+ "total": 0.6347811222076416,
+ "iqr_outliers": 3,
+ "min": 0.03357100486755371,
+ "median": 0.03432202339172363,
+ "rounds": 18,
+ "hd15iqr": 0.03844714164733887,
+ "iqr": 0.0008959770202636719,
+ "outliers": "3;3",
+ "mean": 0.03526561790042453,
+ "max": 0.041728973388671875,
+ "stddev": 0.002284116271186255,
+ "ops": 28.3562307861324
+ },
+ "param": null
+ },
+ {
+ "extra_info": {
+ "doc": "",
+ "cpu_percent": 79.6
+ },
+ "params": null,
+ "name": "test_sqlite_blobs_backend_get_1_10000k",
+ "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_1_10000k",
+ "group": null,
+ "options": {
+ "max_time": 1.0,
+ "min_time": 5e-06,
+ "min_rounds": 5,
+ "warmup": false,
+ "disable_gc": false,
+ "timer": "time"
+ },
+ "stats": {
+ "q1": 0.04414188861846924,
+ "iterations": 1,
+ "q3": 0.04668295383453369,
+ "stddev_outliers": 3,
+ "ld15iqr": 0.04377603530883789,
+ "total": 0.36537861824035645,
+ "iqr_outliers": 0,
+ "min": 0.04377603530883789,
+ "median": 0.0463564395904541,
+ "rounds": 8,
+ "hd15iqr": 0.04724001884460449,
+ "iqr": 0.002541065216064453,
+ "outliers": "3;0",
+ "mean": 0.045672327280044556,
+ "max": 0.04724001884460449,
+ "stddev": 0.001407690906727707,
+ "ops": 21.89509621150675
+ },
+ "param": null
+ },
+ {
+ "extra_info": {
+ "doc": "",
+ "cpu_percent": 151.1
+ },
+ "params": null,
+ "name": "test_sqlite_blobs_backend_get_1000_10k",
+ "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_1000_10k",
+ "group": null,
+ "options": {
+ "max_time": 1.0,
+ "min_time": 5e-06,
+ "min_rounds": 5,
+ "warmup": false,
+ "disable_gc": false,
+ "timer": "time"
+ },
+ "stats": {
+ "q1": 0.1577029824256897,
+ "iterations": 1,
+ "q3": 0.2019520401954651,
+ "stddev_outliers": 2,
+ "ld15iqr": 0.15537214279174805,
+ "total": 0.8811802864074707,
+ "iqr_outliers": 0,
+ "min": 0.15537214279174805,
+ "median": 0.16169404983520508,
+ "rounds": 5,
+ "hd15iqr": 0.20454716682434082,
+ "iqr": 0.04424905776977539,
+ "outliers": "2;0",
+ "mean": 0.17623605728149414,
+ "max": 0.20454716682434082,
+ "stddev": 0.0243984691038373,
+ "ops": 5.674207738333273
+ },
+ "param": null
+ },
+ {
+ "extra_info": {
+ "doc": "",
+ "cpu_percent": 167.9
+ },
+ "params": null,
+ "name": "test_sqlite_blobs_backend_get_10_1000k",
+ "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_10_1000k",
+ "group": null,
+ "options": {
+ "max_time": 1.0,
+ "min_time": 5e-06,
+ "min_rounds": 5,
+ "warmup": false,
+ "disable_gc": false,
+ "timer": "time"
+ },
+ "stats": {
+ "q1": 0.0267794132232666,
+ "iterations": 1,
+ "q3": 0.027324676513671875,
+ "stddev_outliers": 2,
+ "ld15iqr": 0.026620864868164062,
+ "total": 0.434096097946167,
+ "iqr_outliers": 1,
+ "min": 0.026620864868164062,
+ "median": 0.02689993381500244,
+ "rounds": 16,
+ "hd15iqr": 0.028914213180541992,
+ "iqr": 0.0005452632904052734,
+ "outliers": "2;1",
+ "mean": 0.027131006121635437,
+ "max": 0.028914213180541992,
+ "stddev": 0.0005876147817628755,
+ "ops": 36.85819816326519
+ },
+ "param": null
+ }
+ ],
+ "machine_info": {
+ "processor": "",
+ "machine": "x86_64",
+ "python_implementation_version": "2.7.13",
+ "python_compiler": "GCC 6.3.0 20170118",
+ "python_implementation": "CPython",
+ "python_build": [
+ "default",
+ "Jan 19 2017 14:48:08"
+ ],
+ "python_version": "2.7.13",
+ "cpu": {
+ "hardware": "unknown",
+ "brand": "Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz",
+ "vendor_id": "GenuineIntel"
+ },
+ "release": "4.9.0-3-amd64",
+ "system": "Linux",
+ "node": "pajeh",
+ "host": "pajeh"
+ },
+ "datetime": "2017-09-13T14:35:32.735207"
+} \ No newline at end of file
diff --git a/scripts/benchmark/sqlite-blobs-backend/data/put.json b/scripts/benchmark/sqlite-blobs-backend/data/put.json
new file mode 100644
index 00000000..f56681fc
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/data/put.json
@@ -0,0 +1,187 @@
+{
+ "machine_info": {
+ "python_version": "2.7.13",
+ "machine": "x86_64",
+ "python_build": [
+ "default",
+ "Jan 19 2017 14:48:08"
+ ],
+ "node": "pajeh",
+ "processor": "",
+ "python_implementation": "CPython",
+ "system": "Linux",
+ "cpu": {
+ "hardware": "unknown",
+ "brand": "Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz",
+ "vendor_id": "GenuineIntel"
+ },
+ "host": "pajeh",
+ "release": "4.9.0-3-amd64",
+ "python_compiler": "GCC 6.3.0 20170118",
+ "python_implementation_version": "2.7.13"
+ },
+ "commit_info": {
+ "author_time": "2017-09-11T14:38:36-03:00",
+ "time": "2017-09-13T11:11:15-03:00",
+ "project": "soledad",
+ "dirty": true,
+ "id": "81200dec8ac063a1ad77a5991c2bc7223e4445af",
+ "branch": "8942"
+ },
+ "version": "3.1.1",
+ "benchmarks": [
+ {
+ "params": null,
+ "group": null,
+ "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_1_10000k",
+ "extra_info": {
+ "cpu_percent": 78.4,
+ "doc": ""
+ },
+ "name": "test_sqlite_blobs_backend_put_1_10000k",
+ "options": {
+ "disable_gc": false,
+ "min_rounds": 5,
+ "timer": "time",
+ "min_time": 5e-06,
+ "max_time": 1.0,
+ "warmup": false
+ },
+ "stats": {
+ "max": 0.40472412109375,
+ "q3": 0.4024088382720947,
+ "q1": 0.3844095468521118,
+ "median": 0.39255285263061523,
+ "total": 1.9671721458435059,
+ "stddev": 0.009608947098416514,
+ "outliers": "2;0",
+ "iqr": 0.01799929141998291,
+ "iqr_outliers": 0,
+ "min": 0.38356804847717285,
+ "iterations": 1,
+ "stddev_outliers": 2,
+ "rounds": 5,
+ "hd15iqr": 0.40472412109375,
+ "ops": 2.5417196001705507,
+ "mean": 0.3934344291687012,
+ "ld15iqr": 0.38356804847717285
+ },
+ "param": null
+ },
+ {
+ "params": null,
+ "group": null,
+ "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_1000_10k",
+ "extra_info": {
+ "cpu_percent": 113.7,
+ "doc": ""
+ },
+ "name": "test_sqlite_blobs_backend_put_1000_10k",
+ "options": {
+ "disable_gc": false,
+ "min_rounds": 5,
+ "timer": "time",
+ "min_time": 5e-06,
+ "max_time": 1.0,
+ "warmup": false
+ },
+ "stats": {
+ "max": 1.197934865951538,
+ "q3": 1.1804951429367065,
+ "q1": 1.051497757434845,
+ "median": 1.1336700916290283,
+ "total": 5.600029945373535,
+ "stddev": 0.07091823162518827,
+ "outliers": "2;0",
+ "iqr": 0.12899738550186157,
+ "iqr_outliers": 0,
+ "min": 1.0376191139221191,
+ "iterations": 1,
+ "stddev_outliers": 2,
+ "rounds": 5,
+ "hd15iqr": 1.197934865951538,
+ "ops": 0.892852368428985,
+ "mean": 1.120005989074707,
+ "ld15iqr": 1.0376191139221191
+ },
+ "param": null
+ },
+ {
+ "params": null,
+ "group": null,
+ "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_100_100k",
+ "extra_info": {
+ "cpu_percent": 107.1,
+ "doc": ""
+ },
+ "name": "test_sqlite_blobs_backend_put_100_100k",
+ "options": {
+ "disable_gc": false,
+ "min_rounds": 5,
+ "timer": "time",
+ "min_time": 5e-06,
+ "max_time": 1.0,
+ "warmup": false
+ },
+ "stats": {
+ "max": 0.4825630187988281,
+ "q3": 0.4800873398780823,
+ "q1": 0.32923150062561035,
+ "median": 0.3520970344543457,
+ "total": 1.9610421657562256,
+ "stddev": 0.08220973334366445,
+ "outliers": "2;0",
+ "iqr": 0.15085583925247192,
+ "iqr_outliers": 0,
+ "min": 0.31221699714660645,
+ "iterations": 1,
+ "stddev_outliers": 2,
+ "rounds": 5,
+ "hd15iqr": 0.4825630187988281,
+ "ops": 2.5496647075265098,
+ "mean": 0.3922084331512451,
+ "ld15iqr": 0.31221699714660645
+ },
+ "param": null
+ },
+ {
+ "params": null,
+ "group": null,
+ "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_10_1000k",
+ "extra_info": {
+ "cpu_percent": 89.3,
+ "doc": ""
+ },
+ "name": "test_sqlite_blobs_backend_put_10_1000k",
+ "options": {
+ "disable_gc": false,
+ "min_rounds": 5,
+ "timer": "time",
+ "min_time": 5e-06,
+ "max_time": 1.0,
+ "warmup": false
+ },
+ "stats": {
+ "max": 0.2860860824584961,
+ "q3": 0.28166937828063965,
+ "q1": 0.2644330859184265,
+ "median": 0.27631092071533203,
+ "total": 1.3479201793670654,
+ "stddev": 0.022973488461055777,
+ "outliers": "1;1",
+ "iqr": 0.017236292362213135,
+ "iqr_outliers": 1,
+ "min": 0.22912287712097168,
+ "iterations": 1,
+ "stddev_outliers": 1,
+ "rounds": 5,
+ "hd15iqr": 0.2860860824584961,
+ "ops": 3.709418463004107,
+ "mean": 0.26958403587341306,
+ "ld15iqr": 0.2762031555175781
+ },
+ "param": null
+ }
+ ],
+ "datetime": "2017-09-13T14:36:01.333043"
+} \ No newline at end of file
diff --git a/scripts/benchmark/sqlite-blobs-backend/gen-graph.py b/scripts/benchmark/sqlite-blobs-backend/gen-graph.py
new file mode 100755
index 00000000..2bb948f1
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/gen-graph.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+# Plot bars comparing storage and retrieval times of the SQLite blobs backend.
+#
+# This script can be improved to account for an arbitrary number of data sets,
+# but it is not doing it right now.
+
+import json
+import matplotlib.pyplot as plt
+import numpy as np
+import re
+
+# make a prettier graph
+from mpltools import style
+style.use('ggplot')
+
+# name of the image file written by plot_data()
+OUTPUT_FILENAME = 'blobs-sqlite-backend.png'
+
+# data sets to plot, in display order; plot_data() looks each entry up in both
+# data['get'] and data['put'], and uses it (with '_' shown as ' x ') as the
+# tick label of its group of bars
+graphs = [
+ '1_10000k',
+ '10_1000k',
+ '100_100k',
+ '1000_10k',
+]
+
+
+# the JSON structure returned by the following function is ugly, but the
+# original JSONs are even uglier, so this is here just to make the life of the
+# script easier.
+#
+# We want to have something like:
+#
+# data[get/put][amount_size] = <stats>
+
+def get_data():
+ data = {}
+ for fname in ['get', 'put']:
+ data[fname] = {}
+ with open('data/%s.json' % fname) as f:
+ d = json.loads(f.read())
+ benchmarks = d['benchmarks']
+ for item in benchmarks:
+ name = re.sub('^[^1]+', '', item['name'])
+ data[fname][name] = item['stats']
+ return data
+
+
+def plot_data(data):
+
+ N = 4
+
+ get_means = tuple([data['get'][graph]['mean'] for graph in graphs])
+ put_means = tuple([data['put'][graph]['mean'] for graph in graphs])
+
+ ind = np.arange(N) # the x locations for the groups
+ width = 0.40 # the width of the bars
+
+ fig, ax = plt.subplots()
+ rects1 = ax.bar(ind, get_means, width)
+ rects2 = ax.bar(ind + width, put_means, width)
+
+ # add some text for labels, title and axes ticks
+ ax.set_ylabel('Time for operation (s)')
+ ax.set_xlabel('Amount and size of blobs')
+ ax.set_title('Blobs storage and retrieval time')
+ ax.set_xticks(ind + (0.5 * width))
+ ax.set_xticklabels(
+ tuple(map(lambda name: name.replace('_', ' x '), graphs)))
+
+ ax.legend(
+ (rects1[0], rects2[0]),
+ ('retrieval time', 'storage time'))
+ # ax.grid()
+
+ plt.savefig(OUTPUT_FILENAME)
+ # plt.show()
+
+
+# when run as a script: load the stats from the JSON files and plot them
+if __name__ == '__main__':
+ data = get_data()
+ plot_data(data)
diff --git a/scripts/benchmark/sqlite-blobs-backend/makefile b/scripts/benchmark/sqlite-blobs-backend/makefile
new file mode 100644
index 00000000..f8187e9a
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/makefile
@@ -0,0 +1,14 @@
+# Create (once) a virtualenv with the plotting dependencies and use it to
+# generate the graph.
+VIRTUALENV_NAME = plot
+VIRTUALENV_ROOT = ~/.virtualenvs/$(VIRTUALENV_NAME)
+
+# none of the targets creates a file named after itself
+.PHONY: all env graph
+
+all: graph
+
+# the virtualenv is only created and populated if its directory is missing;
+# `-r` makes pip read the list of packages from the requirements file instead
+# of looking for a package called `requirements.pip`
+env:
+	if [ ! -d $(VIRTUALENV_ROOT) ]; then \
+		mkdir -p $$(dirname $(VIRTUALENV_ROOT)); \
+		virtualenv $(VIRTUALENV_ROOT); \
+		$(VIRTUALENV_ROOT)/bin/pip install -r requirements.pip; \
+	fi
+
+graph: env
+	$(VIRTUALENV_ROOT)/bin/python ./gen-graph.py
diff --git a/scripts/benchmark/sqlite-blobs-backend/requirements.pip b/scripts/benchmark/sqlite-blobs-backend/requirements.pip
new file mode 100644
index 00000000..db5d81e0
--- /dev/null
+++ b/scripts/benchmark/sqlite-blobs-backend/requirements.pip
@@ -0,0 +1,2 @@
+matplotlib
+# gen-graph.py imports `style` from mpltools
+mpltools
+numpy