diff options
Diffstat (limited to 'scripts/benchmark')
| -rw-r--r-- | scripts/benchmark/legacy-vs-blobs/README.rst | 4 | ||||
| -rw-r--r-- | scripts/benchmark/sqlite-blobs-backend/README.rst | 35 | ||||
| -rw-r--r-- | scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png | bin | 0 -> 26563 bytes | |||
| -rw-r--r-- | scripts/benchmark/sqlite-blobs-backend/data/get.json | 187 | ||||
| -rw-r--r-- | scripts/benchmark/sqlite-blobs-backend/data/put.json | 187 | ||||
| -rwxr-xr-x | scripts/benchmark/sqlite-blobs-backend/gen-graph.py | 81 | ||||
| -rw-r--r-- | scripts/benchmark/sqlite-blobs-backend/makefile | 14 | ||||
| -rw-r--r-- | scripts/benchmark/sqlite-blobs-backend/requirements.pip | 2 | 
8 files changed, 508 insertions, 2 deletions
| diff --git a/scripts/benchmark/legacy-vs-blobs/README.rst b/scripts/benchmark/legacy-vs-blobs/README.rst index 3eedb3a4..ca502b8a 100644 --- a/scripts/benchmark/legacy-vs-blobs/README.rst +++ b/scripts/benchmark/legacy-vs-blobs/README.rst @@ -1,5 +1,5 @@ -Ploggin Legacy vs Blobs results -=============================== +Plotting Legacy vs Blobs results +================================  This script produces a bar graph comparing different implementations of the  mail pipeline (legacy, blobs, blobs with session cache, blobs with session diff --git a/scripts/benchmark/sqlite-blobs-backend/README.rst b/scripts/benchmark/sqlite-blobs-backend/README.rst new file mode 100644 index 00000000..2d7a257f --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/README.rst @@ -0,0 +1,35 @@ +Plotting SQLite Blobs Backend storage/retrieval speed +===================================================== + +This script produces a bar graph comparing storage and retrieval times for +different amounts/sizes of incoming data (1 x 10M, 10 x 1M, 100 x 100K, 1000 +x 10K). + +Data acquisition +---------------- + +Data was gathered through different runs of the code in +`soledad/tests/benchmarks/test_sqlite_blobs_backend.py`. As these tests are run +twice by default, you should remove the second call to pytest in `tox.ini` that +would cause a second run of the tests for measuring memory. This script only +plots time, for now. + +Once you have done the above, chdir into `soledad/tests` and do something like this: + +  tox -e benchmarks -- -m sqlite_blobs_backend_get --benchmark-autosave +  tox -e benchmarks -- -m sqlite_blobs_backend_put --benchmark-autosave + +Each run of tox as above produces a JSON file in `soledad/tests/.benchmarks` +with stats. Copy the first file into `data/get.json` and the second file into +`data/put.json` (where `data/` is a subdir in the same directory as this readme +file). 
+ +Plotting +-------- + +Once you have all your data in the `data/` directory and the +script is correctly tuned for your data, running `make` should be enough to +create a virtual environment with dependencies and plot the data. + +By default, the script will generate a file called `./blobs-sqlite-backend.png` +in the current directory with the plot of the data. diff --git a/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png b/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.pngBinary files differ new file mode 100644 index 00000000..c989ad3e --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/blobs-sqlite-backend.png diff --git a/scripts/benchmark/sqlite-blobs-backend/data/get.json b/scripts/benchmark/sqlite-blobs-backend/data/get.json new file mode 100644 index 00000000..8c2f5ed0 --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/data/get.json @@ -0,0 +1,187 @@ +{ +    "commit_info": { +        "project": "soledad",  +        "id": "81200dec8ac063a1ad77a5991c2bc7223e4445af",  +        "time": "2017-09-13T11:11:15-03:00",  +        "author_time": "2017-09-11T14:38:36-03:00",  +        "dirty": true,  +        "branch": "8942" +    },  +    "version": "3.1.1",  +    "benchmarks": [ +        { +            "extra_info": { +                "doc": "",  +                "cpu_percent": 152.0 +            },  +            "params": null,  +            "name": "test_sqlite_blobs_backend_get_100_100k",  +            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_100_100k",  +            "group": null,  +            "options": { +                "max_time": 1.0,  +                "min_time": 5e-06,  +                "min_rounds": 5,  +                "warmup": false,  +                "disable_gc": false,  +                "timer": "time" +            },  +            "stats": { +                "q1": 0.03407096862792969,  +                "iterations": 1,  +                
"q3": 0.03496694564819336,  +                "stddev_outliers": 3,  +                "ld15iqr": 0.03357100486755371,  +                "total": 0.6347811222076416,  +                "iqr_outliers": 3,  +                "min": 0.03357100486755371,  +                "median": 0.03432202339172363,  +                "rounds": 18,  +                "hd15iqr": 0.03844714164733887,  +                "iqr": 0.0008959770202636719,  +                "outliers": "3;3",  +                "mean": 0.03526561790042453,  +                "max": 0.041728973388671875,  +                "stddev": 0.002284116271186255,  +                "ops": 28.3562307861324 +            },  +            "param": null +        },  +        { +            "extra_info": { +                "doc": "",  +                "cpu_percent": 79.6 +            },  +            "params": null,  +            "name": "test_sqlite_blobs_backend_get_1_10000k",  +            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_1_10000k",  +            "group": null,  +            "options": { +                "max_time": 1.0,  +                "min_time": 5e-06,  +                "min_rounds": 5,  +                "warmup": false,  +                "disable_gc": false,  +                "timer": "time" +            },  +            "stats": { +                "q1": 0.04414188861846924,  +                "iterations": 1,  +                "q3": 0.04668295383453369,  +                "stddev_outliers": 3,  +                "ld15iqr": 0.04377603530883789,  +                "total": 0.36537861824035645,  +                "iqr_outliers": 0,  +                "min": 0.04377603530883789,  +                "median": 0.0463564395904541,  +                "rounds": 8,  +                "hd15iqr": 0.04724001884460449,  +                "iqr": 0.002541065216064453,  +                "outliers": "3;0",  +                "mean": 0.045672327280044556,  +                "max": 0.04724001884460449, 
 +                "stddev": 0.001407690906727707,  +                "ops": 21.89509621150675 +            },  +            "param": null +        },  +        { +            "extra_info": { +                "doc": "",  +                "cpu_percent": 151.1 +            },  +            "params": null,  +            "name": "test_sqlite_blobs_backend_get_1000_10k",  +            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_1000_10k",  +            "group": null,  +            "options": { +                "max_time": 1.0,  +                "min_time": 5e-06,  +                "min_rounds": 5,  +                "warmup": false,  +                "disable_gc": false,  +                "timer": "time" +            },  +            "stats": { +                "q1": 0.1577029824256897,  +                "iterations": 1,  +                "q3": 0.2019520401954651,  +                "stddev_outliers": 2,  +                "ld15iqr": 0.15537214279174805,  +                "total": 0.8811802864074707,  +                "iqr_outliers": 0,  +                "min": 0.15537214279174805,  +                "median": 0.16169404983520508,  +                "rounds": 5,  +                "hd15iqr": 0.20454716682434082,  +                "iqr": 0.04424905776977539,  +                "outliers": "2;0",  +                "mean": 0.17623605728149414,  +                "max": 0.20454716682434082,  +                "stddev": 0.0243984691038373,  +                "ops": 5.674207738333273 +            },  +            "param": null +        },  +        { +            "extra_info": { +                "doc": "",  +                "cpu_percent": 167.9 +            },  +            "params": null,  +            "name": "test_sqlite_blobs_backend_get_10_1000k",  +            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_get_10_1000k",  +            "group": null,  +            "options": { +                
"max_time": 1.0,  +                "min_time": 5e-06,  +                "min_rounds": 5,  +                "warmup": false,  +                "disable_gc": false,  +                "timer": "time" +            },  +            "stats": { +                "q1": 0.0267794132232666,  +                "iterations": 1,  +                "q3": 0.027324676513671875,  +                "stddev_outliers": 2,  +                "ld15iqr": 0.026620864868164062,  +                "total": 0.434096097946167,  +                "iqr_outliers": 1,  +                "min": 0.026620864868164062,  +                "median": 0.02689993381500244,  +                "rounds": 16,  +                "hd15iqr": 0.028914213180541992,  +                "iqr": 0.0005452632904052734,  +                "outliers": "2;1",  +                "mean": 0.027131006121635437,  +                "max": 0.028914213180541992,  +                "stddev": 0.0005876147817628755,  +                "ops": 36.85819816326519 +            },  +            "param": null +        } +    ],  +    "machine_info": { +        "processor": "",  +        "machine": "x86_64",  +        "python_implementation_version": "2.7.13",  +        "python_compiler": "GCC 6.3.0 20170118",  +        "python_implementation": "CPython",  +        "python_build": [ +            "default",  +            "Jan 19 2017 14:48:08" +        ],  +        "python_version": "2.7.13",  +        "cpu": { +            "hardware": "unknown",  +            "brand": "Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz",  +            "vendor_id": "GenuineIntel" +        },  +        "release": "4.9.0-3-amd64",  +        "system": "Linux",  +        "node": "pajeh",  +        "host": "pajeh" +    },  +    "datetime": "2017-09-13T14:35:32.735207" +}
\ No newline at end of file diff --git a/scripts/benchmark/sqlite-blobs-backend/data/put.json b/scripts/benchmark/sqlite-blobs-backend/data/put.json new file mode 100644 index 00000000..f56681fc --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/data/put.json @@ -0,0 +1,187 @@ +{ +    "machine_info": { +        "python_version": "2.7.13",  +        "machine": "x86_64",  +        "python_build": [ +            "default",  +            "Jan 19 2017 14:48:08" +        ],  +        "node": "pajeh",  +        "processor": "",  +        "python_implementation": "CPython",  +        "system": "Linux",  +        "cpu": { +            "hardware": "unknown",  +            "brand": "Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz",  +            "vendor_id": "GenuineIntel" +        },  +        "host": "pajeh",  +        "release": "4.9.0-3-amd64",  +        "python_compiler": "GCC 6.3.0 20170118",  +        "python_implementation_version": "2.7.13" +    },  +    "commit_info": { +        "author_time": "2017-09-11T14:38:36-03:00",  +        "time": "2017-09-13T11:11:15-03:00",  +        "project": "soledad",  +        "dirty": true,  +        "id": "81200dec8ac063a1ad77a5991c2bc7223e4445af",  +        "branch": "8942" +    },  +    "version": "3.1.1",  +    "benchmarks": [ +        { +            "params": null,  +            "group": null,  +            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_1_10000k",  +            "extra_info": { +                "cpu_percent": 78.4,  +                "doc": "" +            },  +            "name": "test_sqlite_blobs_backend_put_1_10000k",  +            "options": { +                "disable_gc": false,  +                "min_rounds": 5,  +                "timer": "time",  +                "min_time": 5e-06,  +                "max_time": 1.0,  +                "warmup": false +            },  +            "stats": { +                "max": 0.40472412109375,  +                "q3": 
0.4024088382720947,  +                "q1": 0.3844095468521118,  +                "median": 0.39255285263061523,  +                "total": 1.9671721458435059,  +                "stddev": 0.009608947098416514,  +                "outliers": "2;0",  +                "iqr": 0.01799929141998291,  +                "iqr_outliers": 0,  +                "min": 0.38356804847717285,  +                "iterations": 1,  +                "stddev_outliers": 2,  +                "rounds": 5,  +                "hd15iqr": 0.40472412109375,  +                "ops": 2.5417196001705507,  +                "mean": 0.3934344291687012,  +                "ld15iqr": 0.38356804847717285 +            },  +            "param": null +        },  +        { +            "params": null,  +            "group": null,  +            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_1000_10k",  +            "extra_info": { +                "cpu_percent": 113.7,  +                "doc": "" +            },  +            "name": "test_sqlite_blobs_backend_put_1000_10k",  +            "options": { +                "disable_gc": false,  +                "min_rounds": 5,  +                "timer": "time",  +                "min_time": 5e-06,  +                "max_time": 1.0,  +                "warmup": false +            },  +            "stats": { +                "max": 1.197934865951538,  +                "q3": 1.1804951429367065,  +                "q1": 1.051497757434845,  +                "median": 1.1336700916290283,  +                "total": 5.600029945373535,  +                "stddev": 0.07091823162518827,  +                "outliers": "2;0",  +                "iqr": 0.12899738550186157,  +                "iqr_outliers": 0,  +                "min": 1.0376191139221191,  +                "iterations": 1,  +                "stddev_outliers": 2,  +                "rounds": 5,  +                "hd15iqr": 1.197934865951538,  +                "ops": 
0.892852368428985,  +                "mean": 1.120005989074707,  +                "ld15iqr": 1.0376191139221191 +            },  +            "param": null +        },  +        { +            "params": null,  +            "group": null,  +            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_100_100k",  +            "extra_info": { +                "cpu_percent": 107.1,  +                "doc": "" +            },  +            "name": "test_sqlite_blobs_backend_put_100_100k",  +            "options": { +                "disable_gc": false,  +                "min_rounds": 5,  +                "timer": "time",  +                "min_time": 5e-06,  +                "max_time": 1.0,  +                "warmup": false +            },  +            "stats": { +                "max": 0.4825630187988281,  +                "q3": 0.4800873398780823,  +                "q1": 0.32923150062561035,  +                "median": 0.3520970344543457,  +                "total": 1.9610421657562256,  +                "stddev": 0.08220973334366445,  +                "outliers": "2;0",  +                "iqr": 0.15085583925247192,  +                "iqr_outliers": 0,  +                "min": 0.31221699714660645,  +                "iterations": 1,  +                "stddev_outliers": 2,  +                "rounds": 5,  +                "hd15iqr": 0.4825630187988281,  +                "ops": 2.5496647075265098,  +                "mean": 0.3922084331512451,  +                "ld15iqr": 0.31221699714660645 +            },  +            "param": null +        },  +        { +            "params": null,  +            "group": null,  +            "fullname": "tests/benchmarks/test_sqlite_blobs_backend.py::test_sqlite_blobs_backend_put_10_1000k",  +            "extra_info": { +                "cpu_percent": 89.3,  +                "doc": "" +            },  +            "name": "test_sqlite_blobs_backend_put_10_1000k",  +            "options": { +     
           "disable_gc": false,  +                "min_rounds": 5,  +                "timer": "time",  +                "min_time": 5e-06,  +                "max_time": 1.0,  +                "warmup": false +            },  +            "stats": { +                "max": 0.2860860824584961,  +                "q3": 0.28166937828063965,  +                "q1": 0.2644330859184265,  +                "median": 0.27631092071533203,  +                "total": 1.3479201793670654,  +                "stddev": 0.022973488461055777,  +                "outliers": "1;1",  +                "iqr": 0.017236292362213135,  +                "iqr_outliers": 1,  +                "min": 0.22912287712097168,  +                "iterations": 1,  +                "stddev_outliers": 1,  +                "rounds": 5,  +                "hd15iqr": 0.2860860824584961,  +                "ops": 3.709418463004107,  +                "mean": 0.26958403587341306,  +                "ld15iqr": 0.2762031555175781 +            },  +            "param": null +        } +    ],  +    "datetime": "2017-09-13T14:36:01.333043" +}
\ No newline at end of file diff --git a/scripts/benchmark/sqlite-blobs-backend/gen-graph.py b/scripts/benchmark/sqlite-blobs-backend/gen-graph.py new file mode 100755 index 00000000..2bb948f1 --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/gen-graph.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Plot bars comparing different implementations of mail pipeline. +# +# This script can be improved to account for arbitrary number of data sets, but +# it is not doing it right now. + +import json +import matplotlib.pyplot as plt +import numpy as np +import re + +# make a prettier graph +from mpltools import style +style.use('ggplot') + +OUTPUT_FILENAME = 'blobs-sqlite-backend.png' + +graphs = [ +    '1_10000k', +    '10_1000k', +    '100_100k', +    '1000_10k', +] + + +# the JSON structure returned by the following function is ugly, but the +# original JSONs are even uglier, so this is here just to make the life of the +# script easier. +# +# We want to have something like: +# +#   data[get/put][amount_size] = <stats> + +def get_data(): +    data = {} +    for fname in ['get', 'put']: +        data[fname] = {} +        with open('data/%s.json' % fname) as f: +            d = json.loads(f.read()) +            benchmarks = d['benchmarks'] +            for item in benchmarks: +                name = re.sub('^[^1]+', '', item['name']) +                data[fname][name] = item['stats'] +    return data + + +def plot_data(data): + +    N = 4 + +    get_means = tuple([data['get'][graph]['mean'] for graph in graphs]) +    put_means = tuple([data['put'][graph]['mean'] for graph in graphs]) + +    ind = np.arange(N)  # the x locations for the groups +    width = 0.40        # the width of the bars + +    fig, ax = plt.subplots() +    rects1 = ax.bar(ind, get_means, width) +    rects2 = ax.bar(ind + width, put_means, width) + +    # add some text for labels, title and axes ticks +    ax.set_ylabel('Time for operation (s)') +    ax.set_xlabel('Amount and size of blobs') +  
  ax.set_title('Blobs storage and retrieval time') +    ax.set_xticks(ind + (0.5 * width)) +    ax.set_xticklabels( +        tuple(map(lambda name: name.replace('_', ' x '), graphs))) + +    ax.legend( +        (rects1[0], rects2[0]), +        ('retrieval time', 'storage time')) +    # ax.grid() + +    plt.savefig(OUTPUT_FILENAME) +    # plt.show() + + +if __name__ == '__main__': +    data = get_data() +    plot_data(data) diff --git a/scripts/benchmark/sqlite-blobs-backend/makefile b/scripts/benchmark/sqlite-blobs-backend/makefile new file mode 100644 index 00000000..f8187e9a --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/makefile @@ -0,0 +1,14 @@ +VIRTUALENV_NAME = plot +VIRTUALENV_ROOT = ~/.virtualenvs/$(VIRTUALENV_NAME) + +all: graph + +env: +	if [ ! -d $(VIRTUALENV_ROOT) ]; then \ +	  mkdir -p $$(dirname $(VIRTUALENV_ROOT)); \ +	  virtualenv $(VIRTUALENV_ROOT); \ +	  $(VIRTUALENV_ROOT)/bin/pip install requirements.pip; \ +	fi + +graph: env +	$(VIRTUALENV_ROOT)/bin/python ./gen-graph.py diff --git a/scripts/benchmark/sqlite-blobs-backend/requirements.pip b/scripts/benchmark/sqlite-blobs-backend/requirements.pip new file mode 100644 index 00000000..db5d81e0 --- /dev/null +++ b/scripts/benchmark/sqlite-blobs-backend/requirements.pip @@ -0,0 +1,2 @@ +matplotlib +numpy | 
