summaryrefslogtreecommitdiff
path: root/scripts/benchmark/sqlite-blobs-backend/gen-graph.py
blob: 2bb948f11fb5f7548753824cb599c3cfff467479 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python

# Plot bars comparing the retrieval (get) and storage (put) times of the
# SQLite blobs backend for different amounts and sizes of blobs.
#
# This script could be improved to account for an arbitrary number of data
# sets, but it does not do so right now.

import json
import matplotlib.pyplot as plt
import numpy as np
import re

# make a prettier graph
# NOTE(review): mpltools is a separate third-party package; recent matplotlib
# releases appear to offer the same via `plt.style.use('ggplot')` -- confirm
# before switching.
from mpltools import style
style.use('ggplot')

# File name the rendered chart is saved under by plot_data().
OUTPUT_FILENAME = 'blobs-sqlite-backend.png'

# Benchmark groups to plot, in x-axis order. Each name is used by plot_data()
# as a key into data['get'] and data['put'] and, with '_' replaced by ' x ',
# as the tick label of its group (the x axis is labelled
# 'Amount and size of blobs').
graphs = [
    '1_10000k',
    '10_1000k',
    '100_100k',
    '1000_10k',
]


# the JSON structure returned by the following function is ugly, but the
# original JSONs are even uglier, so this is here just to make the life of the
# script easier.
#
# We want to have something like:
#
#   data[get/put][amount_size] = <stats>

def get_data():
    """Load the benchmark stats for the 'get' and 'put' operations.

    Reads ``data/get.json`` and ``data/put.json`` (relative to the current
    working directory).  Each file must contain a JSON object with a
    ``'benchmarks'`` list whose items carry ``'name'`` and ``'stats'`` keys.

    :return: ``{'get': {name: stats, ...}, 'put': {name: stats, ...}}``,
             where ``name`` is the benchmark name with everything before
             its first ``'1'`` character removed.
    :raises IOError: if one of the JSON files cannot be opened.
    :raises KeyError: if an expected key is missing from the JSON.
    """
    data = {}
    for fname in ['get', 'put']:
        with open('data/%s.json' % fname) as f:
            # parse directly from the file object; no need to read the whole
            # file into an intermediate string first
            benchmarks = json.load(f)['benchmarks']
        # the file is fully parsed at this point, so it is closed before the
        # stats are reshaped
        data[fname] = {
            # strip the leading run of non-'1' characters from the name so
            # only the trailing part starting at the first '1' is the key
            re.sub(r'^[^1]+', '', item['name']): item['stats']
            for item in benchmarks
        }
    return data


def plot_data(data):
    """Draw grouped bars of the mean get/put times and save the figure.

    One group of two bars (retrieval and storage) is drawn for every entry
    of the module-level ``graphs`` list, and the figure is written to
    ``OUTPUT_FILENAME``.

    :param data: mapping indexed as ``data['get'|'put'][name]['mean']``,
                 with an entry for every name in ``graphs``.
    :raises KeyError: if a name from ``graphs`` (or its ``'mean'`` entry) is
                      missing from ``data``.
    """
    get_means = [data['get'][graph]['mean'] for graph in graphs]
    put_means = [data['put'][graph]['mean'] for graph in graphs]

    # One group per entry in `graphs`.  The count is derived from the list
    # (rather than hard-coded) so that adding or removing a benchmark cannot
    # leave the x locations out of sync with the number of means.
    ind = np.arange(len(graphs))  # the x locations for the groups
    width = 0.40                  # the width of the bars

    fig, ax = plt.subplots()
    get_bars = ax.bar(ind, get_means, width)
    # the storage bars are shifted by one bar width so they sit next to the
    # retrieval bars instead of on top of them
    put_bars = ax.bar(ind + width, put_means, width)

    # add some text for labels, title and axes ticks
    ax.set_ylabel('Time for operation (s)')
    ax.set_xlabel('Amount and size of blobs')
    ax.set_title('Blobs storage and retrieval time')
    # ticks sit half a bar width to the right of each group's x location
    ax.set_xticks(ind + (0.5 * width))
    ax.set_xticklabels([name.replace('_', ' x ') for name in graphs])

    # one legend entry per data set, using the first bar of each as handle
    ax.legend(
        (get_bars[0], put_bars[0]),
        ('retrieval time', 'storage time'))

    plt.savefig(OUTPUT_FILENAME)


if __name__ == '__main__':
    # script entry point: load the benchmark stats and render the chart
    plot_data(get_data())