docs/misc/benchmarks-python-graphs.rst


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154

Benchmarks python graphs
========================

This page documents an example of how to query Elasticsearch and plot the
results with Python. We currently use ``kibana`` to plot and show graphs,
but in the future we might want or need the flexibility of Python for that.

Generated image
---------------

.. image:: benchmarks-python-graphs.png
   :alt: Example of an image generated with the code below.

Code example
------------

Some notes about the code example:

* Depends on ``elasticsearch`` for querying and ``matplotlib`` for plotting.
* Searches need to be scrolled, using a ``scroll_id``, to get all results.
* Commit datetime ranges (`1 <https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-range-query.html>`_, `2 <https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math>`_, `3 <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-daterange-aggregation.html#date-format-pattern>`_) can be put in place to limit results.


.. code:: python

    #!/usr/bin/env python

    import os

    from math import sqrt
    from dateutil.parser import parse

    from elasticsearch import Elasticsearch
    from elasticsearch.exceptions import NotFoundError

    import matplotlib.pyplot as plt


    def sort_commits(a, b):
        """Compare two elasticsearch hits by their commit author time.

        This is a ``cmp``-style comparison function, meant to be used for
        sorting hits chronologically.

        :param a: a hit providing ``a['_source']['commit_info']['author_time']``
                  as a string that ``dateutil.parser.parse`` understands.
        :param b: another hit, with the same layout as ``a``.
        :return: ``-1`` if ``a`` was authored before ``b``, ``1`` if it was
                 authored after ``b`` and ``0`` if both times are equal.
        """
        ta = parse(a['_source']['commit_info']['author_time'])
        tb = parse(b['_source']['commit_info']['author_time'])
        # a comparison function has to return 0 for equal keys, otherwise
        # both cmp(a, b) and cmp(b, a) would claim "greater than"
        return (ta > tb) - (ta < tb)


    def _query_elasticsearch():
        """Fetch benchmark results from elasticsearch and build the plot data.

        Credentials are read from the ``ES_CREDS`` environment variable, in
        the form ``user:password``. All hits matching the query are gathered
        by scrolling, sorted with ``sort_commits`` and converted to the
        per-box dictionaries that ``_plot_graph`` hands over to ``axes.bxp``.

        :return: a ``(stats, means)`` tuple, where ``stats`` is a list with
                 one dictionary per hit and ``means`` is the list of the
                 corresponding mean values, both in sorted hit order.
        :raises RuntimeError: if ``ES_CREDS`` is not set or is empty.
        """
        # local import: only needed to adapt the cmp-style sort_commits to the
        # key-based sort, which works on both Python 2.7 and Python 3
        from functools import cmp_to_key

        creds = os.environ.get('ES_CREDS')
        if not creds:
            raise RuntimeError(
                "Please set the ES_CREDS environment variable "
                "to 'user:password'.")
        # split on the first colon only, so the password may contain colons
        http_auth = creds.split(':', 1)

        es = Elasticsearch([{
            'host': 'moose.leap.se',
            'port': 9200,
            'use_ssl': True,
            'http_auth': http_auth,
        }])

        # NOTE(review): the query string syntax matches fields with
        # ``field:value``; the ``field='value'`` clauses below may not filter
        # as intended -- confirm against the index before changing them.
        q = "commit_info.project:soledad " \
            "AND machine_info.host='weasel' " \
            "AND name='test_async_create_1000_10k' " \
            "AND commit_info.time:[\"now-1d\" TO \"now\"]"

        res = es.search(index='benchmark', q=q, scroll='1m', size=50)

        total = res['hits']['total']
        if isinstance(total, dict):
            # newer servers report the total as {'value': N, 'relation': ...}
            total = total['value']
        scroll_size = total
        scroll_id = res['_scroll_id']
        hits = list(res['hits']['hits'])

        print("There are %d hits to get." % total)
        print("(got %d...)" % len(hits))

        # scroll to get all results; an empty page means we have everything
        print("(started scrolling...)")
        while scroll_size > 0:
            try:
                res = es.scroll(scroll_id=scroll_id, scroll='1m')
            except NotFoundError:
                # the scroll context no longer exists: nothing more to fetch
                break
            page = res['hits']['hits']
            scroll_size = len(page)
            scroll_id = res['_scroll_id']
            print("(got %d more...)" % scroll_size)
            hits.extend(page)
        print("(finished scrolling.)")

        print("Found %d hits." % len(hits))
        hits.sort(key=cmp_to_key(sort_commits))

        stats = []
        means = []

        for hit in hits:
            st = hit['_source']['stats']
            stats.append({
                # abbreviated commit id, used as the tick label of the box
                "label": hit['_source']['commit_info']['id'][:7],
                "mean": st['mean'],
                "med": st['median'],
                "q1": st['q1'],
                "q3": st['q3'],
                # the optional "cilo"/"cihi" entries are left out on purpose
                # whiskers span one standard deviation around the mean
                "whislo": st['mean'] - st['stddev'],
                "whishi": st['mean'] + st['stddev'],
                "fliers": [],  # required if showfliers=True
            })
            means.append(st['mean'])

        return stats, means


    def mean(lst):
        """Return the arithmetic mean of the numbers in ``lst``.

        :param lst: a non-empty sequence of numbers.
        :return: the mean, as a float.
        :raises ZeroDivisionError: if ``lst`` is empty.
        """
        # force float division, so that integer input is not truncated when
        # the script runs under Python 2
        return float(sum(lst)) / len(lst)


    def stddev(lst):
        """Return the population standard deviation of the numbers in ``lst``.

        The variance is divided by ``len(lst)`` (not ``len(lst) - 1``).

        :param lst: a non-empty sequence of numbers.
        :return: the standard deviation, as a float.
        :raises ZeroDivisionError: if ``lst`` is empty.
        """
        count = len(lst)
        # averaged here with explicit float division, so that the result does
        # not suffer from integer truncation under Python 2
        mn = float(sum(lst)) / count
        variance = sum((e - mn) ** 2 for e in lst) / count
        return sqrt(variance)


    def _plot_graph(results, filename='/tmp/test.png',
                    title='Time taken for test_async_create_1000_10k'):
        """Draw one box per benchmark result and save the figure to a file.

        Besides the boxes, three horizontal lines are drawn: a solid one at
        the mean of all means, and dashed ones at 1.5 standard deviations
        above and below it.

        :param results: the ``(stats, means)`` tuple, where ``stats`` is the
                        list of dictionaries passed to ``axes.bxp`` and
                        ``means`` is the list of mean values.
        :param filename: path of the image file to write.
        :param title: title shown on top of the graph.
        """
        print("Plotting graph...")
        stats, means = results
        if not means:
            # nothing to draw, and mean() would divide by zero below
            print("No results to plot.")
            return

        fig, axes = plt.subplots(1, 1)
        plt.grid()
        axes.bxp(stats)

        # reference lines: overall mean and a band of 1.5 standard deviations
        mmean = mean(means)
        mstddev = stddev(means)
        plt.axhline(y=mmean + (1.5 * mstddev), color='b', linestyle='--')
        plt.axhline(y=mmean - (1.5 * mstddev), color='b', linestyle='--')
        plt.axhline(y=mmean, color='b', linestyle='-')

        axes.set_title(title)

        plt.xticks(rotation=45, ha='right', size='small')
        plt.ylim(ymin=0)
        plt.tight_layout()

        # write the figure to disk
        print("Saving to %s" % filename)
        plt.savefig(filename)

    if __name__ == '__main__':
        # query the benchmark data and feed it straight into the plot
        _plot_graph(_query_elasticsearch())