author     drebs <drebs@leap.se>  2017-07-13 15:10:10 -0300
committer  drebs <drebs@leap.se>  2017-07-13 15:10:10 -0300
commit     d1abf301c75548938009a650ec953920ac9bdd1e (patch)
tree       a8e6d07a727dd764bd4517803d2f3e3dcecac3bd /docs/misc/benchmarks-python-graphs.rst
parent     256182a91414b8a57e9614f65409fc06d62b39d1 (diff)
[docs] add code example for plotting elasticsearch with python
Diffstat (limited to 'docs/misc/benchmarks-python-graphs.rst')
-rw-r--r--  docs/misc/benchmarks-python-graphs.rst  148
1 files changed, 148 insertions, 0 deletions
diff --git a/docs/misc/benchmarks-python-graphs.rst b/docs/misc/benchmarks-python-graphs.rst
new file mode 100644
index 00000000..4d1db339
--- /dev/null
+++ b/docs/misc/benchmarks-python-graphs.rst
@@ -0,0 +1,148 @@
+Benchmarks python graphs
+========================
+
+This page documents an example of how to query elasticsearch and plot the
+results with python. We are currently using ``kibana`` to plot and show graphs,
+but in the future we might want/need the flexibility of python for that.
+
+Some notes about the code example:
+
+* Depends on ``elasticsearch`` for querying and ``matplotlib`` for plotting.
+* Searches need to be scrolled to get all results using a ``scroll_id``.
+* Commit datetime ranges (`1 <https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-range-query.html>`_, `2 <https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math>`_, `3 <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-daterange-aggregation.html#date-format-pattern>`_) can be put in place to limit results.
+
+
+Code example
+------------
+
+.. code:: python
+
+    #!/usr/bin/env python
+
+    import os
+
+    from math import sqrt
+    from dateutil.parser import parse
+
+    from elasticsearch import Elasticsearch
+    from elasticsearch.exceptions import NotFoundError
+
+    import matplotlib.pyplot as plt
+
+
+    def sort_commits(a, b):
+        ta = parse(a['_source']['commit_info']['author_time'])
+        tb = parse(b['_source']['commit_info']['author_time'])
+        return -1 if ta < tb else 1
+
+
+    def _query_elasticsearch():
+        http_auth = os.environ.get('ES_CREDS').split(':')
+
+        es = Elasticsearch([{
+            'host': 'moose.leap.se',
+            'port': 9200,
+            'use_ssl': True,
+            'http_auth': http_auth,
+        }])
+
+        q = "commit_info.project:soledad " \
+            "AND machine_info.host='weasel' " \
+            "AND name='test_async_create_1000_10k' " \
+            "AND commit_info.time:[\"now-1d\" TO \"now\"]"
+
+        res = es.search(index='benchmark', q=q, scroll='1m', size=50)
+
+        total = res['hits']['total']
+        scroll_size = total
+        scroll_id = res['_scroll_id']
+        hits = res['hits']['hits'][:]
+
+        print("There are %d hits to get." % total)
+        print("(got %d...)" % len(hits))
+
+        # scroll to get all results
+        print("(started scrolling...)")
+        while scroll_size > 0:
+            try:
+                res = es.scroll(scroll_id=scroll_id, scroll='1m')
+                scroll_size = len(res['hits']['hits'])
+                scroll_id = res['_scroll_id']
+                print("(got %d more...)" % scroll_size)
+                hits += res['hits']['hits'][:]
+            except NotFoundError:
+                print("(finished scrolling.)")
+                pass
+                break
+
+        print("Found %d hits." % len(hits))
+        hits.sort(sort_commits)
+
+        stats = []
+        means = []
+
+        for hit in hits:
+            st = hit['_source']['stats']
+            commit_id = hit['_source']['commit_info']['id'][:7]
+
+            item = {}
+
+            item["label"] = commit_id
+            item["mean"] = st['mean']
+            item["med"] = st['median']
+            item["q1"] = st['q1']
+            item["q3"] = st['q3']
+            # item["cilo"] = 5.3 # not required
+            # item["cihi"] = 5.7 # not required
+            item["whislo"] = st['mean'] - st['stddev']
+            item["whishi"] = st['mean'] + st['stddev']
+            item["fliers"] = []  # required if showfliers=True
+
+            stats.append(item)
+            means.append(st['mean'])
+
+            # print(hit['_source']['commit_info'])
+
+        return stats, means
+
+
+    def mean(lst):
+        return sum(lst) / len(lst)
+
+
+    def stddev(lst):
+        mn = mean(lst)
+        variance = sum([(e - mn)**2 for e in lst]) / len(lst)
+        return sqrt(variance)
+
+
+    def _plot_graph(results):
+        print("Plotting graph...")
+        stats, means = results
+        fig, axes = plt.subplots(1, 1)
+        plt.grid()
+        axes.bxp(stats)
+
+        mmean = mean(means)
+        mstddev = stddev(means)
+        plt.axhline(y=mmean + (1.5 * mstddev), color='b', linestyle='--')
+        plt.axhline(y=mmean - (1.5 * mstddev), color='b', linestyle='--')
+        plt.axhline(y=mmean, color='b', linestyle='-')
+
+        axes.set_title('Time taken for test_async_create_1000_10k')
+
+        plt.xticks(rotation=45, ha='right', size='small')
+        plt.ylim(ymin=0)
+        plt.tight_layout()
+
+        # boxplot
+        filename = '/tmp/test.png'
+        print("Saving to %s" % filename)
+        plt.savefig(filename)
+        # plt.figure()
+        # plt.show()
+
+
+    if __name__ == '__main__':
+        results = _query_elasticsearch()
+        _plot_graph(results)
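A note on the datetime-range bullet in the document above: the script narrows results with Lucene query-string syntax (``commit_info.time:["now-1d" TO "now"]``). The same restriction can also be written as an explicit Elasticsearch ``range`` query in the request body, as the linked reference pages describe. The following is a minimal sketch, assuming the same ``benchmark`` index and field names used in the script; the host here is a placeholder, not the real setup.

.. code:: python

    from elasticsearch import Elasticsearch

    # Placeholder client; the real script builds one from the ES_CREDS variable.
    es = Elasticsearch(['localhost:9200'])

    # Same kind of filters as the query string above, expressed as a bool query
    # with an explicit range clause using date math ("now-1d" to "now").
    # The term filters assume these fields are indexed for exact matching.
    body = {
        "query": {
            "bool": {
                "filter": [
                    {"term": {"commit_info.project": "soledad"}},
                    {"term": {"name": "test_async_create_1000_10k"}},
                    {"range": {"commit_info.time": {"gte": "now-1d", "lte": "now"}}},
                ]
            }
        }
    }

    res = es.search(index='benchmark', body=body, scroll='1m', size=50)

The scrolling loop from the script works unchanged with this form, since ``es.search`` still returns a ``_scroll_id`` when the ``scroll`` parameter is passed.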