[test] move legacy-vs-blobs graphing to benchmark scripts dir

author: drebs <drebs@riseup.net> 2017-09-13 11:39:36 -0300
committer: drebs <drebs@riseup.net> 2017-09-14 12:14:44 -0300
commit: dd7b2e414adf2c84873debcd7b7d526859036bc2 (patch)
tree: 57f44bd454e9f4612a1c4fc808fbb944d0716c18 /scripts/legacy-vs-blobs/legacy-vs-blobs.py
parent: affc09e18a243b215dd1430f53c74a46eab8c098 (diff)
1 files changed, 0 insertions, 126 deletions
diff --git a/scripts/legacy-vs-blobs/legacy-vs-blobs.py b/scripts/legacy-vs-blobs/legacy-vs-blobs.py
deleted file mode 100755
index 57a42376..00000000
--- a/scripts/legacy-vs-blobs/legacy-vs-blobs.py
+++ /dev/null
@@ -1,126 +0,0 @@
-#!/usr/bin/env python
-
-# Plot bars comparing different implementations of mail pipeline.
-#
-# This script could be generalized to handle an arbitrary number of data sets,
-# but it currently does not.
-
-import json
-import numpy as np
-import matplotlib.pyplot as plt
-
-# make a prettier graph
-from mpltools import style
-style.use('ggplot')
-
-OUTPUT_FILENAME = 'legacy-vs-blobs.png'
-
-# each value below is an (amount, size) pair and generates one group of bars.
-# The script expects to find files in ./data/SET/ for each set of
-# implementations.
-#
-# The baseline values will be the legacy results in ./data/no-cache/.
-
-graphs = [
-    '1000_10k',
-    '100_100k',
-    '10_1000k',
-    '1_10000k',
-]
-
-
-# the JSON structure returned by the following function is ugly, but the
-# original JSONs are even uglier, so this is here just to make the life of the
-# script easier.
-#
-# We want to have something like:
-#
-#   data[variation][graph][implementation] = <stats>
-#
-# Where:
-#
-#   - variation is one data set under ./data (i.e. no-cache, cache, persistent,
-#     etc).
-#   - graph is one of the values in graphs variable above.
-#   - implementation is either legacy or blobs (we just need legacy for the
-#     no-cache variation, as that is the one we are using as baseline).
-
-def get_data():
-    folders = ['cache', 'no-cache', 'persistent']
-    data = {}
-    for folder in folders:
-        data[folder] = {}
-        for graph in graphs:
-            with open('data/%s/%s.json' % (folder, graph)) as f:
-                d = json.loads(f.read())
-                benchmarks = d['benchmarks']
-            data[folder][graph] = {}
-            for t in ['blobs', 'legacy']:
-                result = filter(lambda b: t in b['name'], benchmarks)
-                if result:
-                    result = result.pop()
-                    data[folder][graph][t] = result['stats']
-    return data
-
-
-def plot_data(data):
-
-    N = 4
-
-    # this is our baseline (i.e. legacy / legacy)
-    absolutes = (1, 1, 1, 1)
-
-    ind = np.arange(N)  # the x locations for the groups
-    width = 0.20        # the width of the bars
-
-    fig, ax = plt.subplots()
-    rects1 = ax.bar(ind, absolutes, width)
-
-    # for each graph, calculate the ratios
-    ratios = {'no-cache': [], 'cache': [], 'persistent': []}
-    for graph in graphs:
-        legacy = data['no-cache'][graph]['legacy']['mean']
-
-        # calculate ratios for no-cache / legacy
-        ratio = data['no-cache'][graph]['blobs']['mean'] / legacy
-        ratios['no-cache'].append(ratio)
-
-        # calculate ratios for cache / legacy
-        ratio = data['cache'][graph]['blobs']['mean'] / legacy
-        ratios['cache'].append(ratio)
-
-        # calculate ratios for persistent / legacy
-        ratio = data['persistent'][graph]['blobs']['mean'] / legacy
-        ratios['persistent'].append(ratio)
-
-    # create the boxes with the ratios
-    nocache = tuple(ratios['no-cache'])
-    rects2 = ax.bar(ind + width, nocache, width)
-
-    cache = tuple(ratios['cache'])
-    rects3 = ax.bar(ind + (2 * width), cache, width)
-
-    persistent = tuple(ratios['persistent'])
-    rects4 = ax.bar(ind + (3 * width), persistent, width)
-
-    # add some text for labels, title and axes ticks
-    ax.set_ylabel('Ratio of time (legacy is baseline)')
-    ax.set_xlabel('Amount and size of email messages')
-    ax.set_title('Inbox loading time: legacy vs blobs mail pipeline')
-    ax.set_xticks(ind + (1.5 * width))
-    ax.set_xticklabels(
-        tuple(map(lambda name: name.replace('_', ' x '), graphs)))
-
-    ax.legend(
-        (rects1[0], rects2[0], rects3[0], rects4[0]),
-        ('legacy', 'blobs', 'blobs + session cache',
-         'blobs + session cache + persistent http'))
-    # ax.grid()
-
-    plt.savefig(OUTPUT_FILENAME)
-    # plt.show()
-
-
-if __name__ == '__main__':
-    data = get_data()
-    plot_data(data)
author	drebs <drebs@riseup.net>	2017-09-13 11:39:36 -0300
committer	drebs <drebs@riseup.net>	2017-09-14 12:14:44 -0300
commit	dd7b2e414adf2c84873debcd7b7d526859036bc2 (patch)
tree	57f44bd454e9f4612a1c4fc808fbb944d0716c18 /scripts/legacy-vs-blobs/legacy-vs-blobs.py
parent	affc09e18a243b215dd1430f53c74a46eab8c098 (diff)