summary refs log tree commit diff
path: root/scripts/legacy-vs-blobs/legacy-vs-blobs.py
diff options
context:
space:
mode:
author drebs <drebs@riseup.net> 2017-09-13 11:39:36 -0300
committer drebs <drebs@riseup.net> 2017-09-14 12:14:44 -0300
commit dd7b2e414adf2c84873debcd7b7d526859036bc2 (patch)
tree 57f44bd454e9f4612a1c4fc808fbb944d0716c18 /scripts/legacy-vs-blobs/legacy-vs-blobs.py
parent affc09e18a243b215dd1430f53c74a46eab8c098 (diff)
[test] move legacy-vs-blobs graphing to benchmark scripts dir
Diffstat (limited to 'scripts/legacy-vs-blobs/legacy-vs-blobs.py')
-rwxr-xr-x scripts/legacy-vs-blobs/legacy-vs-blobs.py 126
1 files changed, 0 insertions, 126 deletions
diff --git a/scripts/legacy-vs-blobs/legacy-vs-blobs.py b/scripts/legacy-vs-blobs/legacy-vs-blobs.py
deleted file mode 100755
index 57a42376..00000000
--- a/scripts/legacy-vs-blobs/legacy-vs-blobs.py
+++ /dev/null
@@ -1,126 +0,0 @@
-#!/usr/bin/env python
-
-# Plot bars comparing different implementations of mail pipeline.
-#
-# This script can be improved to account for arbitrary number of data sets, but
-# it is not doing it right now.
-
-import json
-import numpy as np
-import matplotlib.pyplot as plt
-
-# make a prettier graph
-from mpltools import style
-style.use('ggplot')
-
-OUTPUT_FILENAME = 'legacy-vs-blobs.png'
-
-# each value below is an (amount, size) pair and generates one group of bars.
-# The script expects to find files in ./data/SET/ for each set of
-# implementations.
-#
-# The baseline values will be the legacy results in ./data/no-cache/.
-
-graphs = [
- '1000_10k',
- '100_100k',
- '10_1000k',
- '1_10000k',
-]
-
-
-# the JSON structure returned by the following function is ugly, but the
-# original JSONs are even uglier, so this is here just to make the life of the
-# script easier.
-#
-# We want to have something like:
-#
-# data[variation][graph][implementation] = <stats>
-#
-# Where:
-#
-# - variation is one data set under ./data (i.e. no-cache, cache, persistent,
-# etc).
-# - graph is one of the values in graphs variable above.
-# - implementation is either legacy or blobs (we just need legacy for the
-# no-cache variation, as that is the one we are using as baseline).
-
-def get_data():
- folders = ['cache', 'no-cache', 'persistent']
- data = {}
- for folder in folders:
- data[folder] = {}
- for graph in graphs:
- with open('data/%s/%s.json' % (folder, graph)) as f:
- d = json.loads(f.read())
- benchmarks = d['benchmarks']
- data[folder][graph] = {}
- for t in ['blobs', 'legacy']:
- result = filter(lambda b: t in b['name'], benchmarks)
- if result:
- result = result.pop()
- data[folder][graph][t] = result['stats']
- return data
-
-
-def plot_data(data):
-
- N = 4
-
- # this is our baseline (i.e. legacy / legacy)
- absolutes = (1, 1, 1, 1)
-
- ind = np.arange(N) # the x locations for the groups
- width = 0.20 # the width of the bars
-
- fig, ax = plt.subplots()
- rects1 = ax.bar(ind, absolutes, width)
-
- # for each graph, calculate the ratios
- ratios = {'no-cache': [], 'cache': [], 'persistent': []}
- for graph in graphs:
- legacy = data['no-cache'][graph]['legacy']['mean']
-
- # calculate ratios for no-cache / legacy
- ratio = data['no-cache'][graph]['blobs']['mean'] / legacy
- ratios['no-cache'].append(ratio)
-
- # calculate ratios for cache / legacy
- ratio = data['cache'][graph]['blobs']['mean'] / legacy
- ratios['cache'].append(ratio)
-
- # calculate ratios for persistent / legacy
- ratio = data['persistent'][graph]['blobs']['mean'] / legacy
- ratios['persistent'].append(ratio)
-
- # create the boxes with the ratios
- nocache = tuple(ratios['no-cache'])
- rects2 = ax.bar(ind + width, nocache, width)
-
- cache = tuple(ratios['cache'])
- rects3 = ax.bar(ind + (2 * width), cache, width)
-
- persistent = tuple(ratios['persistent'])
- rects4 = ax.bar(ind + (3 * width), persistent, width)
-
- # add some text for labels, title and axes ticks
- ax.set_ylabel('Ratio of time (legacy is baseline)')
- ax.set_xlabel('Amount and size of email messages')
- ax.set_title('Inbox loading time: legacy vs blobs mail pipeline')
- ax.set_xticks(ind + (1.5 * width))
- ax.set_xticklabels(
- tuple(map(lambda name: name.replace('_', ' x '), graphs)))
-
- ax.legend(
- (rects1[0], rects2[0], rects3[0], rects4[0]),
- ('legacy', 'blobs', 'blobs + session cache',
- 'blobs + session cache + persistent http'))
- # ax.grid()
-
- plt.savefig(OUTPUT_FILENAME)
- # plt.show()
-
-
-if __name__ == '__main__':
- data = get_data()
- plot_data(data)