#!/usr/bin/env python

# Plot bars comparing different implementations of mail pipeline.
#
# This script can be improved to account for arbitrary number of data sets, but
# it is not doing it right now.
#
# Provenance: recovered from commit dd7b2e414adf2c84873debcd7b7d526859036bc2
# (drebs, Wed, 13 Sep 2017 11:39:36 -0300, "[test] move legacy-vs-blobs
# graphing to benchmark scripts dir"), which removed this file from
# scripts/legacy-vs-blobs/legacy-vs-blobs.py (mode 100755, 126 lines).

import json

import numpy as np
import matplotlib.pyplot as plt

# make a prettier graph
try:
    from mpltools import style
except ImportError:
    # mpltools is no longer maintained; matplotlib (>= 1.4) ships an
    # equivalent ``style`` module that also provides the 'ggplot' sheet.
    from matplotlib import style
style.use('ggplot')

OUTPUT_FILENAME = 'legacy-vs-blobs.png'

# each value below will generate one bar for each (amount, size) pair.
# The script expects to find files in ./data/SET/ for each set of
# implementations.
#
# The baseline values will be the legacy results in ./data/no-cache/.

graphs = [
    '1000_10k',
    '100_100k',
    '10_1000k',
    '1_10000k',
]


# the JSON structure returned by the following function is ugly, but the
# original JSONs are even uglier, so this is here just to make the life of the
# script easier.
#
# We want to have something like:
#
#     data[variation][graph][implementation] = <benchmark stats>
#
# Where:
#
#   - variation is one data set under ./data (i.e. no-cache, cache, persistent,
#     etc).
#   - graph is one of the values in graphs variable above.
#   - implementation is either legacy or blobs (we just need legacy for the
#     no-cache variation, as that is the one we are using as baseline).
def get_data(folders=None, graph_names=None, base_dir='data'):
    """Load the benchmark statistics for every variation and graph.

    Reads ``<base_dir>/<folder>/<graph>.json`` for each folder/graph pair.
    Each file must hold a JSON object with a ``'benchmarks'`` list whose
    entries carry a ``'name'`` and a ``'stats'`` value.

    Parameters:
        folders: iterable of variation directory names. Defaults to
            ``['cache', 'no-cache', 'persistent']``.
        graph_names: iterable of graph identifiers (file names without the
            ``.json`` suffix). Defaults to the module-level ``graphs`` list.
        base_dir: directory holding the variation folders. Defaults to
            ``'data'``, relative to the current working directory.

    Returns:
        A nested dict ``data[folder][graph][implementation]`` holding the
        ``'stats'`` value of the matching benchmark, where implementation is
        ``'blobs'`` and/or ``'legacy'``. An implementation with no matching
        benchmark in a file is simply absent from that inner dict.

    Raises:
        IOError/OSError: if one of the expected files cannot be opened.
        KeyError: if a file lacks the ``'benchmarks'`` key or a benchmark
            lacks ``'name'``/``'stats'``.
    """
    if folders is None:
        folders = ['cache', 'no-cache', 'persistent']
    if graph_names is None:
        graph_names = graphs

    data = {}
    for folder in folders:
        data[folder] = {}
        for graph in graph_names:
            with open('%s/%s/%s.json' % (base_dir, folder, graph)) as f:
                benchmarks = json.load(f)['benchmarks']
            stats = data[folder][graph] = {}
            for implementation in ('blobs', 'legacy'):
                # Build a real list: on Python 3 ``filter`` returns a lazy
                # iterator that is always truthy and has no ``pop``.
                matches = [b for b in benchmarks
                           if implementation in b['name']]
                if matches:
                    # Same choice the original ``list.pop()`` made: when
                    # several benchmarks match, the last one wins.
                    stats[implementation] = matches[-1]['stats']
    return data


def _compute_ratios(data, graph_names):
    """Return blobs/legacy mean-time ratios per variation, in graph order."""
    variations = ('no-cache', 'cache', 'persistent')
    ratios = dict((variation, []) for variation in variations)
    for graph in graph_names:
        # The legacy run of the no-cache variation is the common baseline.
        # float() keeps the division exact on Python 2 even for int means.
        legacy = float(data['no-cache'][graph]['legacy']['mean'])
        for variation in variations:
            blobs = data[variation][graph]['blobs']['mean']
            ratios[variation].append(blobs / legacy)
    return ratios


def plot_data(data):
    """Draw the legacy-vs-blobs grouped bar chart and save it to disk.

    For every entry of the module-level ``graphs`` list, four bars are
    drawn: the legacy baseline (always 1) followed by the blobs mean time
    divided by the legacy mean time for the 'no-cache', 'cache' and
    'persistent' variations. The figure is written to ``OUTPUT_FILENAME``.

    Parameters:
        data: nested dict as returned by ``get_data()``; it must contain
            ``['no-cache'][graph]['legacy']['mean']`` and
            ``[variation][graph]['blobs']['mean']`` for every graph.

    Raises:
        KeyError: if a required variation, graph or implementation is
            missing from ``data``.
        ZeroDivisionError: if a legacy mean is zero.
    """
    # One group of bars per graph; derived from ``graphs`` instead of being
    # hard-coded so that both stay in sync.
    count = len(graphs)

    # this is our baseline (i.e. legacy / legacy)
    absolutes = (1,) * count

    ind = np.arange(count)  # the x locations for the groups
    width = 0.20            # the width of the bars

    _, ax = plt.subplots()
    bars = [ax.bar(ind, absolutes, width)]

    # create the boxes with the ratios, one bar series per variation
    ratios = _compute_ratios(data, graphs)
    for position, variation in enumerate(
            ('no-cache', 'cache', 'persistent'), 1):
        heights = tuple(ratios[variation])
        bars.append(ax.bar(ind + (position * width), heights, width))

    # add some text for labels, title and axes ticks
    ax.set_ylabel('Ratio of time (legacy is baseline)')
    ax.set_xlabel('Amount and size of email messages')
    ax.set_title('Inbox loading time: legacy vs blobs mail pipeline')
    ax.set_xticks(ind + (1.5 * width))
    ax.set_xticklabels(
        tuple(name.replace('_', ' x ') for name in graphs))

    ax.legend(
        tuple(series[0] for series in bars),
        ('legacy', 'blobs', 'blobs + session cache',
         'blobs + session cache + persistent http'))

    plt.savefig(OUTPUT_FILENAME)


if __name__ == '__main__':
    plot_data(get_data())