#!/usr/bin/env python

# Plot bars comparing different implementations of mail pipeline.
#
# This script can be improved to account for arbitrary number of data sets, but
# it is not doing it right now.

import json
import matplotlib.pyplot as plt
import numpy as np
import re

# make a prettier graph
from mpltools import style
style.use('ggplot')

# File the rendered figure is written to (relative to the working directory).
OUTPUT_FILENAME = 'blobs-sqlite-backend.png'

# Benchmark groups to plot, in x-axis order.  Each entry is rendered on the
# axis with '_' replaced by ' x ' (e.g. '10_1000k' -> '10 x 1000k').
graphs = [
    '1_10000k',
    '10_1000k',
    '100_100k',
    '1000_10k',
]


# the JSON structure returned by the following function is ugly, but the
# original JSONs are even uglier, so this is here just to make the life of the
# script easier.
#
# We want to have something like:
#
#     data[get/put][amount_size] = stats

def get_data():
    """Load the 'get' and 'put' benchmark results into a nested dict.

    Reads ``data/get.json`` and ``data/put.json``.  Each file must hold a
    JSON object with a ``benchmarks`` list whose items carry ``name`` and
    ``stats`` keys.

    Returns:
        A dict mapping ``'get'``/``'put'`` to a dict that maps the trimmed
        benchmark name to that benchmark's ``stats`` value.

    Errors from ``open``, JSON decoding, or missing keys are not caught and
    propagate to the caller.
    """
    data = {}
    for fname in ['get', 'put']:
        data[fname] = {}
        with open('data/%s.json' % fname) as f:
            d = json.load(f)
        for item in d['benchmarks']:
            # Drop the leading run of characters up to the first '1', so the
            # remainder can be looked up by the entries of `graphs` (all of
            # which start with '1').
            name = re.sub(r'^[^1]+', '', item['name'])
            data[fname][name] = item['stats']
    return data


def plot_data(data):
    """Draw grouped bars of the mean get/put times and save the figure.

    Args:
        data: nested dict shaped like the return value of ``get_data()``;
            ``data['get'][graph]['mean']`` and ``data['put'][graph]['mean']``
            are read for every entry of ``graphs``.

    The figure is written to ``OUTPUT_FILENAME``; nothing is returned.
    """
    # One group of bars per entry in `graphs`; derived from the list rather
    # than hard-coded so the two cannot drift apart.
    N = len(graphs)

    get_means = tuple(data['get'][graph]['mean'] for graph in graphs)
    put_means = tuple(data['put'][graph]['mean'] for graph in graphs)

    ind = np.arange(N)  # the x locations for the groups
    width = 0.40        # the width of the bars

    fig, ax = plt.subplots()
    rects1 = ax.bar(ind, get_means, width)
    rects2 = ax.bar(ind + width, put_means, width)

    # add some text for labels, title and axes ticks
    ax.set_ylabel('Time for operation (s)')
    ax.set_xlabel('Amount and size of blobs')
    ax.set_title('Blobs storage and retrieval time')
    # place each tick half a bar-width to the right of the first bar's x
    ax.set_xticks(ind + (0.5 * width))
    ax.set_xticklabels(
        tuple(name.replace('_', ' x ') for name in graphs))

    ax.legend(
        (rects1[0], rects2[0]),
        ('retrieval time', 'storage time'))
    # ax.grid()

    plt.savefig(OUTPUT_FILENAME)
    # plt.show()


if __name__ == '__main__':
    data = get_data()
    plot_data(data)