#!/usr/bin/env python # Given a JSON file output by pytest-benchmark, this script compares the # results of a test session with the results stored in elasticsearch. # # - iterate through test results in pytest-benchmark JSON file. # # - for each one, get mean and stddev of the mean of last 20 results from # master branch. # # - compare the result in the file with the results in elastic. # # - if there are bad outliers, exit with status code given in command line. import argparse import copy import json import requests import sys URL = "https://moose.leap.se:9200/benchmark/_search" BLOCK_SIZE = 20 MULTIPLIER = 1.5 def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( 'file', help='The file with JSON results of pytest-benchmark') parser.add_argument( '--status-code', type=int, default=0, help='The status code to exit with in case bad outliers are detected.') return parser.parse_args() def parse_file(file): data = None tests = [] with open(file) as f: data = json.loads(f.read()) for test in data['benchmarks']: name = test['name'] mean = test['stats']['mean'] extra = test['extra_info'] tests.append((name, mean, extra)) return tests base_query = { "query": { "bool": { "must": [ {"term": {"machine_info.host": "weasel"}}, {"term": {"commit_info.branch": "master"}}, {"term": {"commit_info.project": "soledad"}}, {"exists": {"field": "extra_info"}}, {"exists": {"field": "extra_info.cpu_percent"}} ], "must_not": [ ], }, }, "aggs": { "commit_id_time": { "terms": { "field": "commit_info.id", "size": BLOCK_SIZE, "order": {"commit_info_time": "desc"}, }, "aggs": { "commit_info_time": {"max": {"field": "commit_info.time"}}, } } }, } def get_time_cpu_stats(test): query = copy.deepcopy(base_query) query['query']['bool']['must'].append({ 'term': {'name': test}}) query['query']['bool']['must_not'].append( {'exists': {'field': "extra_info.memory_percent"}}) query['aggs']['commit_id_time']['aggs']['time'] = \ {"stats": {"field": "stats.mean"}} query['aggs']['commit_id_time']['aggs']['cpu'] = \ {"stats": {"field": "extra_info.cpu_percent"}} response = requests.get("%s?size=0" % URL, data=json.dumps(query)) data = response.json() time = [] cpu = [] buckets = data['aggregations']['commit_id_time']['buckets'] for bucket in buckets: time.append(bucket['time']['avg']) cpu.append(bucket['cpu']['avg']) return time, cpu def get_mem_stats(test): query = copy.deepcopy(base_query) query['query']['bool']['must'].append({ 'term': {'name': test}}) query['query']['bool']['must'].append( {'exists': {'field': "extra_info.memory_percent"}}) query['aggs']['commit_id_time']['aggs']['mem'] = \ {"stats": {"field": "extra_info.memory_percent.stats.max"}} response = requests.get("%s?size=0" % URL, data=json.dumps(query)) data = response.json() mem = [] buckets = data['aggregations']['commit_id_time']['buckets'] for bucket in buckets: mem.append(bucket['mem']['avg']) return mem def _mean(l): return float(sum(l)) / len(l) def _std(l): if len(l) <= 1: return 0 mean = _mean(l) squares = [(x - mean) ** 2 for x in l] return (sum(squares) / (len(l) - 1)) ** 0.5 def detect_bad_outlier(test, mean, extra): bad = False if 'memory_percent' in extra: mem = get_mem_stats(test) value = extra['memory_percent']['stats']['max'] bad |= _detect_outlier(test, 'mem', value, mem) > 0 else: time, cpu = get_time_cpu_stats(test) value = mean bad |= _detect_outlier(test, 'time', value, time) > 0 value = extra['cpu_percent'] bad |= _detect_outlier(test, 'cpu', value, cpu) > 0 return bad def _detect_outlier(test, name, value, list): if not list: return 0 mean = _mean(list) std = _std(list) result = 0 print "Checking %s (%s):" % (test, name) print " value: %f" % (value,) print " lower limit: %f" % (mean - (MULTIPLIER * std)) print " upper limit: %f" % (mean + (MULTIPLIER * std)) if value < mean - MULTIPLIER * std: print " => good outlier detected!" result = -1 elif value > mean + MULTIPLIER * std: print " => bad outlier detected!" result = 1 return result if __name__ == '__main__': args = parse_args() tests = parse_file(args.file) print "Checking %d test results for outliers..." % len(tests) failed = False for test, mean, extra in tests: failed |= detect_bad_outlier(test, mean, extra) if failed: print "Tests have bad outliers! o_O" sys.exit(args.status_code) else: print "All good, no outliers were detected. :-)"