From 1a8cc67319c9006fdcdafdb76e8583ef896cb0d2 Mon Sep 17 00:00:00 2001
From: drebs
Date: Sun, 15 Oct 2017 15:41:36 -0200
Subject: [ci] compare benchmark results with history

---
 scripts/benchmark/compare-results-with-history.py | 166 ++++++++++++++++++++++
 scripts/benchmark/run-benchmarks-ci-job.sh        |  47 +++++-
 2 files changed, 206 insertions(+), 7 deletions(-)
 create mode 100755 scripts/benchmark/compare-results-with-history.py

diff --git a/scripts/benchmark/compare-results-with-history.py b/scripts/benchmark/compare-results-with-history.py
new file mode 100755
index 00000000..ed609552
--- /dev/null
+++ b/scripts/benchmark/compare-results-with-history.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+# Given a JSON file output by pytest-benchmark, this script compares the
+# results of a test session with the results stored in elasticsearch:
+#
+# - iterate through the test results in the pytest-benchmark JSON file.
+#
+# - for each one, get the mean and standard deviation of the means of the
+#   last 20 results from the master branch.
+#
+# - compare the result in the file with the historic results from
+#   elasticsearch.

+import argparse
+import copy
+import json
+import requests
+import sys
+
+
+URL = "https://moose.leap.se:9200/benchmark/_search"
+BLOCK_SIZE = 20
+MULTIPLIER = 1.5
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'file',
+        help='The file with JSON results of pytest-benchmark')
+    return parser.parse_args()
+
+
+def parse_file(path):
+    data = None
+    tests = []
+    with open(path) as f:
+        data = json.loads(f.read())
+    for test in data['benchmarks']:
+        name = test['name']
+        mean = test['stats']['mean']
+        extra = test['extra_info']
+        tests.append((name, mean, extra))
+    return tests
+
+
+base_query = {
+    "query": {
+        "bool": {
+            "must": [
+                {"term": {"machine_info.host": "weasel"}},
+                {"term": {"commit_info.branch": "master"}},
+                {"term": {"commit_info.project": "soledad"}},
+                {"exists": {"field": "extra_info"}},
+                {"exists": {"field": "extra_info.cpu_percent"}}
+            ],
+            "must_not": [
+            ],
+        },
+    },
+    "aggs": {
+        "commit_id_time": {
+            "terms": {
+                "field": "commit_info.id",
+                "size": BLOCK_SIZE,
+                "order": {"commit_info_time": "desc"},
+            },
+            "aggs": {
+                "commit_info_time": {"max": {"field": "commit_info.time"}},
+            }
+        }
+    },
+}
+
+
+def get_time_cpu_stats(test):
+    query = copy.deepcopy(base_query)
+    query['query']['bool']['must'].append({
+        'term': {'name': test}})
+    query['query']['bool']['must_not'].append(
+        {'exists': {'field': "extra_info.memory_percent"}})
+    query['aggs']['commit_id_time']['aggs']['time'] = \
+        {"stats": {"field": "stats.mean"}}
+    query['aggs']['commit_id_time']['aggs']['cpu'] = \
+        {"stats": {"field": "extra_info.cpu_percent"}}
+    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
+    data = response.json()
+    time = []
+    cpu = []
+    buckets = data['aggregations']['commit_id_time']['buckets']
+    for bucket in buckets:
+        time.append(bucket['time']['avg'])
+        cpu.append(bucket['cpu']['avg'])
+    return time, cpu
+
+
+def get_mem_stats(test):
+    query = copy.deepcopy(base_query)
+    query['query']['bool']['must'].append({
+        'term': {'name': test}})
+    query['query']['bool']['must'].append(
+        {'exists': {'field': "extra_info.memory_percent"}})
+    query['aggs']['commit_id_time']['aggs']['mem'] = \
+        {"stats": {"field": "extra_info.memory_percent.stats.max"}}
+    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
+    data = response.json()
+    mem = []
+    buckets = data['aggregations']['commit_id_time']['buckets']
+    for bucket in buckets:
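+        # each bucket aggregates the runs of a single commit; keep the
+        # average of the per-run peak memory for that commit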
+        mem.append(bucket['mem']['avg'])
+    return mem
+
+
+def _mean(values):
+    return float(sum(values)) / len(values)
+
+
+def _std(values):
+    # sample standard deviation
+    if len(values) <= 1:
+        return 0
+    mean = _mean(values)
+    squares = [(x - mean) ** 2 for x in values]
+    return (sum(squares) / (len(values) - 1)) ** 0.5
+
+
+def detect_bad_outlier(test, mean, extra):
+    # only outliers above the band (i.e. worse results) count as bad
+    bad = False
+    if 'memory_percent' in extra:
+        mem = get_mem_stats(test)
+        value = extra['memory_percent']['stats']['max']
+        bad |= _detect_outlier('mem', value, mem) > 0
+    else:
+        time, cpu = get_time_cpu_stats(test)
+
+        value = mean
+        bad |= _detect_outlier('time', value, time) > 0
+
+        value = extra['cpu_percent']
+        bad |= _detect_outlier('cpu', value, cpu) > 0
+    return bad
+
+
+def _detect_outlier(name, value, values):
+    # returns -1 if value is below the band, 1 if above, 0 if within
+    mean = _mean(values)
+    std = _std(values)
+    result = 0
+    print "%s: %f ? %f +- %f * %f" \
+        % (name, value, mean, MULTIPLIER, std)
+    if value < mean - MULTIPLIER * std:
+        print "%s: %f < %f - %f * %f" \
+            % (name, value, mean, MULTIPLIER, std)
+        result = -1
+    elif value > mean + MULTIPLIER * std:
+        print "%s: %f > %f + %f * %f" \
+            % (name, value, mean, MULTIPLIER, std)
+        result = 1
+    return result
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    tests = parse_file(args.file)
+    failed = False
+    for test, mean, extra in tests:
+        failed |= detect_bad_outlier(test, mean, extra)
+    if failed:
+        sys.exit(1)
diff --git a/scripts/benchmark/run-benchmarks-ci-job.sh b/scripts/benchmark/run-benchmarks-ci-job.sh
index b2a8c417..835f8c7f 100755
--- a/scripts/benchmark/run-benchmarks-ci-job.sh
+++ b/scripts/benchmark/run-benchmarks-ci-job.sh
@@ -1,16 +1,23 @@
 #!/bin/sh
 
+# Run benchmark tests for CI jobs, and optionally compare results with the
+# historic series.
+#
+# Usage Example
+# -------------
+#
+# Run this script with the environment name as the only argument:
+#
+#     ./run-benchmarks-ci-job.sh environment-name
+#
 # This script is used in .gitlab-ci.yml to run benchmark jobs. It has been
 # factored out from that file to avoid bloating it with too much information.
 #
-# The benchmark job will be skiped if the RUN_BENCHMARKS variable is not set,
-# thus allowing for opting in to benchmarking.
+# Environment Variables
+# ---------------------
 #
-# This is an attempt to make life of developers easier, by diminishing the time
-# of the pipeline by not running benchmarks by default. The canonical repo
-# (i.e. https://0xacab.org/leap/soledad) should have the RUN_BENCHMARKS
-# variable set to ensure that these jobs will run. Developers can add or remove
-# the variable from their environments as they see fit.
+# RUN_BENCHMARKS: If not set, skip this run.
+# CHECK_FOR_OUTLIERS: If set, check results for outliers.
 
 set -eu
 set -o xtrace
@@ -25,10 +32,36 @@ fi
 
 echo "Running tox in environment ${ENVIRONMENT}..."
 
+#
+# run benchmark tests with tox
+#
+
+tempfile=$(mktemp)
 /usr/bin/unbuffer \
   tox \
     --recreate \
     -e ${ENVIRONMENT} \
     -- \
     --couch-url http://couchdb:5984 \
+    --benchmark-json=${tempfile} \
+    -m runme \
   | /usr/bin/ts -s
+
+#
+# check results for bad outliers
+#
+
+# stop here unless the environment name starts with "benchmark-"
+if [ -z "$(echo ${ENVIRONMENT} | grep ^benchmark-)" ]; then
+  exit 0
+fi
+
+# stop here unless the CHECK_FOR_OUTLIERS environment variable is set
+if [ -z "${CHECK_FOR_OUTLIERS:-}" ]; then
+  exit 0
+fi
+
+# fail test for bad outliers
+echo "Comparing current test results with history..."
+basedir=$(dirname "${0}")
+${basedir}/compare-results-with-history.py ${tempfile}
--
cgit v1.2.3
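
For reference, the outlier check introduced by this patch is a simple band test: a result is flagged when it falls more than MULTIPLIER sample standard deviations away from the mean of the historic series, and only results above the band (slower, or using more resources) fail the job. The standalone sketch below reproduces that logic outside the patch; the name is_outlier and the sample history values are illustrative only, not part of the commit:

    # Sketch of the mean +/- MULTIPLIER * stddev band test used by
    # _detect_outlier above. Illustrative only; not part of the patch.

    MULTIPLIER = 1.5

    def _mean(values):
        return float(sum(values)) / len(values)

    def _std(values):
        # sample standard deviation, as in the script above
        if len(values) <= 1:
            return 0
        mean = _mean(values)
        return (sum((x - mean) ** 2 for x in values) / (len(values) - 1)) ** 0.5

    def is_outlier(value, history):
        mean, std = _mean(history), _std(history)
        return value < mean - MULTIPLIER * std or value > mean + MULTIPLIER * std

    history = [1.00, 1.02, 0.98, 1.01, 0.99]  # e.g. mean runtimes of past commits
    print is_outlier(1.01, history)  # False: inside the band
    print is_outlier(1.30, history)  # True: above mean + 1.5 * stddev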