author     drebs <drebs@riseup.net>  2017-10-15 15:41:36 -0200
committer  drebs <drebs@riseup.net>  2017-10-16 09:32:02 -0200
commit     1a8cc67319c9006fdcdafdb76e8583ef896cb0d2 (patch)
tree       16e5bd1b1257d1349e9498024521bbb72ccd2b6f /scripts
parent     2bd21dd9d9fc1e2ff66f3d923a55558dc6972fc9 (diff)
[ci] compare benchmark results with history
Diffstat (limited to 'scripts')
-rwxr-xr-x  scripts/benchmark/compare-results-with-history.py  166
-rwxr-xr-x  scripts/benchmark/run-benchmarks-ci-job.sh           47
2 files changed, 206 insertions, 7 deletions
diff --git a/scripts/benchmark/compare-results-with-history.py b/scripts/benchmark/compare-results-with-history.py
new file mode 100755
index 00000000..ed609552
--- /dev/null
+++ b/scripts/benchmark/compare-results-with-history.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+# Given a JSON file output by pytest-benchmark, this script compares the
+# results of a test session with the results stored in elasticsearch:
+#
+# - iterate through the test results in the pytest-benchmark JSON file;
+#
+# - for each one, get the mean and standard deviation of the means of the
+#   last 20 results from the master branch;
+#
+# - compare the result in the file with the results from elasticsearch.
+
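+# For reference, a sketch of the fields this script reads from the
+# pytest-benchmark JSON file (field names are taken from the parsing code
+# below, the values are illustrative only and the real file contains more
+# data):
+#
+#   {
+#     "benchmarks": [
+#       {
+#         "name": "<test name>",
+#         "stats": {"mean": 0.123},
+#         "extra_info": {"cpu_percent": 50.0}
+#       }
+#     ]
+#   }
+#
+# For memory-profiled tests, extra_info also carries "memory_percent":
+# {"stats": {"max": ...}}, and that maximum is the value compared with
+# history.
+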
+import argparse
+import copy
+import json
+import requests
+import sys
+
+
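+# URL points to the elasticsearch index where benchmark results are stored.
+# BLOCK_SIZE is the number of most recent master commits used as the historic
+# series, and MULTIPLIER is the number of standard deviations away from the
+# historic mean beyond which a result is considered an outlier.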
+URL = "https://moose.leap.se:9200/benchmark/_search"
+BLOCK_SIZE = 20
+MULTIPLIER = 1.5
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'file',
+        help='The file with JSON results of pytest-benchmark')
+    return parser.parse_args()
+
+
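+# Extract a (name, mean, extra_info) tuple for each test found in the
+# pytest-benchmark JSON file.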
+def parse_file(file):
+    data = None
+    tests = []
+    with open(file) as f:
+        data = json.loads(f.read())
+    for test in data['benchmarks']:
+        name = test['name']
+        mean = test['stats']['mean']
+        extra = test['extra_info']
+        tests.append((name, mean, extra))
+    return tests
+
+
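+# Base elasticsearch query: select benchmark results produced on the "weasel"
+# host for the master branch of the soledad project that carry CPU
+# measurements, and aggregate them by commit id, keeping the BLOCK_SIZE most
+# recent commits ordered by commit time.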
+base_query = {
+ "query": {
+ "bool": {
+ "must": [
+ {"term": {"machine_info.host": "weasel"}},
+ {"term": {"commit_info.branch": "master"}},
+ {"term": {"commit_info.project": "soledad"}},
+ {"exists": {"field": "extra_info"}},
+ {"exists": {"field": "extra_info.cpu_percent"}}
+ ],
+ "must_not": [
+ ],
+ },
+ },
+ "aggs": {
+ "commit_id_time": {
+ "terms": {
+ "field": "commit_info.id",
+ "size": BLOCK_SIZE,
+ "order": {"commit_info_time": "desc"},
+ },
+ "aggs": {
+ "commit_info_time": {"max": {"field": "commit_info.time"}},
+ }
+ }
+ },
+}
+
+
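+# Return two lists with the per-commit averages of time (stats.mean) and CPU
+# usage (extra_info.cpu_percent) for the given test, excluding results that
+# carry memory measurements.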
+def get_time_cpu_stats(test):
+    query = copy.deepcopy(base_query)
+    query['query']['bool']['must'].append({
+        'term': {'name': test}})
+    query['query']['bool']['must_not'].append(
+        {'exists': {'field': "extra_info.memory_percent"}})
+    query['aggs']['commit_id_time']['aggs']['time'] = \
+        {"stats": {"field": "stats.mean"}}
+    query['aggs']['commit_id_time']['aggs']['cpu'] = \
+        {"stats": {"field": "extra_info.cpu_percent"}}
+    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
+    data = response.json()
+    time = []
+    cpu = []
+    buckets = data['aggregations']['commit_id_time']['buckets']
+    for bucket in buckets:
+        time.append(bucket['time']['avg'])
+        cpu.append(bucket['cpu']['avg'])
+    return time, cpu
+
+
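+# Return a list with the per-commit average of the maximum memory usage
+# (extra_info.memory_percent.stats.max) for the given test.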
+def get_mem_stats(test):
+    query = copy.deepcopy(base_query)
+    query['query']['bool']['must'].append({
+        'term': {'name': test}})
+    query['query']['bool']['must'].append(
+        {'exists': {'field': "extra_info.memory_percent"}})
+    query['aggs']['commit_id_time']['aggs']['mem'] = \
+        {"stats": {"field": "extra_info.memory_percent.stats.max"}}
+    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
+    data = response.json()
+    mem = []
+    buckets = data['aggregations']['commit_id_time']['buckets']
+    for bucket in buckets:
+        mem.append(bucket['mem']['avg'])
+    return mem
+
+
+def _mean(l):
+    return float(sum(l)) / len(l)
+
+
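+# Sample standard deviation (n - 1 in the denominator).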
+def _std(l):
+    if len(l) <= 1:
+        return 0
+    mean = _mean(l)
+    squares = [(x - mean) ** 2 for x in l]
+    return (sum(squares) / (len(l) - 1)) ** 0.5
+
+
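+# A result is considered a bad outlier when it lies more than MULTIPLIER
+# standard deviations above the historic mean: memory-profiled tests are
+# checked for memory usage, all other tests for time and CPU usage.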
+def detect_bad_outlier(test, mean, extra):
+    bad = False
+    if 'memory_percent' in extra:
+        mem = get_mem_stats(test)
+        value = extra['memory_percent']['stats']['max']
+        bad |= _detect_outlier('mem', value, mem) > 0
+    else:
+        time, cpu = get_time_cpu_stats(test)
+
+        value = mean
+        bad |= _detect_outlier('time', value, time) > 0
+
+        value = extra['cpu_percent']
+        bad |= _detect_outlier('cpu', value, cpu) > 0
+    return bad
+
+
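+# Return -1, 0 or 1 depending on whether the value lies below, within or
+# above the interval mean +- MULTIPLIER * std of the historic series.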
+def _detect_outlier(name, value, values):
+    mean = _mean(values)
+    std = _std(values)
+    result = 0
+    print "%s: %f ? %f +- %f * %f" \
+        % (name, value, mean, MULTIPLIER, std)
+    if value < mean - MULTIPLIER * std:
+        print "%s: %f < %f - %f * %f" \
+            % (name, value, mean, MULTIPLIER, std)
+        result = -1
+    elif value > mean + MULTIPLIER * std:
+        print "%s: %f > %f + %f * %f" \
+            % (name, value, mean, MULTIPLIER, std)
+        result = 1
+    return result
+
+
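+# Exit with a non-zero status if any result is flagged as a bad outlier, so
+# that the calling CI job fails.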
+if __name__ == '__main__':
+    args = parse_args()
+    tests = parse_file(args.file)
+    failed = False
+    for test, mean, extra in tests:
+        failed |= detect_bad_outlier(test, mean, extra)
+    if failed:
+        sys.exit(1)
diff --git a/scripts/benchmark/run-benchmarks-ci-job.sh b/scripts/benchmark/run-benchmarks-ci-job.sh
index b2a8c417..835f8c7f 100755
--- a/scripts/benchmark/run-benchmarks-ci-job.sh
+++ b/scripts/benchmark/run-benchmarks-ci-job.sh
@@ -1,16 +1,23 @@
#!/bin/sh
+# Run benchmark tests for CI jobs and, optionally, compare results with the
+# historic series.
+#
+# Usage Example
+# -------------
+#
+# Run this script with the environment name as the only argument:
+#
+# ./run-benchmarks-ci-job.sh environment-name
+#
# This script is used in .gitlab-ci.yml to run benchmark jobs. It has been
# factored out from that file to avoid bloating it with too much information.
#
-# The benchmark job will be skiped if the RUN_BENCHMARKS variable is not set,
-# thus allowing for opting in to benchmarking.
+# Environment Variables
+# ---------------------
#
-# This is an attempt to make life of developers easier, by diminishing the time
-# of the pipeline by not running benchmarks by default. The canonical repo
-# (i.e. https://0xacab.org/leap/soledad) should have the RUN_BENCHMARKS
-# variable set to ensure that these jobs will run. Developers can add or remove
-# the variable from their environments as they see fit.
+# RUN_BENCHMARKS: If not set, skip this run.
+# CHECK_FOR_OUTLIERS: If set, check results for bad outliers.
set -eu
set -o xtrace
@@ -25,10 +32,36 @@ fi
echo "Running tox in environment ${ENVIRONMENT}..."
+#
+# run benchmark tests with tox
+#
+
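+# pytest-benchmark writes its JSON results to this temporary file, which is
+# later passed to the comparison script.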
+tempfile=$(mktemp)
/usr/bin/unbuffer \
tox \
--recreate \
-e ${ENVIRONMENT} \
-- \
--couch-url http://couchdb:5984 \
+ --benchmark-json=${tempfile} \
+ -m runme \
| /usr/bin/ts -s
+
+#
+# check results for bad outliers
+#
+
+# stop here unless environment starts with "benchmark-"
+if [ -z "$(echo ${ENVIRONMENT} | grep ^benchmark-)" ]; then
+ exit 0
+fi
+
+# stop here unless the CHECK_FOR_OUTLIERS environment variable is set
+if [ -z "${CHECK_FOR_OUTLIERS:-}" ]; then
+ exit 0
+fi
+
+# fail test for bad outliers
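+# (compare-results-with-history.py exits with a non-zero status when a bad
+# outlier is found, which makes this job fail because of "set -eu" above.)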
+echo "Comparing current test results with history..."
+basedir=$(dirname "${0}")
+${basedir}/compare-results-with-history.py ${tempfile}