From 3b46ddd3b336f55510fc7304eb7badbd5005350d Mon Sep 17 00:00:00 2001
From: drebs <drebs@riseup.net>
Date: Tue, 17 Oct 2017 10:49:31 -0200
Subject: [ci] improve outlier detection output

---
 scripts/benchmark/check-for-outliers.py           | 175 ++++++++++++++++++++++
 scripts/benchmark/compare-results-with-history.py | 166 --------------------
 scripts/benchmark/run-benchmarks-ci-job.sh        |  14 +-
 3 files changed, 179 insertions(+), 176 deletions(-)
 create mode 100755 scripts/benchmark/check-for-outliers.py
 delete mode 100755 scripts/benchmark/compare-results-with-history.py

diff --git a/scripts/benchmark/check-for-outliers.py b/scripts/benchmark/check-for-outliers.py
new file mode 100755
index 00000000..6037ef00
--- /dev/null
+++ b/scripts/benchmark/check-for-outliers.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python
+
+# Given a JSON file output by pytest-benchmark, this script compares the
+# results of a test session with the results stored in elasticsearch.
+#
+#   - iterate through test results in pytest-benchmark JSON file.
+#
+#   - for each one, get the mean and stddev of the per-commit averages of the
+#     last 20 commits on the master branch.
+#
+#   - compare the result in the file with the results in elastic.
+#
+#   - if there are bad outliers, exit with status code given in command line.
+
+import argparse
+import copy
+import json
+import requests
+import sys
+
+
+URL = "https://moose.leap.se:9200/benchmark/_search"  # elasticsearch endpoint
+BLOCK_SIZE = 20  # number of most recent commits used as history
+MULTIPLIER = 1.5  # limits are mean -/+ MULTIPLIER * stddev
+
+
+def parse_args():  # parse the command line options of this script
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'file',
+        help='The file with JSON results of pytest-benchmark')
+    parser.add_argument(
+        '--status-code', type=int, default=0,  # 0: outliers never fail the run
+        help='The status code to exit with in case bad outliers are detected.')
+    return parser.parse_args()
+
+
+def parse_file(file):
+    """
+    Load the pytest-benchmark JSON results stored in the given file.
+
+    Return a list with one (name, mean, extra_info) tuple per benchmark.
+    """
+    with open(file) as f:
+        benchmarks = json.load(f)['benchmarks']
+    return [
+        (b['name'], b['stats']['mean'], b['extra_info'])
+        for b in benchmarks]
+
+
+base_query = {  # shared by all queries; deep-copied and extended per test
+    "query": {
+        "bool": {
+            "must": [
+                {"term": {"machine_info.host": "weasel"}},
+                {"term": {"commit_info.branch": "master"}},
+                {"term": {"commit_info.project": "soledad"}},
+                {"exists": {"field": "extra_info"}},
+                {"exists": {"field": "extra_info.cpu_percent"}}
+            ],
+            "must_not": [  # filled in by get_time_cpu_stats()
+            ],
+        },
+    },
+    "aggs": {  # per-test metric aggregations are added to the inner "aggs"
+        "commit_id_time": {
+            "terms": {
+                "field": "commit_info.id",  # one bucket per commit
+                "size": BLOCK_SIZE,  # number of commits to return
+                "order": {"commit_info_time": "desc"},  # latest commits first
+            },
+            "aggs": {
+                "commit_info_time": {"max": {"field": "commit_info.time"}},
+            }
+        }
+    },
+}
+
+
+def get_time_cpu_stats(test):
+    """
+    Return the per-commit average time and cpu usage of a test, as two lists.
+
+    Raise requests.exceptions.HTTPError if elasticsearch returns an error.
+    """
+    query = copy.deepcopy(base_query)
+    query['query']['bool']['must'].append({'term': {'name': test}})
+    query['query']['bool']['must_not'].append(
+        {'exists': {'field': "extra_info.memory_percent"}})
+    aggs = query['aggs']['commit_id_time']['aggs']
+    aggs['time'] = {"stats": {"field": "stats.mean"}}
+    aggs['cpu'] = {"stats": {"field": "extra_info.cpu_percent"}}
+    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
+    response.raise_for_status()  # fail with a clear error, not a KeyError
+    buckets = response.json()['aggregations']['commit_id_time']['buckets']
+    time = [bucket['time']['avg'] for bucket in buckets]
+    cpu = [bucket['cpu']['avg'] for bucket in buckets]
+    return time, cpu
+
+
+def get_mem_stats(test):
+    """
+    Return the list of per-commit averages of the max memory usage of a test.
+    Raise requests.exceptions.HTTPError if elasticsearch returns an error.
+    """
+    query = copy.deepcopy(base_query)
+    query['query']['bool']['must'].append({'term': {'name': test}})
+    query['query']['bool']['must'].append(
+        {'exists': {'field': "extra_info.memory_percent"}})
+    query['aggs']['commit_id_time']['aggs']['mem'] = \
+        {"stats": {"field": "extra_info.memory_percent.stats.max"}}
+    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
+    response.raise_for_status()  # fail with a clear error, not a KeyError
+    buckets = response.json()['aggregations']['commit_id_time']['buckets']
+    return [bucket['mem']['avg'] for bucket in buckets]
+
+
+def _mean(l):  # arithmetic mean; raises ZeroDivisionError for an empty list
+    return float(sum(l)) / len(l)
+
+
+def _std(l):  # sample standard deviation (divides by n - 1)
+    if len(l) <= 1:
+        return 0  # not enough values to estimate a deviation
+    mean = _mean(l)
+    squares = [(x - mean) ** 2 for x in l]
+    return (sum(squares) / (len(l) - 1)) ** 0.5
+
+
+def detect_bad_outlier(test, mean, extra):  # True if any metric is too high
+    bad = False
+    if 'memory_percent' in extra:
+        mem = get_mem_stats(test)
+        value = extra['memory_percent']['stats']['max']
+        bad |= _detect_outlier(test, 'mem', value, mem) > 0  # 1 means bad
+    else:
+        time, cpu = get_time_cpu_stats(test)
+        # the time of the test is the mean reported by pytest-benchmark
+        value = mean
+        bad |= _detect_outlier(test, 'time', value, time) > 0
+        # the cpu usage comes from the extra_info of the test result
+        value = extra['cpu_percent']
+        bad |= _detect_outlier(test, 'cpu', value, cpu) > 0
+    return bad
+
+
+def _detect_outlier(test, name, value, list):
+    """Return 1 if value is a bad outlier of list, -1 if good, 0 otherwise."""
+    print "Checking %s (%s):" % (test, name)
+    print "  value: %f" % (value,)
+    if not list:  # e.g. a test that never ran on master: nothing to compare
+        print "  => no history found, skipping check."
+        return 0
+    mean, margin = _mean(list), MULTIPLIER * _std(list)
+    print "  lower limit: %f" % (mean - margin)
+    print "  upper limit: %f" % (mean + margin)
+    if value < mean - margin:
+        print "  => good outlier detected!"
+    elif value > mean + margin:
+        print "  => bad outlier detected!"
+    return (value > mean + margin) - (value < mean - margin)  # 1, -1 or 0
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    tests = parse_file(args.file)
+    print "Checking %d test results for outliers..." % len(tests)
+    failed = False
+    for test, mean, extra in tests:
+        failed |= detect_bad_outlier(test, mean, extra)  # check every test
+    if failed:
+        print "Tests have bad outliers! o_O"
+        sys.exit(args.status_code)  # 0 (the default) still means success
+    else:  # good outliers may have been reported above, they are not failures
+        print "All good, no bad outliers were detected. :-)"
diff --git a/scripts/benchmark/compare-results-with-history.py b/scripts/benchmark/compare-results-with-history.py
deleted file mode 100755
index ed609552..00000000
--- a/scripts/benchmark/compare-results-with-history.py
+++ /dev/null
@@ -1,166 +0,0 @@
-#!/usr/bin/env python
-
-# Given a JSON file output by pytest-benchmark, this script compares the
-# results of a test session with the results stored in elasticsearch.
-#
-#   - iterate through test results in pytest-benchmark JSON file.
-#
-#   - for each one, get mean and stddev of the mean of last 20 results from
-#     master branch.
-#
-#   - compare the result in the file with the results in elastic.
-
-import argparse
-import copy
-import json
-import requests
-import sys
-
-
-URL = "https://moose.leap.se:9200/benchmark/_search"
-BLOCK_SIZE = 20
-MULTIPLIER = 1.5
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        'file',
-        help='The file with JSON results of pytest-benchmark')
-    return parser.parse_args()
-
-
-def parse_file(file):
-    data = None
-    tests = []
-    with open(file) as f:
-        data = json.loads(f.read())
-    for test in data['benchmarks']:
-        name = test['name']
-        mean = test['stats']['mean']
-        extra = test['extra_info']
-        tests.append((name, mean, extra))
-    return tests
-
-
-base_query = {
-    "query": {
-        "bool": {
-            "must": [
-                {"term": {"machine_info.host": "weasel"}},
-                {"term": {"commit_info.branch": "master"}},
-                {"term": {"commit_info.project": "soledad"}},
-                {"exists": {"field": "extra_info"}},
-                {"exists": {"field": "extra_info.cpu_percent"}}
-            ],
-            "must_not": [
-            ],
-        },
-    },
-    "aggs": {
-        "commit_id_time": {
-            "terms": {
-                "field": "commit_info.id",
-                "size": BLOCK_SIZE,
-                "order": {"commit_info_time": "desc"},
-            },
-            "aggs": {
-                "commit_info_time": {"max": {"field": "commit_info.time"}},
-            }
-        }
-    },
-}
-
-
-def get_time_cpu_stats(test):
-    query = copy.deepcopy(base_query)
-    query['query']['bool']['must'].append({
-        'term': {'name': test}})
-    query['query']['bool']['must_not'].append(
-        {'exists': {'field': "extra_info.memory_percent"}})
-    query['aggs']['commit_id_time']['aggs']['time'] = \
-        {"stats": {"field": "stats.mean"}}
-    query['aggs']['commit_id_time']['aggs']['cpu'] = \
-        {"stats": {"field": "extra_info.cpu_percent"}}
-    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
-    data = response.json()
-    time = []
-    cpu = []
-    buckets = data['aggregations']['commit_id_time']['buckets']
-    for bucket in buckets:
-        time.append(bucket['time']['avg'])
-        cpu.append(bucket['cpu']['avg'])
-    return time, cpu
-
-
-def get_mem_stats(test):
-    query = copy.deepcopy(base_query)
-    query['query']['bool']['must'].append({
-        'term': {'name': test}})
-    query['query']['bool']['must'].append(
-        {'exists': {'field': "extra_info.memory_percent"}})
-    query['aggs']['commit_id_time']['aggs']['mem'] = \
-        {"stats": {"field": "extra_info.memory_percent.stats.max"}}
-    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
-    data = response.json()
-    mem = []
-    buckets = data['aggregations']['commit_id_time']['buckets']
-    for bucket in buckets:
-        mem.append(bucket['mem']['avg'])
-    return mem
-
-
-def _mean(l):
-    return float(sum(l)) / len(l)
-
-
-def _std(l):
-    if len(l) <= 1:
-        return 0
-    mean = _mean(l)
-    squares = [(x - mean) ** 2 for x in l]
-    return (sum(squares) / (len(l) - 1)) ** 0.5
-
-
-def detect_bad_outlier(test, mean, extra):
-    bad = False
-    if 'memory_percent' in extra:
-        mem = get_mem_stats(test)
-        value = extra['memory_percent']['stats']['max']
-        bad |= _detect_outlier('mem', value, mem) > 0
-    else:
-        time, cpu = get_time_cpu_stats(test)
-
-        value = mean
-        bad |= _detect_outlier('time', value, time) > 0
-
-        value = extra['cpu_percent']
-        bad |= _detect_outlier('cpu', value, cpu) > 0
-    return bad
-
-
-def _detect_outlier(name, value, list):
-    mean = _mean(list)
-    std = _std(list)
-    result = 0
-    print "%s: %f ? %f +- %f * %f" \
-          % (name, value, mean, MULTIPLIER, std)
-    if value < mean - MULTIPLIER * std:
-        print "%s: %f < %f - %f * %f" \
-              % (name, value, mean, MULTIPLIER, std)
-        result = -1
-    elif value > mean + MULTIPLIER * std:
-        print "%s: %f > %f - %f * %f" \
-              % (name, value, mean, MULTIPLIER, std)
-        result = 1
-    return result
-
-
-if __name__ == '__main__':
-    args = parse_args()
-    tests = parse_file(args.file)
-    failed = False
-    for test, mean, extra in tests:
-        failed |= detect_bad_outlier(test, mean, extra)
-    if failed:
-        sys.exit(1)
diff --git a/scripts/benchmark/run-benchmarks-ci-job.sh b/scripts/benchmark/run-benchmarks-ci-job.sh
index 30c6ecf5..adf37b7a 100755
--- a/scripts/benchmark/run-benchmarks-ci-job.sh
+++ b/scripts/benchmark/run-benchmarks-ci-job.sh
@@ -16,8 +16,8 @@
 # Environment Variables
 # ---------------------
 #
-#   RUN_BENCHMARKS:     If not set, skip this run.
-#   CHECK_FOR_OUTLIERS: If set, check if results are outliers.
+#   RUN_BENCHMARKS          - If not set, skip this run.
+#   STATUS_CODE_IF_OUTLIERS - Exit status if bad outliers are detected (default: 0).
 
 set -eu
 set -o xtrace
@@ -55,12 +55,6 @@ if [ -z "$(echo ${ENVIRONMENT} | grep ^benchmark-)" ]; then
   exit 0
 fi
 
-# stop here unless the CHECK_FOR_OUTLIERS environment variable is set
-if [ -z "${CHECK_FOR_OUTLIERS:-}" ]; then
-  exit 0
-fi
-
-# fail test for bad outliers
-echo "Comparing current test results with history..."
+# check for bad outliers
 basedir=$(dirname "${0}")
-${basedir}/compare-results-with-history.py ${tempfile}
+"${basedir}/check-for-outliers.py" --status-code "${STATUS_CODE_IF_OUTLIERS:-0}" "${tempfile}"
-- 
cgit v1.2.3