-rwxr-xr-x  scripts/benchmark/compare-results-with-history.py | 166
-rwxr-xr-x  scripts/benchmark/run-benchmarks-ci-job.sh        |  47
2 files changed, 206 insertions(+), 7 deletions(-)
diff --git a/scripts/benchmark/compare-results-with-history.py b/scripts/benchmark/compare-results-with-history.py
new file mode 100755
index 00000000..ed609552
--- /dev/null
+++ b/scripts/benchmark/compare-results-with-history.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+# Given a JSON file output by pytest-benchmark, this script compares the
+# results of a test session with the results stored in elasticsearch.
+#
+#   - iterate through test results in pytest-benchmark JSON file.
+#
+#   - for each one, get mean and stddev of the mean of last 20 results from
+#     master branch.
+#
+#   - compare the result in the file with the results in elastic.
+
+import argparse
+import copy
+import json
+import requests
+import sys
+
+
+URL = "https://moose.leap.se:9200/benchmark/_search"
+BLOCK_SIZE = 20
+MULTIPLIER = 1.5
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'file',
+        help='The file with JSON results of pytest-benchmark')
+    return parser.parse_args()
+
+
+def parse_file(file):
+    data = None
+    tests = []
+    with open(file) as f:
+        data = json.loads(f.read())
+    for test in data['benchmarks']:
+        name = test['name']
+        mean = test['stats']['mean']
+        extra = test['extra_info']
+        tests.append((name, mean, extra))
+    return tests
+
+
+base_query = {
+    "query": {
+        "bool": {
+            "must": [
+                {"term": {"machine_info.host": "weasel"}},
+                {"term": {"commit_info.branch": "master"}},
+                {"term": {"commit_info.project": "soledad"}},
+                {"exists": {"field": "extra_info"}},
+                {"exists": {"field": "extra_info.cpu_percent"}}
+            ],
+            "must_not": [
+            ],
+        },
+    },
+    "aggs": {
+        "commit_id_time": {
+            "terms": {
+                "field": "commit_info.id",
+                "size": BLOCK_SIZE,
+                "order": {"commit_info_time": "desc"},
+            },
+            "aggs": {
+                "commit_info_time": {"max": {"field": "commit_info.time"}},
+            }
+        }
+    },
+}
+
+
+def get_time_cpu_stats(test):
+    query = copy.deepcopy(base_query)
+    query['query']['bool']['must'].append({
+        'term': {'name': test}})
+    query['query']['bool']['must_not'].append(
+        {'exists': {'field': "extra_info.memory_percent"}})
+    query['aggs']['commit_id_time']['aggs']['time'] = \
+        {"stats": {"field": "stats.mean"}}
+    query['aggs']['commit_id_time']['aggs']['cpu'] = \
+        {"stats": {"field": "extra_info.cpu_percent"}}
+    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
+    data = response.json()
+    time = []
+    cpu = []
+    buckets = data['aggregations']['commit_id_time']['buckets']
+    for bucket in buckets:
+        time.append(bucket['time']['avg'])
+        cpu.append(bucket['cpu']['avg'])
+    return time, cpu
+
+
+def get_mem_stats(test):
+    query = copy.deepcopy(base_query)
+    query['query']['bool']['must'].append({
+        'term': {'name': test}})
+    query['query']['bool']['must'].append(
+        {'exists': {'field': "extra_info.memory_percent"}})
+    query['aggs']['commit_id_time']['aggs']['mem'] = \
+        {"stats": {"field": "extra_info.memory_percent.stats.max"}}
+    response = requests.get("%s?size=0" % URL, data=json.dumps(query))
+    data = response.json()
+    mem = []
+    buckets = data['aggregations']['commit_id_time']['buckets']
+    for bucket in buckets:
+        mem.append(bucket['mem']['avg'])
+    return mem
+
+
+def _mean(l):
+    return float(sum(l)) / len(l)
+
+
+def _std(l):
+    if len(l) <= 1:
+        return 0
+    mean = _mean(l)
+    squares = [(x - mean) ** 2 for x in l]
+    return (sum(squares) / (len(l) - 1)) ** 0.5
+
+
+def detect_bad_outlier(test, mean, extra):
+    bad = False
+    if 'memory_percent' in extra:
+        mem = get_mem_stats(test)
+        value = extra['memory_percent']['stats']['max']
+        bad |= _detect_outlier('mem', value, mem) > 0
+    else:
+        time, cpu = get_time_cpu_stats(test)
+
+        value = mean
+        bad |= _detect_outlier('time', value, time) > 0
+
+        value = extra['cpu_percent']
+        bad |= _detect_outlier('cpu', value, cpu) > 0
+    return bad
+
+
+def _detect_outlier(name, value, list):
+    mean = _mean(list)
+    std = _std(list)
+    result = 0
+    print "%s: %f ? %f +- %f * %f" \
+          % (name, value, mean, MULTIPLIER, std)
+    if value < mean - MULTIPLIER * std:
+        print "%s: %f < %f - %f * %f" \
+              % (name, value, mean, MULTIPLIER, std)
+        result = -1
+    elif value > mean + MULTIPLIER * std:
+        print "%s: %f > %f + %f * %f" \
+              % (name, value, mean, MULTIPLIER, std)
+        result = 1
+    return result
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    tests = parse_file(args.file)
+    failed = False
+    for test, mean, extra in tests:
+        failed |= detect_bad_outlier(test, mean, extra)
+    if failed:
+        sys.exit(1)
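
Aside (not part of the patch above): the rule implemented by _detect_outlier() can be sketched in isolation. A result is flagged as a bad outlier when it lies more than MULTIPLIER sample standard deviations above the mean of the historic series (the last BLOCK_SIZE master-branch runs fetched from elasticsearch). The history values and the candidate result below are invented and only illustrate the arithmetic.

    MULTIPLIER = 1.5

    # invented history, e.g. mean run times (seconds) of recent master runs
    history = [4.8, 5.0, 5.2, 4.9, 5.1, 5.0]

    mean = sum(history) / len(history)
    # sample standard deviation, matching _std() above
    std = (sum((x - mean) ** 2 for x in history) / (len(history) - 1)) ** 0.5

    candidate = 6.0  # invented current result
    is_bad = candidate > mean + MULTIPLIER * std
    print("mean=%.3f std=%.3f bad=%s" % (mean, std, is_bad))
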
diff --git a/scripts/benchmark/run-benchmarks-ci-job.sh b/scripts/benchmark/run-benchmarks-ci-job.sh
index b2a8c417..835f8c7f 100755
--- a/scripts/benchmark/run-benchmarks-ci-job.sh
+++ b/scripts/benchmark/run-benchmarks-ci-job.sh
@@ -1,16 +1,23 @@
 #!/bin/sh
+# Run benchmark tests for CI jobs, and optionally compare results with historic
+# series.
+#
+# Usage Example
+# -------------
+#
+# Run this script with the environment name as the only argument:
+#
+#   ./run-benchmarks-ci-job.sh environment-name
+#
 # This script is used in .gitlab-ci.yml to run benchmark jobs. It has been
 # factored out from that file to avoid bloating it with too much information.
 #
-# The benchmark job will be skiped if the RUN_BENCHMARKS variable is not set,
-# thus allowing for opting in to benchmarking.
+# Environment Variables
+# ---------------------
 #
-# This is an attempt to make life of developers easier, by diminishing the time
-# of the pipeline by not running benchmarks by default. The canonical repo
-# (i.e. https://0xacab.org/leap/soledad) should have the RUN_BENCHMARKS
-# variable set to ensure that these jobs will run. Developers can add or remove
-# the variable from their environments as they see fit.
+#   RUN_BENCHMARKS:     If not set, skip this run.
+#   CHECK_FOR_OUTLIERS: If set, check if results are outliers.
 
 set -eu
 set -o xtrace
 
@@ -25,10 +32,36 @@ fi
 
 echo "Running tox in environment ${ENVIRONMENT}..."
 
+#
+# run benchmark tests with tox
+#
+
+tempfile=$(mktemp)
 /usr/bin/unbuffer \
   tox \
     --recreate \
     -e ${ENVIRONMENT} \
     -- \
     --couch-url http://couchdb:5984 \
+    --benchmark-json=${tempfile} \
+    -m runme \
   | /usr/bin/ts -s
+
+#
+# check results for bad outlier detection
+#
+
+# stop here unless environment starts with "benchmark-"
+if [ -z "$(echo ${ENVIRONMENT} | grep ^benchmark-)" ]; then
+  exit 0
+fi
+
+# stop here unless the CHECK_FOR_OUTLIERS environment variable is set
+if [ -z "${CHECK_FOR_OUTLIERS:-}" ]; then
+  exit 0
+fi
+
+# fail test for bad outliers
+echo "Comparing current test results with history..."
+basedir=$(dirname "${0}")
+${basedir}/compare-results-with-history.py ${tempfile}
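
Aside (again, not part of the patch): to exercise the comparison script outside CI, it can be fed a hand-written file shaped like the output of pytest-benchmark's --benchmark-json option, which is what the tox run above writes to ${tempfile}. The field names below (benchmarks, name, stats.mean, extra_info.cpu_percent) mirror what parse_file() and detect_bad_outlier() read; the test name and values are invented.

    import json
    import tempfile

    # invented results in the layout written by --benchmark-json
    fake_results = {
        "benchmarks": [
            {
                "name": "test_upload_20_500k",
                "stats": {"mean": 5.1},
                "extra_info": {"cpu_percent": 180.0},
            },
        ],
    }

    with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False) as f:
        json.dump(fake_results, f)

    # the CI wrapper above would then run, roughly:
    #   ./compare-results-with-history.py <path printed below>
    print("wrote fake benchmark results to %s" % f.name)
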
