diff options
Diffstat (limited to 'testing/tests')
| -rw-r--r-- | testing/tests/benchmarks/test_legacy_vs_blobs.py | 259 | 
1 files changed, 259 insertions, 0 deletions
| diff --git a/testing/tests/benchmarks/test_legacy_vs_blobs.py b/testing/tests/benchmarks/test_legacy_vs_blobs.py new file mode 100644 index 00000000..646f7d3f --- /dev/null +++ b/testing/tests/benchmarks/test_legacy_vs_blobs.py @@ -0,0 +1,259 @@ +# "Legacy" versus "Incoming blobs" pipeline comparison +# ==================================================== +# +# This benchmarking aims to compare the legacy and new mail incoming pipeline, +# to asses performance improvements brought by the introduction of blobs. +# +# We use the following sizes in these tests: +# +#   - headers:  4   KB +#   - metadata: 0.1 KB +#   - flags:    0.5 KB +#   - content:  variable +# +# "Legacy" incoming mail pipeline: +# +#   - email arrives at MX. +#   - MX encrypts to public key and puts into couch. +#   - pubkey encrypted doc is synced to soledad client as "incoming". +#   - bitmask mail processes "incoming" and generates 3 metadocs + 1 payload +#     doc per message. +#   - soledad client syncs 4 documents back to server. +# +# "Incoming blobs" mail pipeline: +# +#   - email arrives at MX. +#   - MX encyrpts to public key and puts into soledad server. +#   - soledad server writes a blob to filesystem. +#   - soledad client gets the incoming blob from server and generates 3 +#     metadocs + 1 blob. +#   - soledad client syncs 3 meta documents and 1 blob back to server. +# +# Some notes about the tests in this file: +# +#   - This is a simulation of the legacy and new incoming mail pipelines. +#     There is no actual mail processing operation done (i.e. no pubkey crypto, +#     no mail parsing), only usual soledad document manipulation and sync (with +#     local 1network and crypto). +# +#   - Each test simulates a whole incoming mail pipeline, including get new +#     incoming messages from server, create new documents that represent the +#     parsed message, and synchronize those back to the server. +# +#   - These tests are disabled by default because it doesn't make much sense to +#     have them run automatically for all commits in the repository. Instead, +#     we will run them manually for specific releases and store results and +#     analisys in a subfolder. + +import base64 +import pytest +import sys +import treq +import uuid + +from io import BytesIO + +from twisted.internet.defer import gatherResults, returnValue + +from leap.soledad.common.blobs import Flags +from leap.soledad.client._db.blobs import BlobDoc + + +PARTS_SIZES = { +    'headers': 4000, +    'metadata': 100, +    'flags': 500, +} + + +# +# "Legacy" incoming mail pipeline. +# + +@pytest.inlineCallbacks +def load_up_legacy(client, amount, content): +    # make sure there are no document from previous runs +    yield client.sync() +    _, docs = yield client.get_all_docs() +    deferreds = [] +    for doc in docs: +        d = client.delete_doc(doc) +        deferreds.append(d) +    yield gatherResults(deferreds) +    yield client.sync() + +    # create a bunch of local documents representing email messages +    deferreds = [] +    for i in xrange(amount): +        deferreds.append(client.create_doc(content)) +    yield gatherResults(deferreds) +    yield client.sync() + + +def create_legacy(downloads, size): +    @pytest.inlineCallbacks +    @pytest.mark.skip(reason="avoid running for all commits") +    @pytest.mark.benchmark(group="test_legacy_vs_blobs") +    def test(soledad_client, txbenchmark_with_setup, payload): +        client = soledad_client() + +        content = {'content': payload(size), 'incoming': True} +        for n, s in PARTS_SIZES.items(): +            content[n] = payload(s) + +        @pytest.inlineCallbacks +        def setup(): +            yield load_up_legacy(client, downloads, content) +            returnValue(soledad_client(force_fresh_db=True)) + +        @pytest.inlineCallbacks +        def legacy_pipeline(clean_client): + +            # create indexes so we can later retrieve incoming docs +            yield clean_client.create_index('incoming', 'bool(incoming)') + +            # receive all documents from server +            yield clean_client.sync() + +            # get incoming documents +            docs = yield clean_client.get_from_index('incoming', '1') + +            # process incoming documents +            deferreds = [] +            for doc in docs: + +                # create fake documents that represent message +                for name in PARTS_SIZES.keys(): +                    d = clean_client.create_doc({name: doc.content[name]}) +                    deferreds.append(d) + +                # create one document with content +                key = 'content' +                d = clean_client.create_doc({key: doc.content[key]}) +                deferreds.append(d) + +                # delete the old incoming document +                d = clean_client.delete_doc(doc) +                deferreds.append(d) + +            # wait for all operatios to succeed +            yield gatherResults(deferreds) + +            # sync new documents back to server +            yield clean_client.sync() + +        yield txbenchmark_with_setup(setup, legacy_pipeline) +    return test + + +# ATTENTION: update the documentation in ../docs/benchmarks.rst if you change +# the number of docs or the doc sizes for the tests below. +test_legacy_10_1000k = create_legacy(10, 1000 * 1000) +test_legacy_100_100k = create_legacy(100, 100 * 1000) +test_legacy_1000_10k = create_legacy(1000, 10 * 1000) + + +# +# "Incoming blobs" mail pipeline: +# + +@pytest.inlineCallbacks +def deliver(url, user_uuid, token, payload): +    auth = 'Token %s' % base64.b64encode('%s:%s' % (user_uuid, token)) +    uri = "%s/incoming/%s/%s?namespace=MX" % (url, user_uuid, uuid.uuid4().hex) +    yield treq.put(uri, headers={'Authorization': auth}, +                   data=BytesIO(payload)) + + +def should_brake(i): +    return ((i + 1) % 5) == 0 + + +@pytest.inlineCallbacks +def load_up_blobs(client, amount, payload): +    # create a bunch of local documents representing email messages +    deferreds = [] +    for i in xrange(amount): +        d = deliver(client.server_url, client.uuid, client.token, payload) +        deferreds.append(d) +        if should_brake(i): +            yield gatherResults(deferreds) +            deferreds = [] +    yield gatherResults(deferreds) + + +def create_blobs(downloads, size): +    @pytest.inlineCallbacks +    @pytest.mark.skip(reason="avoid running for all commits") +    @pytest.mark.benchmark(group="test_legacy_vs_blobs") +    def test(soledad_client, txbenchmark_with_setup, payload): +        client = soledad_client() +        blob_payload = payload(size) + +        @pytest.inlineCallbacks +        def setup(): +            yield load_up_blobs(client, downloads, blob_payload) +            returnValue(soledad_client(force_fresh_db=True)) + +        @pytest.inlineCallbacks +        def blobs_pipeline(clean_client): + +            # get list of pending incoming blobs +            pending = yield clean_client.blobmanager.remote_list( +                namespace='MX', filter_flags=Flags.PENDING) + +            # download incoming blobs +            deferreds = [] +            incoming = [] +            i = 0 +            for item in pending: +                d = clean_client.blobmanager.get(item, namespace='MX') +                deferreds.append(d) +                if should_brake(i): +                    incoming += yield gatherResults(deferreds) +                    deferreds = [] +                i += 1 +            incoming += yield gatherResults(deferreds) + +            # create data on local client +            deferreds = [] +            i = 0 +            for item in incoming: +                for name, size in PARTS_SIZES.items(): +                    d = clean_client.create_doc({name: payload(size)}) +                    deferreds.append(d) +                doc = BlobDoc(item, blob_id=uuid.uuid4().hex) +                size = sys.getsizeof(item) +                d = clean_client.blobmanager.put( +                    doc, size, namespace='payload') +                deferreds.append(d) +                if should_brake(i): +                    gatherResults(deferreds) +                    deferreds = [] +                i += 1 +            yield gatherResults(deferreds) + +            # delete incoming from server +            deferreds = [] +            for item in pending: +                d = clean_client.blobmanager.delete(item, namespace='MX') +                deferreds.append(d) +            yield gatherResults(deferreds) + +            # sync and send blobs in parallel +            deferreds = [] +            d = clean_client.sync() +            deferreds.append(d) +            d = clean_client.blobmanager.send_missing(namespace='payload') +            deferreds.append(d) +            yield gatherResults(deferreds) + +        yield txbenchmark_with_setup(setup, blobs_pipeline) +    return test + + +# ATTENTION: update the documentation in ../docs/benchmarks.rst if you change +# the number of docs or the doc sizes for the tests below. +test_blobs_10_1000k = create_blobs(10, 1000 * 1000) +test_blobs_100_100k = create_blobs(100, 100 * 1000) +test_blobs_1000_10k = create_blobs(1000, 10 * 1000) | 
