From 61212438a57d2450db767860c6e09e43d9e53532 Mon Sep 17 00:00:00 2001
From: Kali Kaneko <kali@leap.se>
Date: Tue, 23 Dec 2014 02:10:47 -0400
Subject: add some benchmarking skeleton

---
 .../soledad/client/examples/benchmarks/.gitignore  |   1 +
 .../client/examples/benchmarks/get_sample.sh       |   3 +
 .../examples/benchmarks/measure_index_times.py     | 177 +++++++++++++++++++++
 .../benchmarks/measure_index_times_custom_docid.py | 177 +++++++++++++++++++++
 4 files changed, 358 insertions(+)
 create mode 100644 client/src/leap/soledad/client/examples/benchmarks/.gitignore
 create mode 100755 client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
 create mode 100644 client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
 create mode 100644 client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py

diff --git a/client/src/leap/soledad/client/examples/benchmarks/.gitignore b/client/src/leap/soledad/client/examples/benchmarks/.gitignore
new file mode 100644
index 00000000..2211df63
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/.gitignore
@@ -0,0 +1 @@
+*.txt
diff --git a/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
new file mode 100755
index 00000000..1995eee1
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+mkdir -p tmp
+wget http://www.gutenberg.org/cache/epub/101/pg101.txt -O hacker_crackdown.txt
diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
new file mode 100644
index 00000000..7fa1e38f
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# measure_index_times.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+Measure u1db retrieval times for different u1db index situations.
+"""
+from __future__ import print_function
+from functools import partial
+import datetime
+import hashlib
+import os
+import sys
+
+import u1db
+from twisted.internet import defer, reactor
+
+from leap.soledad.client import adbapi
+from leap.soledad.client.sqlcipher import SQLCipherOptions
+
+
+folder = os.environ.get("TMPDIR", "tmp")
+numdocs = int(os.environ.get("DOCS", "1000"))
+silent = os.environ.get("SILENT", False)
+tmpdb = os.path.join(folder, "test.soledad")
+
+
+sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt")
+sample_path = os.path.join(os.curdir, sample_file)
+
+try:
+    with open(sample_path) as f:  # path resolved against os.curdir above
+        SAMPLE = f.readlines()
+except EnvironmentError:  # IOError/OSError only, so other bugs still surface
+    print("[!] Problem opening sample file. Did you download "
+          "the sample, or correctly set 'SAMPLE' env var?")
+    sys.exit(1)
+
+if numdocs > len(SAMPLE):
+    print("[!] Sorry! The requested DOCS number is larger than "
+          "the num of lines in our sample file")
+    sys.exit(1)
+
+
+def debug(*args):
+    if not silent:
+        print(*args)
+
+debug("[+] db path:", tmpdb)
+debug("[+] num docs", numdocs)
+
+if os.path.isfile(tmpdb):
+    debug("[+] Removing existing db file...")
+    os.remove(tmpdb)
+
+start_time = datetime.datetime.now()
+
+opts = SQLCipherOptions(tmpdb, "secret", create=True)
+dbpool = adbapi.getConnectionPool(opts)
+
+
+def createDoc(doc):
+    return dbpool.runU1DBQuery("create_doc", doc)
+
+db_indexes = {
+    'by-chash': ['chash'],
+    'by-number': ['number']}
+
+
+def create_indexes(_):
+    """Issue create_index for each db_indexes entry; gather the Deferreds."""
+    pending = [
+        dbpool.runU1DBQuery("create_index", name, *fields)
+        for name, fields in db_indexes.items()]
+    return defer.gatherResults(pending)
+
+
+class TimeWitness(object):
+    def __init__(self, init_time):
+        self.init_time = init_time
+
+    def get_time_count(self):
+        return datetime.datetime.now() - self.init_time
+
+
+def get_from_index(_):
+    init_time = datetime.datetime.now()
+    debug("GETTING FROM INDEX...", init_time)
+
+    def printValue(res, time):
+        print("RESULT->", res)
+        print("Index Query Took: ", time.get_time_count())
+        return res
+
+    d = dbpool.runU1DBQuery(
+        "get_from_index", "by-chash",
+        #"1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5")
+        # XXX this is line 89 from the hacker crackdown...
+        # Should accept any other optional hash as an environment variable.
+        "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469")
+    d.addCallback(printValue, TimeWitness(init_time))
+    return d
+
+
+def getAllDocs():
+    return dbpool.runU1DBQuery("get_all_docs")
+
+
+def errBack(e):
+    debug("[!] ERROR FOUND!!!")
+    e.printTraceback()
+    reactor.stop()
+
+
+def countDocs(_):
+    """Verify the stored doc count, then finish (or report the failure)."""
+    debug("counting docs...")
+    d = getAllDocs().addCallback(printResult)
+    # Single errback: an earlier one would let allDone stop the reactor again.
+    return d.addCallbacks(allDone, errBack)
+
+
+def printResult(r, **kwargs):
+    """Log one result; for a get_all_docs result also check the doc count."""
+    if kwargs:
+        # insert_docs binds i/chash/payload as keywords: just echo them.
+        return debug(*kwargs.values())
+    if isinstance(r, u1db.Document):
+        return debug(r.doc_id, r.content['number'])
+
+    found = len(r[1])
+    debug("GOT %s results" % found)
+    if found != numdocs:
+        debug("[!] MISSING DOCS!!!!!")
+        raise ValueError("We didn't expect this result len")
+    debug("ALL GOOD")
+
+
+def allDone(_):
+    """Print the total elapsed seconds and stop the reactor."""
+    debug("ALL DONE!")
+    # The timing goes through print(), so it shows even when SILENT is set.
+    elapsed = datetime.datetime.now() - start_time
+    print(elapsed.total_seconds())
+    reactor.stop()
+
+
+def insert_docs(_):
+    deferreds = []
+    for i in range(numdocs):
+        payload = SAMPLE[i]
+        chash = hashlib.sha256(payload).hexdigest()
+        doc = {"number": i, "payload": payload, 'chash': chash}
+        d = createDoc(doc)
+        d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload),
+                       lambda e: e.printTraceback())
+        deferreds.append(d)
+    return defer.gatherResults(deferreds, consumeErrors=True)
+
+d = create_indexes(None)
+d.addCallback(insert_docs)
+d.addCallback(get_from_index)
+d.addCallback(countDocs).addErrback(errBack)  # stop reactor on any failure
+
+reactor.run()
diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py
new file mode 100644
index 00000000..c6d76e6b
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# measure_index_times_custom_docid.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+Measure u1db retrieval times for different u1db index situations.
+"""
+from __future__ import print_function
+from functools import partial
+import datetime
+import hashlib
+import os
+import sys
+
+import u1db
+from twisted.internet import defer, reactor
+
+from leap.soledad.client import adbapi
+from leap.soledad.client.sqlcipher import SQLCipherOptions
+
+
+folder = os.environ.get("TMPDIR", "tmp")
+numdocs = int(os.environ.get("DOCS", "1000"))
+silent = os.environ.get("SILENT", False)
+tmpdb = os.path.join(folder, "test.soledad")
+
+
+sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt")
+sample_path = os.path.join(os.curdir, sample_file)
+
+try:
+    with open(sample_path) as f:  # path resolved against os.curdir above
+        SAMPLE = f.readlines()
+except EnvironmentError:  # IOError/OSError only, so other bugs still surface
+    print("[!] Problem opening sample file. Did you download "
+          "the sample, or correctly set 'SAMPLE' env var?")
+    sys.exit(1)
+
+if numdocs > len(SAMPLE):
+    print("[!] Sorry! The requested DOCS number is larger than "
+          "the num of lines in our sample file")
+    sys.exit(1)
+
+
+def debug(*args):
+    if not silent:
+        print(*args)
+
+debug("[+] db path:", tmpdb)
+debug("[+] num docs", numdocs)
+
+if os.path.isfile(tmpdb):
+    debug("[+] Removing existing db file...")
+    os.remove(tmpdb)
+
+start_time = datetime.datetime.now()
+
+opts = SQLCipherOptions(tmpdb, "secret", create=True)
+dbpool = adbapi.getConnectionPool(opts)
+
+
+def createDoc(doc, doc_id):
+    return dbpool.runU1DBQuery("create_doc", doc, doc_id=doc_id)
+
+db_indexes = {
+    'by-chash': ['chash'],
+    'by-number': ['number']}
+
+
+def create_indexes(_):
+    """Issue create_index for each db_indexes entry; gather the Deferreds."""
+    pending = [
+        dbpool.runU1DBQuery("create_index", name, *fields)
+        for name, fields in db_indexes.items()]
+    return defer.gatherResults(pending)
+
+
+class TimeWitness(object):
+    def __init__(self, init_time):
+        self.init_time = init_time
+
+    def get_time_count(self):
+        return datetime.datetime.now() - self.init_time
+
+
+def get_from_index(_):
+    init_time = datetime.datetime.now()
+    debug("GETTING FROM INDEX...", init_time)
+
+    def printValue(res, time):
+        print("RESULT->", res)
+        print("Index Query Took: ", time.get_time_count())
+        return res
+
+    d = dbpool.runU1DBQuery(
+        "get_doc",
+        #"1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5")
+        # XXX this is line 89 from the hacker crackdown...
+        # Should accept any other optional hash as an environment variable.
+        "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469")
+    d.addCallback(printValue, TimeWitness(init_time))
+    return d
+
+
+def getAllDocs():
+    return dbpool.runU1DBQuery("get_all_docs")
+
+
+def errBack(e):
+    debug("[!] ERROR FOUND!!!")
+    e.printTraceback()
+    reactor.stop()
+
+
+def countDocs(_):
+    """Verify the stored doc count, then finish (or report the failure)."""
+    debug("counting docs...")
+    d = getAllDocs().addCallback(printResult)
+    # Single errback: an earlier one would let allDone stop the reactor again.
+    return d.addCallbacks(allDone, errBack)
+
+
+def printResult(r, **kwargs):
+    """Log one result; for a get_all_docs result also check the doc count."""
+    if kwargs:
+        # insert_docs binds i/chash/payload as keywords: just echo them.
+        return debug(*kwargs.values())
+    if isinstance(r, u1db.Document):
+        return debug(r.doc_id, r.content['number'])
+
+    found = len(r[1])
+    debug("GOT %s results" % found)
+    if found != numdocs:
+        debug("[!] MISSING DOCS!!!!!")
+        raise ValueError("We didn't expect this result len")
+    debug("ALL GOOD")
+
+
+def allDone(_):
+    """Print the total elapsed seconds and stop the reactor."""
+    debug("ALL DONE!")
+    # The timing goes through print(), so it shows even when SILENT is set.
+    elapsed = datetime.datetime.now() - start_time
+    print(elapsed.total_seconds())
+    reactor.stop()
+
+
+def insert_docs(_):
+    deferreds = []
+    for i in range(numdocs):
+        payload = SAMPLE[i]
+        chash = hashlib.sha256(payload).hexdigest()
+        doc = {"number": i, "payload": payload, 'chash': chash}
+        d = createDoc(doc, doc_id=chash)
+        d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload),
+                       lambda e: e.printTraceback())
+        deferreds.append(d)
+    return defer.gatherResults(deferreds, consumeErrors=True)
+
+d = create_indexes(None)
+d.addCallback(insert_docs)
+d.addCallback(get_from_index)
+d.addCallback(countDocs).addErrback(errBack)  # stop reactor on any failure
+
+reactor.run()
-- 
cgit v1.2.3