summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKali Kaneko <kali@leap.se>2014-12-23 02:10:47 -0400
committerKali Kaneko <kali@leap.se>2015-02-11 14:03:18 -0400
commit61212438a57d2450db767860c6e09e43d9e53532 (patch)
tree2dae2b26186b310e09f2420ef75f5e7d845edcec
parent661ee9eb1ce29056f219a2a95688840cebcf889a (diff)
add some benchmarking skeleton
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/.gitignore1
-rwxr-xr-xclient/src/leap/soledad/client/examples/benchmarks/get_sample.sh3
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py177
-rw-r--r--client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py177
4 files changed, 358 insertions, 0 deletions
diff --git a/client/src/leap/soledad/client/examples/benchmarks/.gitignore b/client/src/leap/soledad/client/examples/benchmarks/.gitignore
new file mode 100644
index 00000000..2211df63
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/.gitignore
@@ -0,0 +1 @@
+*.txt
diff --git a/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
new file mode 100755
index 00000000..1995eee1
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/get_sample.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+mkdir tmp
+wget http://www.gutenberg.org/cache/epub/101/pg101.txt -O hacker_crackdown.txt
diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
new file mode 100644
index 00000000..7fa1e38f
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# measure_index_times.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Measure u1db retrieval times for different u1db index situations.
+"""
+from __future__ import print_function
+from functools import partial
+import datetime
+import hashlib
+import os
+import sys
+
+import u1db
+from twisted.internet import defer, reactor
+
+from leap.soledad.client import adbapi
+from leap.soledad.client.sqlcipher import SQLCipherOptions
+
+
+folder = os.environ.get("TMPDIR", "tmp")
+numdocs = int(os.environ.get("DOCS", "1000"))
+silent = os.environ.get("SILENT", False)
+tmpdb = os.path.join(folder, "test.soledad")
+
+
+sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt")
+sample_path = os.path.join(os.curdir, sample_file)
+
+try:
+ with open(sample_file) as f:
+ SAMPLE = f.readlines()
+except Exception:
+ print("[!] Problem opening sample file. Did you download "
+ "the sample, or correctly set 'SAMPLE' env var?")
+ sys.exit(1)
+
+if numdocs > len(SAMPLE):
+ print("[!] Sorry! The requested DOCS number is larger than "
+ "the num of lines in our sample file")
+ sys.exit(1)
+
+
+def debug(*args):
+ if not silent:
+ print(*args)
+
+debug("[+] db path:", tmpdb)
+debug("[+] num docs", numdocs)
+
+if os.path.isfile(tmpdb):
+ debug("[+] Removing existing db file...")
+ os.remove(tmpdb)
+
+start_time = datetime.datetime.now()
+
+opts = SQLCipherOptions(tmpdb, "secret", create=True)
+dbpool = adbapi.getConnectionPool(opts)
+
+
+def createDoc(doc):
+ return dbpool.runU1DBQuery("create_doc", doc)
+
+db_indexes = {
+ 'by-chash': ['chash'],
+ 'by-number': ['number']}
+
+
+def create_indexes(_):
+ deferreds = []
+ for index, definition in db_indexes.items():
+ d = dbpool.runU1DBQuery("create_index", index, *definition)
+ deferreds.append(d)
+ return defer.gatherResults(deferreds)
+
+
+class TimeWitness(object):
+ def __init__(self, init_time):
+ self.init_time = init_time
+
+ def get_time_count(self):
+ return datetime.datetime.now() - self.init_time
+
+
+def get_from_index(_):
+ init_time = datetime.datetime.now()
+ debug("GETTING FROM INDEX...", init_time)
+
+ def printValue(res, time):
+ print("RESULT->", res)
+ print("Index Query Took: ", time.get_time_count())
+ return res
+
+ d = dbpool.runU1DBQuery(
+ "get_from_index", "by-chash",
+ #"1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5")
+ # XXX this is line 89 from the hacker crackdown...
+ # Should accept any other optional hash as an enviroment variable.
+ "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469")
+ d.addCallback(printValue, TimeWitness(init_time))
+ return d
+
+
+def getAllDocs():
+ return dbpool.runU1DBQuery("get_all_docs")
+
+
+def errBack(e):
+ debug("[!] ERROR FOUND!!!")
+ e.printTraceback()
+ reactor.stop()
+
+
+def countDocs(_):
+ debug("counting docs...")
+ d = getAllDocs()
+ d.addCallbacks(printResult, errBack)
+ d.addCallbacks(allDone, errBack)
+ return d
+
+
+def printResult(r, **kwargs):
+ if kwargs:
+ debug(*kwargs.values())
+ elif isinstance(r, u1db.Document):
+ debug(r.doc_id, r.content['number'])
+ else:
+ len_results = len(r[1])
+ debug("GOT %s results" % len(r[1]))
+
+ if len_results == numdocs:
+ debug("ALL GOOD")
+ else:
+ debug("[!] MISSING DOCS!!!!!")
+ raise ValueError("We didn't expect this result len")
+
+
+def allDone(_):
+ debug("ALL DONE!")
+
+ #if silent:
+ end_time = datetime.datetime.now()
+ print((end_time - start_time).total_seconds())
+ reactor.stop()
+
+
+def insert_docs(_):
+ deferreds = []
+ for i in range(numdocs):
+ payload = SAMPLE[i]
+ chash = hashlib.sha256(payload).hexdigest()
+ doc = {"number": i, "payload": payload, 'chash': chash}
+ d = createDoc(doc)
+ d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload),
+ lambda e: e.printTraceback())
+ deferreds.append(d)
+ return defer.gatherResults(deferreds, consumeErrors=True)
+
+d = create_indexes(None)
+d.addCallback(insert_docs)
+d.addCallback(get_from_index)
+d.addCallback(countDocs)
+
+reactor.run()
diff --git a/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py
new file mode 100644
index 00000000..c6d76e6b
--- /dev/null
+++ b/client/src/leap/soledad/client/examples/benchmarks/measure_index_times_custom_docid.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# measure_index_times.py
+# Copyright (C) 2014 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Measure u1db retrieval times for different u1db index situations.
+"""
+from __future__ import print_function
+from functools import partial
+import datetime
+import hashlib
+import os
+import sys
+
+import u1db
+from twisted.internet import defer, reactor
+
+from leap.soledad.client import adbapi
+from leap.soledad.client.sqlcipher import SQLCipherOptions
+
+
+folder = os.environ.get("TMPDIR", "tmp")
+numdocs = int(os.environ.get("DOCS", "1000"))
+silent = os.environ.get("SILENT", False)
+tmpdb = os.path.join(folder, "test.soledad")
+
+
+sample_file = os.environ.get("SAMPLE", "hacker_crackdown.txt")
+sample_path = os.path.join(os.curdir, sample_file)
+
+try:
+ with open(sample_file) as f:
+ SAMPLE = f.readlines()
+except Exception:
+ print("[!] Problem opening sample file. Did you download "
+ "the sample, or correctly set 'SAMPLE' env var?")
+ sys.exit(1)
+
+if numdocs > len(SAMPLE):
+ print("[!] Sorry! The requested DOCS number is larger than "
+ "the num of lines in our sample file")
+ sys.exit(1)
+
+
+def debug(*args):
+ if not silent:
+ print(*args)
+
+debug("[+] db path:", tmpdb)
+debug("[+] num docs", numdocs)
+
+if os.path.isfile(tmpdb):
+ debug("[+] Removing existing db file...")
+ os.remove(tmpdb)
+
+start_time = datetime.datetime.now()
+
+opts = SQLCipherOptions(tmpdb, "secret", create=True)
+dbpool = adbapi.getConnectionPool(opts)
+
+
+def createDoc(doc, doc_id):
+ return dbpool.runU1DBQuery("create_doc", doc, doc_id=doc_id)
+
+db_indexes = {
+ 'by-chash': ['chash'],
+ 'by-number': ['number']}
+
+
+def create_indexes(_):
+ deferreds = []
+ for index, definition in db_indexes.items():
+ d = dbpool.runU1DBQuery("create_index", index, *definition)
+ deferreds.append(d)
+ return defer.gatherResults(deferreds)
+
+
+class TimeWitness(object):
+ def __init__(self, init_time):
+ self.init_time = init_time
+
+ def get_time_count(self):
+ return datetime.datetime.now() - self.init_time
+
+
+def get_from_index(_):
+ init_time = datetime.datetime.now()
+ debug("GETTING FROM INDEX...", init_time)
+
+ def printValue(res, time):
+ print("RESULT->", res)
+ print("Index Query Took: ", time.get_time_count())
+ return res
+
+ d = dbpool.runU1DBQuery(
+ "get_doc",
+ #"1150c7f10fabce0a57ce13071349fc5064f15bdb0cc1bf2852f74ef3f103aff5")
+ # XXX this is line 89 from the hacker crackdown...
+ # Should accept any other optional hash as an enviroment variable.
+ "57793320d4997a673fc7062652da0596c36a4e9fbe31310d2281e67d56d82469")
+ d.addCallback(printValue, TimeWitness(init_time))
+ return d
+
+
+def getAllDocs():
+ return dbpool.runU1DBQuery("get_all_docs")
+
+
+def errBack(e):
+ debug("[!] ERROR FOUND!!!")
+ e.printTraceback()
+ reactor.stop()
+
+
+def countDocs(_):
+ debug("counting docs...")
+ d = getAllDocs()
+ d.addCallbacks(printResult, errBack)
+ d.addCallbacks(allDone, errBack)
+ return d
+
+
+def printResult(r, **kwargs):
+ if kwargs:
+ debug(*kwargs.values())
+ elif isinstance(r, u1db.Document):
+ debug(r.doc_id, r.content['number'])
+ else:
+ len_results = len(r[1])
+ debug("GOT %s results" % len(r[1]))
+
+ if len_results == numdocs:
+ debug("ALL GOOD")
+ else:
+ debug("[!] MISSING DOCS!!!!!")
+ raise ValueError("We didn't expect this result len")
+
+
+def allDone(_):
+ debug("ALL DONE!")
+
+ #if silent:
+ end_time = datetime.datetime.now()
+ print((end_time - start_time).total_seconds())
+ reactor.stop()
+
+
+def insert_docs(_):
+ deferreds = []
+ for i in range(numdocs):
+ payload = SAMPLE[i]
+ chash = hashlib.sha256(payload).hexdigest()
+ doc = {"number": i, "payload": payload, 'chash': chash}
+ d = createDoc(doc, doc_id=chash)
+ d.addCallbacks(partial(printResult, i=i, chash=chash, payload=payload),
+ lambda e: e.printTraceback())
+ deferreds.append(d)
+ return defer.gatherResults(deferreds, consumeErrors=True)
+
+d = create_indexes(None)
+d.addCallback(insert_docs)
+d.addCallback(get_from_index)
+d.addCallback(countDocs)
+
+reactor.run()