From 79b3b48170aed5d975b8e664eb85b2a99ca578f6 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Sun, 9 Aug 2015 14:02:56 -0400 Subject: [feat] update profile-sync script, add plop profiling Updating the profile-sync script to the latest deferred-based sync. - Added a couple of options to have finer control about what output to get. - Add support for the plop profiler https://pypi.python.org/pypi/plop/ - To get meaningful plop profiles, make sure to disable the system stats collection, like this:: ./profile-sync.py --no-stats --plop -b /tmp/syncdata/ -p sikret user@provider A good practice for this is having a pre-seeded soledad account (currently you have to do that through the wizard, a cli will be very nice to have in the coming future) with a known amount of data (for instance, sending some mails with known payload weight). It is VERY IMPORTANT that you *NEVER* process the data in this account (ie, do not ever log in with a mail client, for instance, since that will alter the original documents). In order to have comparable results, you should always run this sync script in similar conditions. Ideally, on a machine with runlevel 1. Also, make sure of deleting the contents in the folder if you are not using a temporal dir. --- scripts/profiling/sync/profile-sync.py | 84 +++++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 12 deletions(-) mode change 100644 => 100755 scripts/profiling/sync/profile-sync.py (limited to 'scripts/profiling/sync/profile-sync.py') diff --git a/scripts/profiling/sync/profile-sync.py b/scripts/profiling/sync/profile-sync.py old mode 100644 new mode 100755 index fdd5b5a6..6c8d249e --- a/scripts/profiling/sync/profile-sync.py +++ b/scripts/profiling/sync/profile-sync.py @@ -1,13 +1,16 @@ -#!/usr/bin/python - +#!/usr/bin/env python import argparse +import commands +import getpass import logging +import tempfile +from datetime import datetime +from twisted.internet import reactor from util import StatsLogger, ValidateUserHandle -from client_side_db import get_soledad_instance -#from plot import plot +from client_side_db import _get_soledad_instance, _get_soledad_info # create a logger @@ -15,6 +18,15 @@ logger = logging.getLogger(__name__) LOG_FORMAT = '%(asctime)s %(message)s' logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) +GITVER = commands.getoutput('git describe') + + +def get_and_run_plop_collector(): + from plop.collector import Collector + collector = Collector() + collector.start() + return collector + # main program @@ -33,6 +45,16 @@ if __name__ == '__main__': parser.add_argument( '-l', dest='logfile', required=False, default='/tmp/profile.log', help='the file to which write the log') + parser.add_argument( + '--no-stats', dest='do_stats', action='store_false', + help='skip system stats') + parser.add_argument( + '--plot', dest='do_plot', action='store_true', + help='do a graphical plot') + parser.add_argument( + '--plop', dest='do_plop', action='store_true', + help='run sync script under plop profiler') + parser.set_defaults(do_stats=True, do_plot=False, do_plop=False) args = parser.parse_args() # get the password @@ -47,16 +69,54 @@ if __name__ == '__main__': basedir = tempfile.mkdtemp() logger.info('Using %s as base directory.' % basedir) + uuid, server_url, cert_file, token = \ + _get_soledad_info( + args.username, args.provider, passphrase, basedir) # get the soledad instance - s = get_soledad_instance( - args.username, args.provider, passphrase, basedir) + s = _get_soledad_instance( + uuid, passphrase, basedir, server_url, cert_file, token) + for i in xrange(10): + # XXX Profile this with more realistic payloads s.create_doc({}) - sl = StatsLogger( - "soledad-sync", args.logfile, procs=["python"], interval=0.001) - sl.start() - s.sync() - sl.stop() + def start_sync(): + if args.do_stats: + sl = StatsLogger( + "soledad-sync", args.logfile, procs=["python"], interval=0.001) + sl.start() + else: + sl = None + + if args.do_plop: + plop_collector = get_and_run_plop_collector() + else: + plop_collector = None + + t0 = datetime.now() + d = s.sync() + d.addCallback(onSyncDone, sl, t0, plop_collector) + + def onSyncDone(sync_result, sl, t0, plop_collector): + print "GOT SYNC RESULT: ", sync_result + t1 = datetime.now() + if sl: + sl.stop() + if plop_collector: + from plop.collector import PlopFormatter + formatter = PlopFormatter() + plop_collector.stop() + # XXX mkdir profiles dir if not exist + with open('profiles/plop-sync-%s' % GITVER, 'w') as f: + f.write(formatter.format(plop_collector)) + + delta = (t1 - t0).total_seconds() + print "[+] Sync took %s seconds." % delta + reactor.stop() + + if args.do_plot: + from plot import plot + plot(args.logfile) - #plot(args.logfile) + reactor.callWhenRunning(start_sync) + reactor.run() -- cgit v1.2.3 From d41aad86c8ec1b071b09feaa61a04bf3dc3d5b90 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Sun, 9 Aug 2015 14:39:11 -0400 Subject: [bug] disable events on profiling --- scripts/profiling/sync/profile-sync.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'scripts/profiling/sync/profile-sync.py') diff --git a/scripts/profiling/sync/profile-sync.py b/scripts/profiling/sync/profile-sync.py index 6c8d249e..fb887588 100755 --- a/scripts/profiling/sync/profile-sync.py +++ b/scripts/profiling/sync/profile-sync.py @@ -11,6 +11,9 @@ from twisted.internet import reactor from util import StatsLogger, ValidateUserHandle from client_side_db import _get_soledad_instance, _get_soledad_info +from leap.common.events import flags + +flags.set_events_enabled(False) # create a logger -- cgit v1.2.3 From 75c833df2a14a754789976d60bbd98e3ca0cd257 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Sun, 9 Aug 2015 15:22:29 -0400 Subject: [feat] add support for twisted-aware theseus profiler --- scripts/profiling/sync/profile-sync.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'scripts/profiling/sync/profile-sync.py') diff --git a/scripts/profiling/sync/profile-sync.py b/scripts/profiling/sync/profile-sync.py index fb887588..5f01f1f5 100755 --- a/scripts/profiling/sync/profile-sync.py +++ b/scripts/profiling/sync/profile-sync.py @@ -31,6 +31,12 @@ def get_and_run_plop_collector(): return collector +def get_and_run_theseus_tracer(): + from theseus import Tracer + t = Tracer() + t.install() + return t + # main program if __name__ == '__main__': @@ -57,7 +63,11 @@ if __name__ == '__main__': parser.add_argument( '--plop', dest='do_plop', action='store_true', help='run sync script under plop profiler') - parser.set_defaults(do_stats=True, do_plot=False, do_plop=False) + parser.add_argument( + '--theseus', dest='do_theseus', action='store_true', + help='run sync script under theseus profiler') + parser.set_defaults( + do_stats=True, do_plot=False, do_plop=False, do_theseus=False) args = parser.parse_args() # get the password @@ -79,8 +89,11 @@ if __name__ == '__main__': s = _get_soledad_instance( uuid, passphrase, basedir, server_url, cert_file, token) + # TODO Profile this with more realistic payloads + # TODO Add option to disable sending new docs. If we're profiling + # receiving against a fixed account, this will alter each run's results. + for i in xrange(10): - # XXX Profile this with more realistic payloads s.create_doc({}) def start_sync(): @@ -96,11 +109,16 @@ if __name__ == '__main__': else: plop_collector = None + if args.do_theseus: + theseus = get_and_run_theseus_tracer() + else: + theseus = None + t0 = datetime.now() d = s.sync() - d.addCallback(onSyncDone, sl, t0, plop_collector) + d.addCallback(onSyncDone, sl, t0, plop_collector, theseus) - def onSyncDone(sync_result, sl, t0, plop_collector): + def onSyncDone(sync_result, sl, t0, plop_collector, theseus): print "GOT SYNC RESULT: ", sync_result t1 = datetime.now() if sl: @@ -112,6 +130,10 @@ if __name__ == '__main__': # XXX mkdir profiles dir if not exist with open('profiles/plop-sync-%s' % GITVER, 'w') as f: f.write(formatter.format(plop_collector)) + if theseus: + with open('callgrind.theseus', 'wb') as outfile: + theseus.write_data(outfile) + theseus.uninstall() delta = (t1 - t0).total_seconds() print "[+] Sync took %s seconds." % delta -- cgit v1.2.3 From bec13f649e9262e1cf1d80825f2a19083fb0904a Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 10 Aug 2015 11:27:16 -0400 Subject: [feat] control sending messages in script - ability to disable sending (to get raw receive times) - ability to specify size and number of payloads - ability to point to a file to be used as raw source of payloads --- scripts/profiling/sync/profile-sync.py | 60 +++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 8 deletions(-) (limited to 'scripts/profiling/sync/profile-sync.py') diff --git a/scripts/profiling/sync/profile-sync.py b/scripts/profiling/sync/profile-sync.py index 5f01f1f5..9ef2ea92 100755 --- a/scripts/profiling/sync/profile-sync.py +++ b/scripts/profiling/sync/profile-sync.py @@ -4,6 +4,8 @@ import argparse import commands import getpass import logging +import mmap +import os import tempfile from datetime import datetime @@ -37,6 +39,35 @@ def get_and_run_theseus_tracer(): t.install() return t + +def bail(msg): + print "[!] %s" % msg + + +def create_docs(soledad, args): + """ + Populates the soledad database with dummy messages, so we can exercise + sending payloads during the sync. + """ + sample_path = args.payload_f + if not sample_path: + bail('Need to pass a --payload-file') + return + if not os.path.isfile(sample_path): + bail('--payload-file does not exist!') + return + + numdocs = args.send_num + docsize = args.send_size + + # XXX this will FAIL if the payload source is smaller to size * num + # XXX could use a cycle iterator + with open(sample_path, "r+b") as sample_f: + fmap = mmap.mmap(sample_f.fileno(), 0, prot=mmap.PROT_READ) + for index in xrange(numdocs): + payload = fmap.read(docsize * 1024) + s.create_doc({payload: payload}) + # main program if __name__ == '__main__': @@ -54,6 +85,18 @@ if __name__ == '__main__': parser.add_argument( '-l', dest='logfile', required=False, default='/tmp/profile.log', help='the file to which write the log') + parser.add_argument( + '--no-send', dest='do_send', action='store_false', + help='skip sending messages') + parser.add_argument( + '--send-size', dest='send_size', default=10, + help='size of doc to send, in KB (default: 10)') + parser.add_argument( + '--send-num', dest='send_num', default=10, + help='number of docs to send (default: 10)') + parser.add_argument( + '--payload-file', dest="payload_f", default=None, + help='path to a sample file to use for the payloads') parser.add_argument( '--no-stats', dest='do_stats', action='store_false', help='skip system stats') @@ -67,7 +110,9 @@ if __name__ == '__main__': '--theseus', dest='do_theseus', action='store_true', help='run sync script under theseus profiler') parser.set_defaults( - do_stats=True, do_plot=False, do_plop=False, do_theseus=False) + do_send=True, do_stats=True, do_plot=False, do_plop=False, + do_theseus=False, + ) args = parser.parse_args() # get the password @@ -89,12 +134,8 @@ if __name__ == '__main__': s = _get_soledad_instance( uuid, passphrase, basedir, server_url, cert_file, token) - # TODO Profile this with more realistic payloads - # TODO Add option to disable sending new docs. If we're profiling - # receiving against a fixed account, this will alter each run's results. - - for i in xrange(10): - s.create_doc({}) + if args.do_send: + create_docs(s, args) def start_sync(): if args.do_stats: @@ -119,6 +160,7 @@ if __name__ == '__main__': d.addCallback(onSyncDone, sl, t0, plop_collector, theseus) def onSyncDone(sync_result, sl, t0, plop_collector, theseus): + # TODO should write this to a result file print "GOT SYNC RESULT: ", sync_result t1 = datetime.now() if sl: @@ -127,7 +169,8 @@ if __name__ == '__main__': from plop.collector import PlopFormatter formatter = PlopFormatter() plop_collector.stop() - # XXX mkdir profiles dir if not exist + if not os.path.isdir('profiles'): + os.mkdir('profiles') with open('profiles/plop-sync-%s' % GITVER, 'w') as f: f.write(formatter.format(plop_collector)) if theseus: @@ -136,6 +179,7 @@ if __name__ == '__main__': theseus.uninstall() delta = (t1 - t0).total_seconds() + # TODO should write this to a result file print "[+] Sync took %s seconds." % delta reactor.stop() -- cgit v1.2.3