From f2c68ba96498f8b88e415aaf27ced735c790510e Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 27 Sep 2013 16:07:29 -0400 Subject: Improve error handling during soledad boostrapping in the client. The aim is to have better logs for debugging the different problems behind issues like #3619 and #3867. As I'm finding a good quantity of SSL handshake timeouts, I'm also adding a litte retry subroutine to the load_and_sync. Also, initialization and sync calls are separeted to be able to correlate logs with server-side soledad. --- .../services/soledad/soledadbootstrapper.py | 205 ++++++++++++++++----- src/leap/bitmask/util/__init__.py | 14 ++ 2 files changed, 170 insertions(+), 49 deletions(-) diff --git a/src/leap/bitmask/services/soledad/soledadbootstrapper.py b/src/leap/bitmask/services/soledad/soledadbootstrapper.py index cac91440..afd7d623 100644 --- a/src/leap/bitmask/services/soledad/soledadbootstrapper.py +++ b/src/leap/bitmask/services/soledad/soledadbootstrapper.py @@ -14,15 +14,15 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . - """ Soledad bootstrapping """ - import logging import os import socket +from ssl import SSLError + from PySide import QtCore from u1db import errors as u1db_errors @@ -32,9 +32,10 @@ from leap.bitmask.crypto.srpauth import SRPAuth from leap.bitmask.services.abstractbootstrapper import AbstractBootstrapper from leap.bitmask.services.soledad.soledadconfig import SoledadConfig from leap.bitmask.util.request_helpers import get_content +from leap.bitmask.util import is_file, is_empty_file from leap.bitmask.util import get_path_prefix -from leap.common.check import leap_assert, leap_assert_type from leap.common.files import get_mtime +from leap.common.check import leap_assert, leap_assert_type, leap_check from leap.keymanager import KeyManager, openpgp from leap.keymanager.errors import KeyNotFound from leap.soledad.client import Soledad @@ -42,6 +43,17 @@ from leap.soledad.client import Soledad logger = logging.getLogger(__name__) +# TODO these exceptions could be moved to soledad itself +# after settling this down. + +class SoledadSyncError(Exception): + message = "Error while syncing Soledad" + + +class SoledadInitError(Exception): + message = "Error while initializing Soledad" + + class SoledadBootstrapper(AbstractBootstrapper): """ Soledad init procedure @@ -109,68 +121,159 @@ class SoledadBootstrapper(AbstractBootstrapper): """ self._soledad_retries += 1 + def _get_db_paths(self, uuid): + """ + Returns the secrets and local db paths needed for soledad + initialization + + :param uuid: uuid for user + :type uuid: str + + :return: a tuple with secrets, local_db paths + :rtype: tuple + """ + prefix = os.path.join(get_path_prefix(), "leap", "soledad") + secrets = "%s/%s.secret" % (prefix, uuid) + local_db = "%s/%s.db" % (prefix, uuid) + + # We remove an empty file if found to avoid complains + # about the db not being properly initialized + if is_file(local_db) and is_empty_file(local_db): + try: + os.remove(local_db) + except OSError: + logger.warning("Could not remove empty file %s" + % local_db) + return secrets, local_db + # initialization def load_and_sync_soledad(self): """ Once everthing is in the right place, we instantiate and sync Soledad - - :param srp_auth: SRPAuth object used - :type srp_auth: SRPAuth """ - srp_auth = self.srpauth - uuid = srp_auth.get_uid() + uuid = self.srpauth.get_uid() + token = self.srpauth.get_token() - prefix = os.path.join(get_path_prefix(), "leap", "soledad") - secrets_path = "%s/%s.secret" % (prefix, uuid) - local_db_path = "%s/%s.db" % (prefix, uuid) + secrets_path, local_db_path = self._get_db_paths(uuid) # TODO: Select server based on timezone (issue #3308) server_dict = self._soledad_config.get_hosts() - if server_dict.keys(): - selected_server = server_dict[server_dict.keys()[0]] - server_url = "https://%s:%s/user-%s" % ( - selected_server["hostname"], - selected_server["port"], - uuid) + if not server_dict.keys(): + # XXX raise more specific exception + raise Exception("No soledad server found") + + selected_server = server_dict[server_dict.keys()[0]] + server_url = "https://%s:%s/user-%s" % ( + selected_server["hostname"], + selected_server["port"], + uuid) + + logger.debug("Using soledad server url: %s" % (server_url,)) - logger.debug("Using soledad server url: %s" % (server_url,)) + cert_file = self._provider_config.get_ca_cert_path() - cert_file = self._provider_config.get_ca_cert_path() + logger.debug('local_db:%s' % (local_db_path,)) + logger.debug('secrets_path:%s' % (secrets_path,)) - # TODO: If selected server fails, retry with another host - # (issue #3309) + try: + self._try_soledad_init( + uuid, secrets_path, local_db_path, + server_url, cert_file, token) + except: + # re-raise the exceptions from try_init, + # we're currently handling the retries from the + # soledad-launcher in the gui. + raise + + leap_check(self._soledad is not None, + "Null soledad, error while initializing") + + # and now, let's sync + sync_tries = 10 + while sync_tries > 0: try: - self._soledad = Soledad( - uuid, - self._password.encode("utf-8"), - secrets_path=secrets_path, - local_db_path=local_db_path, - server_url=server_url, - cert_file=cert_file, - auth_token=srp_auth.get_token()) - self._soledad.sync() - - # XXX All these errors should be handled by soledad itself, - # and return a subclass of SoledadInitializationFailed - except socket.timeout: - logger.debug("SOLEDAD TIMED OUT...") - self.soledad_timeout.emit() - except socket.error as exc: - logger.error("Socket error while initializing soledad") - self.soledad_failed.emit() - except u1db_errors.Unauthorized: - logger.error("Error while initializing soledad " - "(unauthorized).") - self.soledad_failed.emit() - except Exception as exc: - logger.error("Unhandled error while initializating " + self._try_soledad_sync() + logger.debug("Soledad has been synced.") + # so long, and thanks for all the fish + return + except SoledadSyncError: + # maybe it's my connection, but I'm getting + # ssl handshake timeouts and read errors quite often. + # A particularly big sync is a disaster. + # This deserves further investigation, maybe the + # retry strategy can be pushed to u1db, or at least + # it's something worthy to talk about with the + # ubuntu folks. + sync_tries -= 1 + continue + + # reached bottom, failed to sync + # and there's nothing we can do... + self.soledad_failed.emit() + raise SoledadSyncError() + + def _try_soledad_init(self, uuid, secrets_path, local_db_path, + server_url, cert_file, auth_token): + """ + Tries to initialize soledad. + + :param uuid: user identifier + :param secrets_path: path to secrets file + :param local_db_path: path to local db file + :param server_url: soledad server uri + :cert_file: + :param auth token: auth token + :type auth_token: str + """ + + # TODO: If selected server fails, retry with another host + # (issue #3309) + try: + self._soledad = Soledad( + uuid, + self._password.encode("utf-8"), + secrets_path=secrets_path, + local_db_path=local_db_path, + server_url=server_url, + cert_file=cert_file, + auth_token=auth_token) + + # XXX All these errors should be handled by soledad itself, + # and return a subclass of SoledadInitializationFailed + except socket.timeout: + logger.debug("SOLEDAD initialization TIMED OUT...") + self.soledad_timeout.emit() + except socket.error as exc: + logger.error("Socket error while initializing soledad") + self.soledad_failed.emit() + except u1db_errors.Unauthorized: + logger.error("Error while initializing soledad " + "(unauthorized).") + self.soledad_failed.emit() + except Exception as exc: + logger.exception("Unhandled error while initializating " "soledad: %r" % (exc,)) - raise - else: - raise Exception("No soledad server found") + self.soledad_failed.emit() + + def _try_soledad_sync(self): + """ + Tries to sync soledad. + Raises SoledadSyncError if not successful. + """ + try: + logger.error("trying to sync soledad....") + self._soledad.sync() + except SSLError as exc: + logger.error("%r" % (exc,)) + raise SoledadSyncError("Failed to sync soledad") + except Exception as exc: + logger.exception("Unhandled error while syncing" + "soledad: %r" % (exc,)) + self.soledad_failed.emit() + raise SoledadSyncError("Failed to sync soledad") def _download_config(self): """ @@ -185,6 +288,8 @@ class SoledadBootstrapper(AbstractBootstrapper): self._soledad_config = SoledadConfig() + # TODO factor out with eip/provider configs. + headers = {} mtime = get_mtime( os.path.join(get_path_prefix(), "leap", "providers", @@ -242,8 +347,10 @@ class SoledadBootstrapper(AbstractBootstrapper): Generates the key pair if needed, uploads it to the webapp and nickserver """ - leap_assert(self._provider_config, + leap_assert(self._provider_config is not None, "We need a provider configuration!") + leap_assert(self._soledad is not None, + "We need a non-null soledad to generate keys") address = "%s@%s" % (self._user, self._provider_config.get_domain()) diff --git a/src/leap/bitmask/util/__init__.py b/src/leap/bitmask/util/__init__.py index f762a350..b58e6e3b 100644 --- a/src/leap/bitmask/util/__init__.py +++ b/src/leap/bitmask/util/__init__.py @@ -62,3 +62,17 @@ def update_modification_ts(path): """ os.utime(path, None) return get_modification_ts(path) + + +def is_file(path): + """ + Returns True if the path exists and is a file. + """ + return os.path.isfile(path) + + +def is_empty_file(path): + """ + Returns True if the file at path is empty. + """ + return os.stat(path).st_size is 0 -- cgit v1.2.3 From e55af608c81284f50133f53df6f49d69da0756b7 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 27 Sep 2013 17:29:10 -0400 Subject: add comments --- src/leap/bitmask/gui/mainwindow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/leap/bitmask/gui/mainwindow.py b/src/leap/bitmask/gui/mainwindow.py index 200d68aa..33af37e3 100644 --- a/src/leap/bitmask/gui/mainwindow.py +++ b/src/leap/bitmask/gui/mainwindow.py @@ -1007,6 +1007,7 @@ class MainWindow(QtGui.QMainWindow): """ Retries soledad connection. """ + # XXX should move logic to soledad boostrapper itself logger.debug("Retrying soledad connection.") if self._soledad_bootstrapper.should_retry_initialization(): self._soledad_bootstrapper.increment_retries_count() @@ -1031,8 +1032,9 @@ class MainWindow(QtGui.QMainWindow): """ passed = data[self._soledad_bootstrapper.PASSED_KEY] if not passed: + # TODO should actually *display* on the panel. logger.debug("ERROR on soledad bootstrapping:") - logger.error(data[self._soledad_bootstrapper.ERROR_KEY]) + logger.error("%r" % data[self._soledad_bootstrapper.ERROR_KEY]) return else: logger.debug("Done bootstrapping Soledad") -- cgit v1.2.3 From aea7b3dde14074d7462b5a0854dfad9d5d60e330 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 27 Sep 2013 17:29:58 -0400 Subject: remove unused import --- src/leap/bitmask/services/eip/eipbootstrapper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/leap/bitmask/services/eip/eipbootstrapper.py b/src/leap/bitmask/services/eip/eipbootstrapper.py index 885c4420..5a238a1c 100644 --- a/src/leap/bitmask/services/eip/eipbootstrapper.py +++ b/src/leap/bitmask/services/eip/eipbootstrapper.py @@ -28,7 +28,6 @@ from leap.bitmask.services import download_service_config from leap.bitmask.services.abstractbootstrapper import AbstractBootstrapper from leap.bitmask.services.eip.eipconfig import EIPConfig from leap.common import certs as leap_certs -from leap.bitmask.util import get_path_prefix from leap.common.check import leap_assert, leap_assert_type from leap.common.files import check_and_fix_urw_only -- cgit v1.2.3 From 7eb0052c95585cd71f61a5477a963f6357b34400 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 27 Sep 2013 17:32:40 -0400 Subject: refactor to use generic download_service_config --- .../services/soledad/soledadbootstrapper.py | 146 +++++++++------------ 1 file changed, 62 insertions(+), 84 deletions(-) diff --git a/src/leap/bitmask/services/soledad/soledadbootstrapper.py b/src/leap/bitmask/services/soledad/soledadbootstrapper.py index afd7d623..835e4cd9 100644 --- a/src/leap/bitmask/services/soledad/soledadbootstrapper.py +++ b/src/leap/bitmask/services/soledad/soledadbootstrapper.py @@ -29,13 +29,13 @@ from u1db import errors as u1db_errors from leap.bitmask.config import flags from leap.bitmask.config.providerconfig import ProviderConfig from leap.bitmask.crypto.srpauth import SRPAuth +from leap.bitmask.services import download_service_config from leap.bitmask.services.abstractbootstrapper import AbstractBootstrapper from leap.bitmask.services.soledad.soledadconfig import SoledadConfig -from leap.bitmask.util.request_helpers import get_content from leap.bitmask.util import is_file, is_empty_file from leap.bitmask.util import get_path_prefix -from leap.common.files import get_mtime from leap.common.check import leap_assert, leap_assert_type, leap_check +from leap.common.files import which from leap.keymanager import KeyManager, openpgp from leap.keymanager.errors import KeyNotFound from leap.soledad.client import Soledad @@ -58,13 +58,13 @@ class SoledadBootstrapper(AbstractBootstrapper): """ Soledad init procedure """ - SOLEDAD_KEY = "soledad" KEYMANAGER_KEY = "keymanager" PUBKEY_KEY = "user[public_key]" MAX_INIT_RETRIES = 10 + MAX_SYNC_RETRIES = 10 # All dicts returned are of the form # {"passed": bool, "error": str} @@ -80,6 +80,7 @@ class SoledadBootstrapper(AbstractBootstrapper): self._soledad_config = None self._keymanager = None self._download_if_needed = False + self._user = "" self._password = "" self._srpauth = None @@ -153,6 +154,7 @@ class SoledadBootstrapper(AbstractBootstrapper): Once everthing is in the right place, we instantiate and sync Soledad """ + # TODO this method is still too large uuid = self.srpauth.get_uid() token = self.srpauth.get_token() @@ -162,7 +164,7 @@ class SoledadBootstrapper(AbstractBootstrapper): server_dict = self._soledad_config.get_hosts() if not server_dict.keys(): - # XXX raise more specific exception + # XXX raise more specific exception, and catch it properly! raise Exception("No soledad server found") selected_server = server_dict[server_dict.keys()[0]] @@ -170,7 +172,6 @@ class SoledadBootstrapper(AbstractBootstrapper): selected_server["hostname"], selected_server["port"], uuid) - logger.debug("Using soledad server url: %s" % (server_url,)) cert_file = self._provider_config.get_ca_cert_path() @@ -192,10 +193,14 @@ class SoledadBootstrapper(AbstractBootstrapper): "Null soledad, error while initializing") # and now, let's sync - sync_tries = 10 + sync_tries = self.MAX_SYNC_RETRIES while sync_tries > 0: try: self._try_soledad_sync() + + # at this point, sometimes the client + # gets stuck and does not progress to + # the _gen_key step. XXX investigate. logger.debug("Soledad has been synced.") # so long, and thanks for all the fish return @@ -224,11 +229,13 @@ class SoledadBootstrapper(AbstractBootstrapper): :param secrets_path: path to secrets file :param local_db_path: path to local db file :param server_url: soledad server uri - :cert_file: + :param cert_file: path to the certificate of the ca used + to validate the SSL certificate used by the remote + soledad server. + :type cert_file: str :param auth token: auth token :type auth_token: str """ - # TODO: If selected server fails, retry with another host # (issue #3309) try: @@ -282,90 +289,46 @@ class SoledadBootstrapper(AbstractBootstrapper): leap_assert(self._provider_config, "We need a provider configuration!") - logger.debug("Downloading Soledad config for %s" % (self._provider_config.get_domain(),)) self._soledad_config = SoledadConfig() - - # TODO factor out with eip/provider configs. - - headers = {} - mtime = get_mtime( - os.path.join(get_path_prefix(), "leap", "providers", - self._provider_config.get_domain(), - "soledad-service.json")) - - if self._download_if_needed and mtime: - headers['if-modified-since'] = mtime - - api_version = self._provider_config.get_api_version() - - # there is some confusion with this uri, - config_uri = "%s/%s/config/soledad-service.json" % ( - self._provider_config.get_api_uri(), - api_version) - logger.debug('Downloading soledad config from: %s' % config_uri) - - # TODO factor out this srpauth protected get (make decorator) - srp_auth = self.srpauth - session_id = srp_auth.get_session_id() - cookies = None - if session_id: - cookies = {"_session_id": session_id} - - res = self._session.get(config_uri, - verify=self._provider_config - .get_ca_cert_path(), - headers=headers, - cookies=cookies) - res.raise_for_status() - - self._soledad_config.set_api_version(api_version) - - # Not modified - if res.status_code == 304: - logger.debug("Soledad definition has not been modified") - self._soledad_config.load( - os.path.join( - "leap", "providers", - self._provider_config.get_domain(), - "soledad-service.json")) - else: - soledad_definition, mtime = get_content(res) - - self._soledad_config.load(data=soledad_definition, mtime=mtime) - self._soledad_config.save(["leap", - "providers", - self._provider_config.get_domain(), - "soledad-service.json"]) - + download_service_config( + self._provider_config, + self._soledad_config, + self._session, + self._download_if_needed) + + # soledad config is ok, let's proceed to load and sync soledad + # XXX but honestly, this is a pretty strange entry point for that. + # it feels like it should be the other way around: + # load_and_sync, and from there, if needed, call download_config self.load_and_sync_soledad() - def _gen_key(self, _): + def _get_gpg_bin_path(self): """ - Generates the key pair if needed, uploads it to the webapp and - nickserver + Returns the path to gpg binary. + :returns: the gpg binary path + :rtype: str """ - leap_assert(self._provider_config is not None, - "We need a provider configuration!") - leap_assert(self._soledad is not None, - "We need a non-null soledad to generate keys") - - address = "%s@%s" % (self._user, self._provider_config.get_domain()) - - logger.debug("Retrieving key for %s" % (address,)) - - srp_auth = self.srpauth - - # TODO: use which implementation with known paths # TODO: Fix for Windows - gpgbin = "/usr/bin/gpg" - + gpgbin = None if flags.STANDALONE: - gpgbin = os.path.join(get_path_prefix(), - "..", "apps", "mail", "gpg") + gpgbin = os.path.join( + get_path_prefix(), "..", "apps", "mail", "gpg") + else: + gpgbin = which("gpg") + leap_check(gpgbin is not None, "Could not find gpg binary") + return gpgbin + def _init_keymanager(self, address): + """ + Initializes the keymanager. + :param address: the address to initialize the keymanager with. + :type address: str + """ + srp_auth = self.srpauth + logger.debug('initializing keymanager...') self._keymanager = KeyManager( address, "https://nicknym.%s:6425" % (self._provider_config.get_domain(),), @@ -376,10 +339,25 @@ class SoledadBootstrapper(AbstractBootstrapper): api_uri=self._provider_config.get_api_uri(), api_version=self._provider_config.get_api_version(), uid=srp_auth.get_uid(), - gpgbinary=gpgbin) + gpgbinary=self._get_gpg_bin_path()) + + def _gen_key(self, _): + """ + Generates the key pair if needed, uploads it to the webapp and + nickserver + """ + leap_assert(self._provider_config is not None, + "We need a provider configuration!") + leap_assert(self._soledad is not None, + "We need a non-null soledad to generate keys") + + address = "%s@%s" % (self._user, self._provider_config.get_domain()) + self._init_keymanager(address) + logger.debug("Retrieving key for %s" % (address,)) + try: - self._keymanager.get_key(address, openpgp.OpenPGPKey, - private=True, fetch_remote=False) + self._keymanager.get_key( + address, openpgp.OpenPGPKey, private=True, fetch_remote=False) except KeyNotFound: logger.debug("Key not found. Generating key for %s" % (address,)) self._keymanager.gen_key(openpgp.OpenPGPKey) -- cgit v1.2.3 From 2f13fb345ef0d400b099956654958025c7ef7392 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 27 Sep 2013 17:46:08 -0400 Subject: make socket errors during initialization recoverable --- src/leap/bitmask/gui/mainwindow.py | 2 ++ src/leap/bitmask/services/soledad/soledadbootstrapper.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/leap/bitmask/gui/mainwindow.py b/src/leap/bitmask/gui/mainwindow.py index 33af37e3..947ce58c 100644 --- a/src/leap/bitmask/gui/mainwindow.py +++ b/src/leap/bitmask/gui/mainwindow.py @@ -234,6 +234,8 @@ class MainWindow(QtGui.QMainWindow): self._soledad_bootstrapped_stage) self._soledad_bootstrapper.soledad_timeout.connect( self._retry_soledad_connection) + # XXX missing connect to soledad_failed (signal unrecoverable to user) + # TODO wait until chiiph ui refactor. self._smtp_bootstrapper = SMTPBootstrapper() self._smtp_bootstrapper.download_config.connect( diff --git a/src/leap/bitmask/services/soledad/soledadbootstrapper.py b/src/leap/bitmask/services/soledad/soledadbootstrapper.py index 835e4cd9..d348661d 100644 --- a/src/leap/bitmask/services/soledad/soledadbootstrapper.py +++ b/src/leap/bitmask/services/soledad/soledadbootstrapper.py @@ -250,12 +250,16 @@ class SoledadBootstrapper(AbstractBootstrapper): # XXX All these errors should be handled by soledad itself, # and return a subclass of SoledadInitializationFailed + + # recoverable, will guarantee retries except socket.timeout: logger.debug("SOLEDAD initialization TIMED OUT...") self.soledad_timeout.emit() except socket.error as exc: logger.error("Socket error while initializing soledad") - self.soledad_failed.emit() + self.soledad_timeout.emit() + + # unrecoverable except u1db_errors.Unauthorized: logger.error("Error while initializing soledad " "(unauthorized).") -- cgit v1.2.3 From decae946e725ce5275f8035e33a9d6adb69b3342 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 27 Sep 2013 17:57:07 -0400 Subject: add changes file --- changes/feature_3965_soledad-bootstrap-error-handling | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changes/feature_3965_soledad-bootstrap-error-handling diff --git a/changes/feature_3965_soledad-bootstrap-error-handling b/changes/feature_3965_soledad-bootstrap-error-handling new file mode 100644 index 00000000..d9f16378 --- /dev/null +++ b/changes/feature_3965_soledad-bootstrap-error-handling @@ -0,0 +1,2 @@ + o Improve error handling during soledad bootstrap. Closes: #3965. + Affects: #3619, #3867, #3966 -- cgit v1.2.3