From 27a70fbbde42166c268c60e624ed11eac7788b55 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Mon, 27 Jan 2014 14:49:59 -0300 Subject: Always return unicode, even on UnicodeError. --- client/changes/bug_return-always-unicode | 1 + client/src/leap/soledad/client/__init__.py | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) create mode 100644 client/changes/bug_return-always-unicode diff --git a/client/changes/bug_return-always-unicode b/client/changes/bug_return-always-unicode new file mode 100644 index 00000000..f4ee51ed --- /dev/null +++ b/client/changes/bug_return-always-unicode @@ -0,0 +1 @@ + o Always return unicode in helper method, even on UnicodeError. Related to #4998. diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 48c703ed..3fb037c8 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -859,7 +859,7 @@ class Soledad(object): def _convert_to_unicode(self, content): """ - Converts content to utf8 (or all the strings in content) + Converts content to unicode (or all the strings in content) NOTE: Even though this method supports any type, it will currently ignore contents of lists, tuple or any other @@ -874,13 +874,14 @@ class Soledad(object): if isinstance(content, unicode): return content elif isinstance(content, str): + result = chardet.detect(content) + default = "utf-8" + encoding = result["encoding"] or default try: - result = chardet.detect(content) - default = "utf-8" - encoding = result["encoding"] or default content = content.decode(encoding) - except UnicodeError: - pass + except UnicodeError as e: + logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) + content = content.decode(encoding, 'replace') return content else: if isinstance(content, dict): -- cgit v1.2.3