diff options
-rw-r--r-- | client/changes/next-changelog.rst | 2 | ||||
-rw-r--r-- | client/src/leap/soledad/client/api.py | 46 |
2 files changed, 3 insertions, 45 deletions
diff --git a/client/changes/next-changelog.rst b/client/changes/next-changelog.rst index 6e97386c..050d84be 100644 --- a/client/changes/next-changelog.rst +++ b/client/changes/next-changelog.rst @@ -16,6 +16,8 @@ Features Bugfixes ~~~~~~~~ - `#1235 <https://leap.se/code/issues/1235>`_: Description for the fixed stuff corresponding with issue #1235. +- Remove document content conversion to unicode. Users of API are responsible + for only passing valid JSON to Soledad for storage. - Bugfix without related issue number. Misc diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py index 2477350e..d83291e7 100644 --- a/client/src/leap/soledad/client/api.py +++ b/client/src/leap/soledad/client/api.py @@ -35,10 +35,6 @@ import ssl import uuid import urlparse -try: - import cchardet as chardet -except ImportError: - import chardet from itertools import chain from StringIO import StringIO @@ -357,7 +353,6 @@ class Soledad(object): also be updated. :rtype: twisted.internet.defer.Deferred """ - doc.content = _convert_to_unicode(doc.content) return self._defer("put_doc", doc) def delete_doc(self, doc): @@ -452,8 +447,7 @@ class Soledad(object): # create_doc (and probably to put_doc too). There are cases (mail # payloads for example) in which we already have the encoding in the # headers, so we don't need to guess it. - return self._defer( - "create_doc", _convert_to_unicode(content), doc_id=doc_id) + return self._defer("create_doc", content, doc_id=doc_id) def create_doc_from_json(self, json, doc_id=None): """ @@ -974,44 +968,6 @@ class Soledad(object): return self.create_doc(doc) -def _convert_to_unicode(content): - """ - Convert content to unicode (or all the strings in content). - - NOTE: Even though this method supports any type, it will - currently ignore contents of lists, tuple or any other - iterable than dict. We don't need support for these at the - moment - - :param content: content to convert - :type content: object - - :rtype: object - """ - # Chardet doesn't guess very well with some smallish payloads. - # This parameter might need some empirical tweaking. - CUTOFF_CONFIDENCE = 0.90 - - if isinstance(content, unicode): - return content - elif isinstance(content, str): - encoding = "utf-8" - result = chardet.detect(content) - if result["confidence"] > CUTOFF_CONFIDENCE: - encoding = result["encoding"] - try: - content = content.decode(encoding) - except UnicodeError as e: - logger.error("Unicode error: {0!r}. Using 'replace'".format(e)) - content = content.decode(encoding, 'replace') - return content - else: - if isinstance(content, dict): - for key in content.keys(): - content[key] = _convert_to_unicode(content[key]) - return content - - def create_path_if_not_exists(path): try: if not os.path.isdir(path): |