summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--client/changes/next-changelog.rst2
-rw-r--r--client/src/leap/soledad/client/api.py46
2 files changed, 3 insertions, 45 deletions
diff --git a/client/changes/next-changelog.rst b/client/changes/next-changelog.rst
index 6e97386c..050d84be 100644
--- a/client/changes/next-changelog.rst
+++ b/client/changes/next-changelog.rst
@@ -16,6 +16,8 @@ Features
Bugfixes
~~~~~~~~
- `#1235 <https://leap.se/code/issues/1235>`_: Description for the fixed stuff corresponding with issue #1235.
+- Remove document content conversion to unicode. Users of the API are
+ responsible for passing only valid JSON to Soledad for storage.
- Bugfix without related issue number.
Misc
diff --git a/client/src/leap/soledad/client/api.py b/client/src/leap/soledad/client/api.py
index 2477350e..d83291e7 100644
--- a/client/src/leap/soledad/client/api.py
+++ b/client/src/leap/soledad/client/api.py
@@ -35,10 +35,6 @@ import ssl
import uuid
import urlparse
-try:
- import cchardet as chardet
-except ImportError:
- import chardet
from itertools import chain
from StringIO import StringIO
@@ -357,7 +353,6 @@ class Soledad(object):
also be updated.
:rtype: twisted.internet.defer.Deferred
"""
- doc.content = _convert_to_unicode(doc.content)
return self._defer("put_doc", doc)
def delete_doc(self, doc):
@@ -452,8 +447,7 @@ class Soledad(object):
# create_doc (and probably to put_doc too). There are cases (mail
# payloads for example) in which we already have the encoding in the
# headers, so we don't need to guess it.
- return self._defer(
- "create_doc", _convert_to_unicode(content), doc_id=doc_id)
+ return self._defer("create_doc", content, doc_id=doc_id)
def create_doc_from_json(self, json, doc_id=None):
"""
@@ -974,44 +968,6 @@ class Soledad(object):
return self.create_doc(doc)
-def _convert_to_unicode(content):
- """
- Convert content to unicode (or all the strings in content).
-
- NOTE: Even though this method supports any type, it will
- currently ignore the contents of lists, tuples, or any iterable
- other than dict. We don't need support for these at the
- moment.
-
- :param content: content to convert
- :type content: object
-
- :rtype: object
- """
- # Chardet doesn't guess very well with some smallish payloads.
- # This parameter might need some empirical tweaking.
- CUTOFF_CONFIDENCE = 0.90
-
- if isinstance(content, unicode):
- return content
- elif isinstance(content, str):
- encoding = "utf-8"
- result = chardet.detect(content)
- if result["confidence"] > CUTOFF_CONFIDENCE:
- encoding = result["encoding"]
- try:
- content = content.decode(encoding)
- except UnicodeError as e:
- logger.error("Unicode error: {0!r}. Using 'replace'".format(e))
- content = content.decode(encoding, 'replace')
- return content
- else:
- if isinstance(content, dict):
- for key in content.keys():
- content[key] = _convert_to_unicode(content[key])
- return content
-
-
def create_path_if_not_exists(path):
try:
if not os.path.isdir(path):