diff options
author | Tomás Touceda <chiiph@leap.se> | 2013-10-02 17:05:58 -0300 |
---|---|---|
committer | Tomás Touceda <chiiph@leap.se> | 2013-10-03 12:18:00 -0300 |
commit | e7c90a407c08ee169ae912db8f62f218531e41a4 (patch) | |
tree | 35b9ce90e40b6ea2623f0caa80b759a3985cf6db | |
parent | 2d8f79bee0ad18d38f33cd7d6138b6500358e436 (diff) |
Convert to utf8 all the strings before saving them
-rw-r--r-- | changes/utf8_all_the_things | 1 | ||||
-rw-r--r-- | client/pkg/requirements.pip | 2 | ||||
-rw-r--r-- | client/src/leap/soledad/client/__init__.py | 35 |
3 files changed, 37 insertions, 1 deletion
diff --git a/changes/utf8_all_the_things b/changes/utf8_all_the_things new file mode 100644 index 00000000..c213b890 --- /dev/null +++ b/changes/utf8_all_the_things @@ -0,0 +1 @@ + o Save only UTF8 strings. Related to #3660. diff --git a/client/pkg/requirements.pip b/client/pkg/requirements.pip index df6ddb94..8733f320 100644 --- a/client/pkg/requirements.pip +++ b/client/pkg/requirements.pip @@ -20,3 +20,5 @@ oauth # pysqlite should not be a dep, see #2945 pysqlite + +cchardet
\ No newline at end of file diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py index 1f54ef8c..13a3b68f 100644 --- a/client/src/leap/soledad/client/__init__.py +++ b/client/src/leap/soledad/client/__init__.py @@ -32,6 +32,8 @@ import socket import ssl import urlparse +import cchardet + from hashlib import sha256 from u1db.remote import http_client @@ -755,6 +757,37 @@ class Soledad(object): """ return self._db.get_all_docs(include_deleted) + def _convert_to_utf8(self, content): + """ + Converts content to utf8 (or all the strings in content) + + NOTE: Even though this method supports any type, it will + currently ignore contents of lists, tuple or any other + iterable than dict. We don't need support for these at the + moment + + :param content: content to convert + :type content: object + + :rtype: object + """ + + if isinstance(content, unicode): + return content + elif isinstance(content, str): + try: + result = cchardet.detect(content) + content = content.decode(result["encoding"]).encode("utf-8")\ + .decode("utf-8") + except UnicodeError: + pass + return content + else: + if isinstance(content, dict): + for key in content.keys(): + content[key] = self._convert_to_utf8(content[key]) + return content + def create_doc(self, content, doc_id=None): """ Create a new document in the local encrypted database. @@ -767,7 +800,7 @@ class Soledad(object): :return: the new document :rtype: SoledadDocument """ - return self._db.create_doc(content, doc_id=doc_id) + return self._db.create_doc(self._convert_to_utf8(content), doc_id=doc_id) def create_doc_from_json(self, json, doc_id=None): """ |