From e7c90a407c08ee169ae912db8f62f218531e41a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Touceda?=
Date: Wed, 2 Oct 2013 17:05:58 -0300
Subject: Convert to utf8 all the strings before saving them

---
 changes/utf8_all_the_things                |  1 +
 client/pkg/requirements.pip                |  2 ++
 client/src/leap/soledad/client/__init__.py | 35 +++++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 changes/utf8_all_the_things

diff --git a/changes/utf8_all_the_things b/changes/utf8_all_the_things
new file mode 100644
index 00000000..c213b890
--- /dev/null
+++ b/changes/utf8_all_the_things
@@ -0,0 +1 @@
+  o Save only UTF8 strings. Related to #3660.
diff --git a/client/pkg/requirements.pip b/client/pkg/requirements.pip
index df6ddb94..8733f320 100644
--- a/client/pkg/requirements.pip
+++ b/client/pkg/requirements.pip
@@ -20,3 +20,5 @@ oauth
 
 # pysqlite should not be a dep, see #2945
 pysqlite
+
+cchardet
\ No newline at end of file
diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py
index 1f54ef8c..13a3b68f 100644
--- a/client/src/leap/soledad/client/__init__.py
+++ b/client/src/leap/soledad/client/__init__.py
@@ -32,6 +32,8 @@ import socket
 import ssl
 import urlparse
 
+import cchardet
+
 from hashlib import sha256
 
 from u1db.remote import http_client
@@ -755,6 +757,37 @@ class Soledad(object):
         """
         return self._db.get_all_docs(include_deleted)
 
+    def _convert_to_utf8(self, content):
+        """
+        Convert content (or every string value in it) to unicode.
+
+        Byte strings are decoded with the encoding guessed by cchardet
+        (utf-8 if none is detected) and kept as-is when decoding fails.
+        Dict values are converted in place; lists, tuples and other
+        iterables are currently ignored (no support needed for now).
+
+        :param content: content to convert
+        :type content: object
+
+        :return: the converted content
+        :rtype: object
+        """
+        if isinstance(content, unicode):
+            return content
+
+        if isinstance(content, str):
+            # cchardet reports None when it cannot guess (e.g. empty str)
+            encoding = cchardet.detect(content)["encoding"] or "utf-8"
+            try:
+                return content.decode(encoding)
+            except (UnicodeError, LookupError):
+                return content  # best effort: keep the original bytes
+
+        if isinstance(content, dict):
+            for key, value in content.items():
+                content[key] = self._convert_to_utf8(value)
+        return content
+
     def create_doc(self, content, doc_id=None):
         """
         Create a new document in the local encrypted database.
@@ -767,7 +800,7 @@ class Soledad(object):
         :return: the new document
         :rtype: SoledadDocument
         """
-        return self._db.create_doc(content, doc_id=doc_id)
+        return self._db.create_doc(self._convert_to_utf8(content), doc_id=doc_id)
 
     def create_doc_from_json(self, json, doc_id=None):
         """
-- 
cgit v1.2.3