summary refs log tree commit diff
diff options
context:
space:
mode:
author Tomás Touceda <chiiph@leap.se> 2013-10-02 17:05:58 -0300
committer Tomás Touceda <chiiph@leap.se> 2013-10-03 12:18:00 -0300
commit e7c90a407c08ee169ae912db8f62f218531e41a4 (patch)
tree 35b9ce90e40b6ea2623f0caa80b759a3985cf6db
parent 2d8f79bee0ad18d38f33cd7d6138b6500358e436 (diff)
Convert to utf8 all the strings before saving them
-rw-r--r-- changes/utf8_all_the_things 1
-rw-r--r-- client/pkg/requirements.pip 2
-rw-r--r-- client/src/leap/soledad/client/__init__.py 35
3 files changed, 37 insertions, 1 deletions
diff --git a/changes/utf8_all_the_things b/changes/utf8_all_the_things
new file mode 100644
index 00000000..c213b890
--- /dev/null
+++ b/changes/utf8_all_the_things
@@ -0,0 +1 @@
+ o Save only UTF8 strings. Related to #3660.
diff --git a/client/pkg/requirements.pip b/client/pkg/requirements.pip
index df6ddb94..8733f320 100644
--- a/client/pkg/requirements.pip
+++ b/client/pkg/requirements.pip
@@ -20,3 +20,5 @@ oauth
# pysqlite should not be a dep, see #2945
pysqlite
+
+cchardet
\ No newline at end of file
diff --git a/client/src/leap/soledad/client/__init__.py b/client/src/leap/soledad/client/__init__.py
index 1f54ef8c..13a3b68f 100644
--- a/client/src/leap/soledad/client/__init__.py
+++ b/client/src/leap/soledad/client/__init__.py
@@ -32,6 +32,8 @@ import socket
import ssl
import urlparse
+import cchardet
+
from hashlib import sha256
from u1db.remote import http_client
@@ -755,6 +757,37 @@ class Soledad(object):
"""
return self._db.get_all_docs(include_deleted)
+    def _convert_to_utf8(self, content):
+        """
+        Convert content (or every string value inside a dict) to unicode.
+
+        Unicode objects are returned as they are. Byte strings have
+        their encoding guessed with cchardet and are decoded into
+        unicode objects. Dicts are converted recursively and in place,
+        value by value (keys are left untouched).
+
+        NOTE: Even though this method accepts any type, it currently
+        ignores the contents of lists, tuples or any iterable other
+        than dict, which are returned unchanged. We don't need support
+        for these at the moment.
+
+        :param content: content to convert
+        :type content: object
+
+        :return: the converted content; a byte string that cannot be
+                 decoded is returned unchanged
+        :rtype: object
+        """
+        if isinstance(content, unicode):
+            return content
+
+        if isinstance(content, str):
+            # The detector may report None as the encoding when it cannot
+            # guess one (e.g. for an empty string). Fall back to utf-8
+            # instead of letting decode(None) raise an uncaught TypeError.
+            encoding = cchardet.detect(content)["encoding"] or "utf-8"
+            try:
+                return content.decode(encoding)
+            except (UnicodeError, LookupError):
+                # Best effort: the guess was wrong, or it names a codec
+                # Python does not know. Keep the original byte string.
+                return content
+
+        if isinstance(content, dict):
+            # Only existing keys are reassigned, so the dict size never
+            # changes while iterating.
+            for key in content:
+                content[key] = self._convert_to_utf8(content[key])
+        return content
+
+
def create_doc(self, content, doc_id=None):
"""
Create a new document in the local encrypted database.
@@ -767,7 +800,7 @@ class Soledad(object):
:return: the new document
:rtype: SoledadDocument
"""
- return self._db.create_doc(content, doc_id=doc_id)
+ return self._db.create_doc(self._convert_to_utf8(content), doc_id=doc_id)
def create_doc_from_json(self, json, doc_id=None):
"""