From 40c5c6d7828362a826f022203cd6d50a5bfe6f0b Mon Sep 17 00:00:00 2001
From: Ivan Alejandro <ivanalejandro0@gmail.com>
Date: Fri, 17 Jan 2014 14:59:09 -0300
Subject: Add custom json.loads method.

This lets us parse an `str` parameter without converting it to unicode,
so a string containing bytes in different encodings no longer breaks
the JSON loading.
---
 mail/src/leap/mail/utils.py | 101 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 1 deletion(-)

(limited to 'mail/src')

diff --git a/mail/src/leap/mail/utils.py b/mail/src/leap/mail/utils.py
index 2480efc..93388d3 100644
--- a/mail/src/leap/mail/utils.py
+++ b/mail/src/leap/mail/utils.py
@@ -15,8 +15,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
-Small utilities.
+Mail utilities.
 """
+import json
+import traceback
 
 
 def first(things):
@@ -27,3 +29,100 @@ def first(things):
         return things[0]
     except (IndexError, TypeError):
         return None
+
+
+class CustomJsonScanner(object):
+    """
+    Context manager used to monkey patch the default json string parsing
+    behavior, so that parsed strings are returned as `str`.
+    The emails can have more than one encoding, so the `str` objects have more
+    than one encoding and json does not support direct work with `str`
+    (only `unicode`).
+    """
+
+    def _parse_string_str(self, s, idx, *args, **kwargs):
+        """
+        Parses the string "s" starting at the point idx (right after the
+        opening quote) and returns an `str` object: the same job as the
+        regular JSON string parsing, except that it doesn't decode utf8.
+        We need this because mail raw strings might have bytes in multiple
+        encodings.
+
+        :param s: the string we want to parse
+        :type s: str
+        :param idx: the starting point for parsing
+        :type idx: int
+
+        :returns: the parsed string and the index right after its
+                  closing quote.
+        :rtype: tuple (str, int)
+        :raises ValueError: if the string has no closing quote.
+        """
+        # NOTE: we just want to use this monkey patched version if we are
+        # calling the loads from our custom method. Otherwise, we use the
+        # json's default parser.
+        callers = [frame[2] for frame in traceback.extract_stack()]
+        if json_loads.__name__ not in callers:
+            return self._orig_scanstring(s, idx, *args, **kwargs)
+
+        end = s.find("\"", idx)
+        while end != -1:
+            # A quote closes the string unless an odd number of backslashes
+            # precedes it: an even run is only escaped backslashes, so
+            # checking just the previous character is not enough.
+            run_start = end
+            while run_start > idx and s[run_start - 1] == "\\":
+                run_start -= 1
+            if (end - run_start) % 2 == 0:
+                # NOTE: string-escape follows Python rules, so JSON-only
+                # escapes such as \uXXXX are left untouched.
+                return s[idx:end].decode("string-escape"), end + 1
+            end = s.find("\"", end + 1)
+        raise ValueError("Unterminated string starting at: %d" % (idx - 1))
+
+    def __enter__(self):
+        """
+        Replace the json methods with the needed ones.
+        Also make a backup to restore them later.
+
+        :returns: this context manager.
+        :rtype: CustomJsonScanner
+        """
+        # backup original values
+        self._orig_make_scanner = json.scanner.make_scanner
+        self._orig_scanstring = json.decoder.scanstring
+
+        # We need the make_scanner function to be the python one so we can
+        # monkey patch the json string parsing.
+        json.scanner.make_scanner = json.scanner.py_make_scanner
+        json.decoder.scanstring = self._parse_string_str
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """
+        Restores the backed up methods.
+        """
+        # restore original values
+        json.scanner.make_scanner = self._orig_make_scanner
+        json.decoder.scanstring = self._orig_scanstring
+
+
+def json_loads(data):
+    """
+    Parse `data` as json.loads does, but supporting multiple encodings in
+    the same string: the `str` parameter is never converted to unicode.
+
+    :param data: the string to load the objects from
+    :type data: str
+
+    :returns: the python object resulting from parsing 'data'; this
+              behaves like json.loads, except that strings are always
+              returned as `str` instead of `unicode`.
+    """
+    scanner_patch = CustomJsonScanner()
+    with scanner_patch:
+        # The explicit cls parameter is required: it triggers the code
+        # that lets us control the string parsing method.
+        result = json.loads(data, cls=json.JSONDecoder)
+
+    return result
-- 
cgit v1.2.3


From af8680aa692ee52ee2bc14e2a77c8edcd36b3dda Mon Sep 17 00:00:00 2001
From: Ivan Alejandro <ivanalejandro0@gmail.com>
Date: Fri, 17 Jan 2014 15:07:37 -0300
Subject: Fix encodings usage, use custom json.loads method.

Also remove some unused imports.
---
 mail/src/leap/mail/imap/fetch.py    | 19 +++++--------------
 mail/src/leap/mail/imap/messages.py |  4 ++--
 2 files changed, 7 insertions(+), 16 deletions(-)

(limited to 'mail/src')

diff --git a/mail/src/leap/mail/imap/fetch.py b/mail/src/leap/mail/imap/fetch.py
index 604a2ea..817ad6a 100644
--- a/mail/src/leap/mail/imap/fetch.py
+++ b/mail/src/leap/mail/imap/fetch.py
@@ -18,9 +18,7 @@
 Incoming mail fetcher.
 """
 import copy
-import json
 import logging
-#import ssl
 import threading
 import time
 import sys
@@ -34,7 +32,6 @@ from StringIO import StringIO
 from twisted.python import log
 from twisted.internet import defer
 from twisted.internet.task import LoopingCall
-#from twisted.internet.threads import deferToThread
 from zope.proxy import sameProxiedObjects
 
 from leap.common import events as leap_events
@@ -49,6 +46,7 @@ from leap.common.mail import get_email_charset
 from leap.keymanager import errors as keymanager_errors
 from leap.keymanager.openpgp import OpenPGPKey
 from leap.mail.decorators import deferred
+from leap.mail.utils import json_loads
 from leap.soledad.client import Soledad
 from leap.soledad.common.crypto import ENC_SCHEME_KEY, ENC_JSON_KEY
 
@@ -321,7 +319,8 @@ class LeapIncomingMail(object):
         """
         log.msg('processing decrypted doc')
         doc, data = msgtuple
-        msg = json.loads(data)
+        msg = json_loads(data)
+
         if not isinstance(msg, dict):
             defer.returnValue(False)
         if not msg.get(self.INCOMING_KEY, False):
@@ -338,16 +337,15 @@ class LeapIncomingMail(object):
         Tries to decrypt a gpg message if data looks like one.
 
         :param data: the text to be decrypted.
-        :type data: unicode
+        :type data: str
         :return: data, possibly descrypted.
         :rtype: str
         """
+        leap_assert_type(data, str)
         log.msg('maybe decrypting doc')
-        leap_assert_type(data, unicode)
 
         # parse the original message
         encoding = get_email_charset(data)
-        data = data.encode(encoding)
         msg = self._parser.parsestr(data)
 
         # try to obtain sender public key
@@ -420,13 +418,6 @@ class LeapIncomingMail(object):
             # Bailing out!
             return (msg, False)
 
-        # decrypted successully, now fix encoding and parse
-        try:
-            decrdata = decrdata.encode(encoding)
-        except (UnicodeEncodeError, UnicodeDecodeError) as e:
-            logger.error("Unicode error {0}".format(e))
-            decrdata = decrdata.encode(encoding, 'replace')
-
         decrmsg = self._parser.parsestr(decrdata)
         # remove original message's multipart/encrypted content-type
         del(msg['content-type'])
diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py
index 22de356..28bd272 100644
--- a/mail/src/leap/mail/imap/messages.py
+++ b/mail/src/leap/mail/imap/messages.py
@@ -494,8 +494,8 @@ class LeapMessage(fields, MailParser, MBoxParser):
             if not charset:
                 charset = self._get_charset(body)
             try:
-                body = body.decode(charset).encode(charset)
-            except (UnicodeEncodeError, UnicodeDecodeError) as e:
+                body = body.encode(charset)
+            except UnicodeError as e:
                 logger.error("Unicode error {0}".format(e))
                 body = body.encode(charset, 'replace')
 
-- 
cgit v1.2.3