From 7b558eb23208d6de0b115fa453334421cc941e44 Mon Sep 17 00:00:00 2001
From: Ivan Alejandro <ivanalejandro0@gmail.com>
Date: Fri, 17 Jan 2014 14:59:09 -0300
Subject: Add custom json.loads method.

This adds a `json_loads` helper that accepts a `str` argument and parses
it without converting it to unicode.
As a result, a string containing bytes in several different encodings
no longer breaks the parsing.
---
 src/leap/mail/utils.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 1 deletion(-)

(limited to 'src/leap')

diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py
index 2480efc..93388d3 100644
--- a/src/leap/mail/utils.py
+++ b/src/leap/mail/utils.py
@@ -15,8 +15,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
-Small utilities.
+Mail utilities.
 """
+import json
+import traceback
 
 
 def first(things):
@@ -27,3 +29,100 @@ def first(things):
         return things[0]
     except (IndexError, TypeError):
         return None
+
+
+class CustomJsonScanner(object):
+    """
+    Context manager that temporarily monkey patches the `json` module so
+    that JSON strings are parsed into `str` objects and not `unicode`.
+    The emails can have more than one encoding, so the `str` objects have
+    more than one encoding and json does not support direct work with
+    `str` (only `unicode`). Originals are put back when the context exits.
+    """
+
+    def _parse_string_str(self, s, idx, *args, **kwargs):
+        """
+        Parse the JSON string in `s` whose content starts at `idx` and return
+        it as `str`: the "string-escape" codec is applied, utf8 is not decoded.
+        We need this since mail raw strings may have bytes in many encodings.
+        NOTE(review): a closing quote right after an escaped backslash seems
+        to be treated as escaped, and a failed find (-1) is not handled.
+
+        :param s: the string we want to parse
+        :type s: str
+        :param idx: the starting point for parsing
+        :type idx: int
+
+        :returns: the parsed string and the index just past the quote
+                  where the scan stopped.
+        :rtype: tuple (str, int)
+        """
+        # NOTE: json.decoder.scanstring is replaced at module level, so we
+        # only use this version when called through our json_loads. Any
+        # other caller is forwarded to json's original parser.
+        monkey_patched = False
+        for i in traceback.extract_stack():
+            # i[2] is the frame's function name; matched by name only
+            if i[2] == json_loads.__name__:
+                monkey_patched = True
+                break
+
+        if not monkey_patched:
+            return self._orig_scanstring(s, idx, *args, **kwargs)
+
+        found = False
+        end = s.find("\"", idx)
+        while not found:
+            try:
+                if s[end-1] != "\\":
+                    found = True
+                else:
+                    end = s.find("\"", end+1)
+            except Exception:
+                found = True
+        return s[idx:end].decode("string-escape"), end+1
+
+    def __enter__(self):
+        """
+        Back up the json functions we override, then install our versions.
+        Module attributes are replaced, so every user of json sees them.
+        """
+        # keep the originals so __exit__ can restore them
+        self._orig_make_scanner = json.scanner.make_scanner
+        self._orig_scanstring = json.decoder.scanstring
+
+        # Switch make_scanner to the pure python one; this is needed so the
+        # monkey patched json string parsing below is actually used.
+        json.scanner.make_scanner = json.scanner.py_make_scanner
+
+        # Route json string parsing through our str-returning parser
+        json.decoder.scanstring = self._parse_string_str
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """
+        Restore the backed-up json functions; exceptions are not suppressed.
+        """
+        # restore originals (the `traceback` arg shadows the module here)
+        json.scanner.make_scanner = self._orig_make_scanner
+        json.decoder.scanstring = self._orig_scanstring
+
+
+def json_loads(data):
+    """
+    Work like json.loads, but accept a `str` that may mix several encodings
+    and parse its JSON strings as `str`, without converting to unicode.
+
+    :param data: the string to load the objects from
+    :type data: str
+
+    :returns: the python object that results from parsing 'data'; this
+              behaves like json.loads, except that strings are always
+              returned as `str` instead of `unicode`.
+    """
+    obj = None
+    with CustomJsonScanner():
+        # Passing cls explicitly triggers the code path that picks up our
+        # patched string parser (presumably a fresh decoder -- confirm).
+        obj = json.loads(data, cls=json.JSONDecoder)
+
+    return obj
-- 
cgit v1.2.3