From a203337d155a6e7186980ef175642adc91d472fe Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 7 Jan 2014 14:23:25 -0400 Subject: move utility to its own --- src/leap/mail/utils.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 src/leap/mail/utils.py (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py new file mode 100644 index 0000000..2480efc --- /dev/null +++ b/src/leap/mail/utils.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# utils.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +""" +Small utilities. +""" + + +def first(things): + """ + Return the head of a collection. + """ + try: + return things[0] + except (IndexError, TypeError): + return None -- cgit v1.2.3 From 7b558eb23208d6de0b115fa453334421cc941e44 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Fri, 17 Jan 2014 14:59:09 -0300 Subject: Add custom json.loads method. This allows us to support the use of an `str` parameter that won't be converted to unicode. So in the case of a string containing bytes with different encodings this won't break. --- src/leap/mail/utils.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 1 deletion(-) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 2480efc..93388d3 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -15,8 +15,10 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . """ -Small utilities. +Mail utilities. """ +import json +import traceback def first(things): @@ -27,3 +29,100 @@ def first(things): return things[0] except (IndexError, TypeError): return None + + +class CustomJsonScanner(object): + """ + This class is a context manager definition used to monkey patch the default + json string parsing behavior. + The emails can have more than one encoding, so the `str` objects have more + than one encoding and json does not support direct work with `str` + (only `unicode`). + """ + + def _parse_string_str(self, s, idx, *args, **kwargs): + """ + Parses the string "s" starting at the point idx and returns an `str` + object. Which basically means it works exactly the same as the regular + JSON string parsing, except that it doesn't try to decode utf8. + We need this because mail raw strings might have bytes in multiple + encodings. + + :param s: the string we want to parse + :type s: str + :param idx: the starting point for parsing + :type idx: int + + :returns: the parsed string and the index where the + string ends. + :rtype: tuple (str, int) + """ + # NOTE: we just want to use this monkey patched version if we are + # calling the loads from our custom method. Otherwise, we use the + # json's default parser. + monkey_patched = False + for i in traceback.extract_stack(): + # look for json_loads method in the call stack + if i[2] == json_loads.__name__: + monkey_patched = True + break + + if not monkey_patched: + return self._orig_scanstring(s, idx, *args, **kwargs) + + found = False + end = s.find("\"", idx) + while not found: + try: + if s[end-1] != "\\": + found = True + else: + end = s.find("\"", end+1) + except Exception: + found = True + return s[idx:end].decode("string-escape"), end+1 + + def __enter__(self): + """ + Replace the json methods with the needed ones. + Also make a backup to restore them later. + """ + # backup original values + self._orig_make_scanner = json.scanner.make_scanner + self._orig_scanstring = json.decoder.scanstring + + # We need the make_scanner function to be the python one so we can + # monkey_patch the json string parsing + json.scanner.make_scanner = json.scanner.py_make_scanner + + # And now we monkey patch the money method + json.decoder.scanstring = self._parse_string_str + + def __exit__(self, exc_type, exc_value, traceback): + """ + Restores the backuped methods. + """ + # restore original values + json.scanner.make_scanner = self._orig_make_scanner + json.decoder.scanstring = self._orig_scanstring + + +def json_loads(data): + """ + It works as json.loads but supporting multiple encodings in the same + string and accepting an `str` parameter that won't be converted to unicode. + + :param data: the string to load the objects from + :type data: str + + :returns: the corresponding python object result of parsing 'data', this + behaves similarly as json.loads, with the exception of that + returns always `str` instead of `unicode`. + """ + obj = None + with CustomJsonScanner(): + # We need to use the cls parameter in order to trigger the code + # that will let us control the string parsing method. + obj = json.loads(data, cls=json.JSONDecoder) + + return obj -- cgit v1.2.3 From c2e052a08789057d550a0442caa28b27ebc4b416 Mon Sep 17 00:00:00 2001 From: Ivan Alejandro Date: Wed, 22 Jan 2014 11:01:05 -0300 Subject: Add find_charset helper and use where is needed. --- src/leap/mail/utils.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 93388d3..6c79227 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -18,9 +18,14 @@ Mail utilities. """ import json +import re import traceback +CHARSET_PATTERN = r"""charset=([\w-]+)""" +CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) + + def first(things): """ Return the head of a collection. @@ -31,6 +36,26 @@ def first(things): return None +def find_charset(thing, default=None): + """ + Looks into the object 'thing' for a charset specification. + It searchs into the object's `repr`. + + :param thing: the object to look into. + :type thing: object + :param default: the dafault charset to return if no charset is found. + :type default: str + + :returns: the charset or 'default' + :rtype: str or None + """ + charset = first(CHARSET_RE.findall(repr(thing))) + if charset is None: + charset = default + + return charset + + class CustomJsonScanner(object): """ This class is a context manager definition used to monkey patch the default -- cgit v1.2.3 From d7a167e1ba5ea9bb8167e6255a81d4c96fdffef9 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Thu, 23 Jan 2014 02:33:32 -0400 Subject: move utilities --- src/leap/mail/utils.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 6c79227..64af04f 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -36,6 +36,14 @@ def first(things): return None +def maybe_call(thing): + """ + Return the same thing, or the result of its invocation if it is a + callable. + """ + return thing() if callable(thing) else thing + + def find_charset(thing, default=None): """ Looks into the object 'thing' for a charset specification. @@ -46,16 +54,28 @@ def find_charset(thing, default=None): :param default: the dafault charset to return if no charset is found. :type default: str - :returns: the charset or 'default' + :return: the charset or 'default' :rtype: str or None """ charset = first(CHARSET_RE.findall(repr(thing))) if charset is None: charset = default - return charset +def lowerdict(_dict): + """ + Return a dict with the keys in lowercase. + + :param _dict: the dict to convert + :rtype: dict + """ + # TODO should properly implement a CaseInsensitive dict. + # Look into requests code. + return dict((key.lower(), value) + for key, value in _dict.items()) + + class CustomJsonScanner(object): """ This class is a context manager definition used to monkey patch the default -- cgit v1.2.3 From e02db78b1b6d8fe021efd4adb250c64a1dd4bac4 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 24 Jan 2014 05:39:13 -0400 Subject: flags use the memstore * add new/dirty deferred dict to notify when written to disk * fix eventual duplication after copy * fix flag flickering on first retrieval. --- src/leap/mail/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 64af04f..bae2898 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -36,6 +36,15 @@ def first(things): return None +def empty(thing): + """ + Return True if a thing is None or its length is zero. + """ + if thing is None: + return True + return len(thing) == 0 + + def maybe_call(thing): """ Return the same thing, or the result of its invocation if it is a -- cgit v1.2.3 From f5365ae0c2edb8b3e879f876f2f7e42b25f4616a Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Mon, 27 Jan 2014 16:11:53 -0400 Subject: handle last_uid property in memory store --- src/leap/mail/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index bae2898..1f43947 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -42,7 +42,10 @@ def empty(thing): """ if thing is None: return True - return len(thing) == 0 + try: + return len(thing) == 0 + except ReferenceError: + return True def maybe_call(thing): -- cgit v1.2.3 From a7e0054b595822325f749b0b1df7d25cab4e6486 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 28 Jan 2014 18:39:59 -0400 Subject: docstring fixes Also some fixes for None comparisons. --- src/leap/mail/utils.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 1f43947..6a1fcde 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -21,6 +21,8 @@ import json import re import traceback +from leap.soledad.common.document import SoledadDocument + CHARSET_PATTERN = r"""charset=([\w-]+)""" CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) @@ -42,6 +44,8 @@ def empty(thing): """ if thing is None: return True + if isinstance(thing, SoledadDocument): + thing = thing.content try: return len(thing) == 0 except ReferenceError: -- cgit v1.2.3 From 18fed49c4143eb764ae9e806882d24f8f4e95744 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Sun, 2 Feb 2014 09:26:37 -0400 Subject: fix missing content after in-memory add because THE KEYS WILL BE STRINGS AFTER ADDED TO SOLEDAD Can I remember that? * Fix copy from local folders * Fix copy when we already have a copy of the message in the inbox, marked as deleted. * Fix also bad deferred.succeed in add_msg when it already exist. --- src/leap/mail/utils.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 6a1fcde..942acfb 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -17,6 +17,7 @@ """ Mail utilities. """ +import copy import json import re import traceback @@ -92,6 +93,43 @@ def lowerdict(_dict): for key, value in _dict.items()) +PART_MAP = "part_map" + + +def _str_dict(d, k): + """ + Convert the dictionary key to string if it was a string. + + :param d: the dict + :type d: dict + :param k: the key + :type k: object + """ + if isinstance(k, int): + val = d[k] + d[str(k)] = val + del(d[k]) + + +def stringify_parts_map(d): + """ + Modify a dictionary making all the nested dicts under "part_map" keys + having strings as keys. + + :param d: the dictionary to modify + :type d: dictionary + :rtype: dictionary + """ + for k in d: + if k == PART_MAP: + pmap = d[k] + for kk in pmap.keys(): + _str_dict(d[k], kk) + for kk in pmap.keys(): + stringify_parts_map(d[k][str(kk)]) + return d + + class CustomJsonScanner(object): """ This class is a context manager definition used to monkey patch the default -- cgit v1.2.3 From b92e63c316c1cf9f8b6481dbfa70737acfb3eee9 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 7 Feb 2014 05:50:55 -0400 Subject: separate better dirty/new flags; add cdocs --- src/leap/mail/utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 942acfb..8b75cfc 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -94,6 +94,7 @@ def lowerdict(_dict): PART_MAP = "part_map" +PHASH = "phash" def _str_dict(d, k): @@ -130,6 +131,24 @@ def stringify_parts_map(d): return d +def phash_iter(d): + """ + A recursive generator that extracts all the payload-hashes + from an arbitrary nested parts-map dictionary. + + :param d: the dictionary to walk + :type d: dictionary + :return: a list of all the phashes found + :rtype: list + """ + if PHASH in d: + yield d[PHASH] + if PART_MAP in d: + for key in d[PART_MAP]: + for phash in phash_iter(d[PART_MAP][key]): + yield phash + + class CustomJsonScanner(object): """ This class is a context manager definition used to monkey patch the default -- cgit v1.2.3 From ce55f761a55f78cb122296e91686fa6fde8959b8 Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Fri, 7 Feb 2014 07:00:47 -0400 Subject: two versions of accumulator util --- src/leap/mail/utils.py | 81 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 8b75cfc..3ba4291 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -17,10 +17,10 @@ """ Mail utilities. """ -import copy import json import re import traceback +import Queue from leap.soledad.common.document import SoledadDocument @@ -149,6 +149,85 @@ def phash_iter(d): yield phash +def accumulator(fun, lim): + """ + A simple accumulator that uses a closure and a mutable + object to collect items. + When the count of items is greater than `lim`, the + collection is flushed after invoking a map of the function `fun` + over it. + + The returned accumulator can also be flushed at any moment + by passing a boolean as a second parameter. + + :param fun: the function to call over the collection + when its size is greater than `lim` + :type fun: callable + :param lim: the turning point for the collection + :type lim: int + :rtype: function + + >>> from pprint import pprint + >>> acc = accumulator(pprint, 2) + >>> acc(1) + >>> acc(2) + [1, 2] + >>> acc(3) + >>> acc(4) + [3, 4] + >>> acc = accumulator(pprint, 5) + >>> acc(1) + >>> acc(2) + >>> acc(3) + >>> acc(None, flush=True) + [1,2,3] + """ + KEY = "items" + _o = {KEY: []} + + def _accumulator(item, flush=False): + collection = _o[KEY] + collection.append(item) + if len(collection) >= lim or flush: + map(fun, filter(None, collection)) + _o[KEY] = [] + + return _accumulator + + +def accumulator_queue(fun, lim): + """ + A version of the accumulator that uses a queue. + + When the count of items is greater than `lim`, the + queue is flushed after invoking the function `fun` + over its items. + + The returned accumulator can also be flushed at any moment + by passing a boolean as a second parameter. + + :param fun: the function to call over the collection + when its size is greater than `lim` + :type fun: callable + :param lim: the turning point for the collection + :type lim: int + :rtype: function + """ + _q = Queue.Queue() + + def _accumulator(item, flush=False): + _q.put(item) + if _q.qsize() >= lim or flush: + collection = [_q.get() for i in range(_q.qsize())] + map(fun, filter(None, collection)) + + return _accumulator + + +# +# String manipulation +# + class CustomJsonScanner(object): """ This class is a context manager definition used to monkey patch the default -- cgit v1.2.3 From de762b5c6e529f4e668bee1ec848eb1f6380369b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 11 Feb 2014 01:41:51 -0400 Subject: catch typeerror too in empty definition --- src/leap/mail/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/leap/mail/utils.py') diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py index 3ba4291..fed24b3 100644 --- a/src/leap/mail/utils.py +++ b/src/leap/mail/utils.py @@ -49,7 +49,7 @@ def empty(thing): thing = thing.content try: return len(thing) == 0 - except ReferenceError: + except (ReferenceError, TypeError): return True @@ -267,6 +267,8 @@ class CustomJsonScanner(object): if not monkey_patched: return self._orig_scanstring(s, idx, *args, **kwargs) + # TODO profile to see if a compiled regex can get us some + # benefit here. found = False end = s.find("\"", idx) while not found: -- cgit v1.2.3