# -*- coding: utf-8 -*-
# mail.py
# Copyright (C) 2013 LEAP
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Utility functions for email.
"""
import email
import re

from leap.common.check import leap_assert_type

# Mini parser for: Content-Type: <mime-type>; charset=<charset>
# The parameter name is matched case-insensitively, whitespace around "="
# is tolerated, and an opening double quote before the value is skipped so
# that both charset=utf-8 and charset="utf-8" yield "utf-8".
_CHARSET_RE = re.compile(
    r'''\bcharset\s*=\s*"?(?P<charset>[\w-]+)''', re.IGNORECASE)


def get_email_charset(content, default="utf-8"):
    """
    Mini parser to retrieve the charset of an email.

    The message is parsed with the standard library ``email`` package and
    the ``charset`` parameter is extracted from its ``Content-Type`` header.
    When the header is absent, or carries no usable ``charset`` parameter,
    ``default`` is returned instead.

    :param content: mail contents
    :type content: unicode
    :param default: optional default value for encoding
    :type default: str or None

    :returns: the charset as parsed from the contents, exactly as it is
              written in the header (no case normalization), or
              ``default`` if none could be found
    :rtype: str
    """
    leap_assert_type(content, unicode)

    # message_from_string() is fed the UTF-8 encoded byte string;
    # "replace" keeps the encoding step from failing on characters that
    # cannot be encoded.
    em = email.message_from_string(content.encode("utf-8", "replace"))

    # Header lookup on a Message returns None for a missing header.
    content_type = em["Content-Type"]
    if content_type is None:
        return default

    # Only the first charset parameter in the header is considered.
    match = _CHARSET_RE.search(content_type)
    if match is None:
        return default

    return match.group("charset")