diff options
author | Tomás Touceda <chiiph@leap.se> | 2013-10-23 11:36:32 -0300 |
---|---|---|
committer | Tomás Touceda <chiiph@leap.se> | 2013-10-23 11:36:32 -0300 |
commit | d57436f84abaa5d3a14ca1002d2de90176e9cb52 (patch) | |
tree | 7be7768872cf52888a84a74493b62e4eea82d6c7 /src/leap/mail/utils.py | |
parent | dd8b6212072bb8db499e12468d9905a5cf8ce630 (diff) | |
parent | bea7ee293c720b8b632a3b5149e4550c6409fd20 (diff) |
Merge remote-tracking branch 'ivan/bug/4000_support-non-ascii' into develop
Diffstat (limited to 'src/leap/mail/utils.py')
-rw-r--r-- | src/leap/mail/utils.py | 44 |
1 files changed, 44 insertions, 0 deletions
# diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py
# new file mode 100644, index 0000000..22e16a7
# -*- coding: utf-8 -*-
# utils.py
# Copyright (C) 2013 LEAP
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Utility functions for email.
"""
import email
import re

# Charset returned whenever none can be extracted from the message.
_DEFAULT_CHARSET = "UTF-8"

# Miniparser for: Content-Type: <something>; charset=<charset>
# MIME parameter names are case-insensitive and the value may be quoted,
# so accept optional whitespace around "=" and an optional opening quote.
_CHARSET_RE = re.compile(
    r'''charset\s*=\s*["']?(?P<charset>[\w-]+)''', re.IGNORECASE)


def get_email_charset(content):
    """
    Mini parser to retrieve the charset of an email.

    The charset is read from the ``charset`` parameter of the message's
    Content-Type header. This is a best-effort helper: it never raises for
    malformed input and falls back to "UTF-8" when the header or the
    parameter is missing, empty or unparseable.

    :param content: mail contents
    :type content: unicode (byte strings are accepted as well)

    :returns: the charset as parsed from the contents, spelled as it
              appears in the header, or "UTF-8" if none was found
    :rtype: str
    """
    try:
        if isinstance(content, bytes):
            # Python 3 bytes need message_from_bytes; on Python 2, where
            # bytes is str, that function does not exist and
            # message_from_string is the right parser.
            parse = getattr(
                email, "message_from_bytes", email.message_from_string)
            msg = parse(content)
        elif str is bytes:
            # Python 2: the parser expects a byte string, not unicode.
            msg = email.message_from_string(content.encode("utf-8"))
        else:
            msg = email.message_from_string(content)
        content_type = msg["Content-Type"]
    except (AttributeError, TypeError, ValueError):
        # Not something we can parse as a message (e.g. None or an
        # unencodable string): keep the documented fallback.
        return _DEFAULT_CHARSET

    if content_type is None:
        return _DEFAULT_CHARSET

    # str() also covers the Header objects the email package may return
    # for headers containing non-ASCII data.
    found = _CHARSET_RE.search(str(content_type))
    if found is None:
        return _DEFAULT_CHARSET
    return found.group("charset")