diff options
author | Ivan Alejandro <ivanalejandro0@gmail.com> | 2013-10-23 10:17:21 -0300 |
---|---|---|
committer | Ivan Alejandro <ivanalejandro0@gmail.com> | 2013-10-23 10:17:21 -0300 |
commit | dcaa9301694fba800ae46c22592d55b5347f988c (patch) | |
tree | bb67f88d260d88691b7c814ad2de4048bcdc7f35 /mail/src/leap | |
parent | c51294dd7c054850a195c526ab50c2cddf085d29 (diff) |
Move charset parser to a utils module.
Diffstat (limited to 'mail/src/leap')
-rw-r--r-- | mail/src/leap/mail/imap/server.py | 29 | ||||
-rw-r--r-- | mail/src/leap/mail/utils.py | 44 |
2 files changed, 47 insertions, 26 deletions
```diff
diff --git a/mail/src/leap/mail/imap/server.py b/mail/src/leap/mail/imap/server.py
index df510ce..b9a041d 100644
--- a/mail/src/leap/mail/imap/server.py
+++ b/mail/src/leap/mail/imap/server.py
@@ -18,9 +18,7 @@
 Soledad-backed IMAP Server.
 """
 import copy
-import email
 import logging
-import re
 import StringIO
 import cStringIO
 import time
@@ -41,6 +39,7 @@ from leap.common import events as leap_events
 from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL
 from leap.common.check import leap_assert, leap_assert_type
 from leap.soledad.client import Soledad
+from leap.mail.utils import get_email_charset
 
 logger = logging.getLogger(__name__)
 
@@ -695,28 +694,6 @@ class LeapMessage(WithMsgFields):
     the more complex MIME-based interface.
     """
 
-    def _get_charset(self, content):
-        """
-        Mini parser to retrieve the charset of an email
-
-        :param content: mail contents
-        :type content: unicode
-
-        :returns: the charset as parsed from the contents
-        :rtype: str
-        """
-        charset = "UTF-8"
-        try:
-            em = email.message_from_string(content.encode("utf-8"))
-            # Miniparser for: Content-Type: <something>; charset=<charset>
-            charset_re = r'''charset=(?P<charset>[\w|\d|-]*)'''
-            charset = re.findall(charset_re, em["Content-Type"])[0]
-            if charset is None or len(charset) == 0:
-                charset = "UTF-8"
-        except Exception:
-            pass
-        return charset
-
     def open(self):
         """
         Return an file-like object opened for reading.
@@ -728,7 +705,7 @@ class LeapMessage(WithMsgFields):
         :rtype: StringIO
         """
         fd = cStringIO.StringIO()
-        charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
+        charset = get_email_charset(self._doc.content.get(self.RAW_KEY, ''))
         fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
         fd.seek(0)
         return fd
@@ -748,7 +725,7 @@ class LeapMessage(WithMsgFields):
         :rtype: StringIO
         """
         fd = StringIO.StringIO()
-        charset = self._get_charset(self._doc.content.get(self.RAW_KEY, ''))
+        charset = get_email_charset(self._doc.content.get(self.RAW_KEY, ''))
         fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))
         # SHOULD use a separate BODY FIELD ...
         fd.seek(0)
diff --git a/mail/src/leap/mail/utils.py b/mail/src/leap/mail/utils.py
new file mode 100644
index 0000000..22e16a7
--- /dev/null
+++ b/mail/src/leap/mail/utils.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+# utils.py
+# Copyright (C) 2013 LEAP
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Utility functions for email.
+"""
+import email
+import re
+
+
+def get_email_charset(content):
+    """
+    Mini parser to retrieve the charset of an email.
+
+    :param content: mail contents
+    :type content: unicode
+
+    :returns: the charset as parsed from the contents
+    :rtype: str
+    """
+    charset = "UTF-8"
+    try:
+        em = email.message_from_string(content.encode("utf-8"))
+        # Miniparser for: Content-Type: <something>; charset=<charset>
+        charset_re = r'''charset=(?P<charset>[\w|\d|-]*)'''
+        charset = re.findall(charset_re, em["Content-Type"])[0]
+        if charset is None or len(charset) == 0:
+            charset = "UTF-8"
+    except Exception:
+        pass
+    return charset
```