diff options
| author | Ivan Alejandro <ivanalejandro0@gmail.com> | 2013-10-23 10:17:21 -0300 | 
|---|---|---|
| committer | Ivan Alejandro <ivanalejandro0@gmail.com> | 2013-10-23 10:17:21 -0300 | 
| commit | c0834048d564dfdc3e2cedc1a0f81788d14f3cab (patch) | |
| tree | 21bce01d1b32cc6957a3570305e6be053e9d85c3 /src | |
| parent | dd8b6212072bb8db499e12468d9905a5cf8ce630 (diff) | |
Move charset parser to a utils module.
Diffstat (limited to 'src')
| -rw-r--r-- | src/leap/mail/imap/server.py | 29 | ||||
| -rw-r--r-- | src/leap/mail/utils.py | 44 | 
2 files changed, 47 insertions, 26 deletions
| diff --git a/src/leap/mail/imap/server.py b/src/leap/mail/imap/server.py index df510ce..b9a041d 100644 --- a/src/leap/mail/imap/server.py +++ b/src/leap/mail/imap/server.py @@ -18,9 +18,7 @@  Soledad-backed IMAP Server.  """  import copy -import email  import logging -import re  import StringIO  import cStringIO  import time @@ -41,6 +39,7 @@ from leap.common import events as leap_events  from leap.common.events.events_pb2 import IMAP_UNREAD_MAIL  from leap.common.check import leap_assert, leap_assert_type  from leap.soledad.client import Soledad +from leap.mail.utils import get_email_charset  logger = logging.getLogger(__name__) @@ -695,28 +694,6 @@ class LeapMessage(WithMsgFields):      the more complex MIME-based interface.      """ -    def _get_charset(self, content): -        """ -        Mini parser to retrieve the charset of an email - -        :param content: mail contents -        :type content: unicode - -        :returns: the charset as parsed from the contents -        :rtype: str -        """ -        charset = "UTF-8" -        try: -            em = email.message_from_string(content.encode("utf-8")) -            # Miniparser for: Content-Type: <something>; charset=<charset> -            charset_re = r'''charset=(?P<charset>[\w|\d|-]*)''' -            charset = re.findall(charset_re, em["Content-Type"])[0] -            if charset is None or len(charset) == 0: -                charset = "UTF-8" -        except Exception: -            pass -        return charset -      def open(self):          """          Return an file-like object opened for reading. @@ -728,7 +705,7 @@ class LeapMessage(WithMsgFields):          :rtype: StringIO          """          fd = cStringIO.StringIO() -        charset = self._get_charset(self._doc.content.get(self.RAW_KEY, '')) +        charset = get_email_charset(self._doc.content.get(self.RAW_KEY, ''))          fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))          fd.seek(0)          return fd @@ -748,7 +725,7 @@ class LeapMessage(WithMsgFields):          :rtype: StringIO          """          fd = StringIO.StringIO() -        charset = self._get_charset(self._doc.content.get(self.RAW_KEY, '')) +        charset = get_email_charset(self._doc.content.get(self.RAW_KEY, ''))          fd.write(self._doc.content.get(self.RAW_KEY, '').encode(charset))          # SHOULD use a separate BODY FIELD ...          fd.seek(0) diff --git a/src/leap/mail/utils.py b/src/leap/mail/utils.py new file mode 100644 index 0000000..22e16a7 --- /dev/null +++ b/src/leap/mail/utils.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# utils.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. +""" +Utility functions for email. +""" +import email +import re + + +def get_email_charset(content): +    """ +    Mini parser to retrieve the charset of an email. + +    :param content: mail contents +    :type content: unicode + +    :returns: the charset as parsed from the contents +    :rtype: str +    """ +    charset = "UTF-8" +    try: +        em = email.message_from_string(content.encode("utf-8")) +        # Miniparser for: Content-Type: <something>; charset=<charset> +        charset_re = r'''charset=(?P<charset>[\w|\d|-]*)''' +        charset = re.findall(charset_re, em["Content-Type"])[0] +        if charset is None or len(charset) == 0: +            charset = "UTF-8" +    except Exception: +        pass +    return charset | 
