diff options
| -rw-r--r-- | mail/src/leap/mail/imap/messageparts.py | 16 | ||||
| -rw-r--r-- | mail/src/leap/mail/imap/messages.py | 8 | 
2 files changed, 11 insertions, 13 deletions
| diff --git a/mail/src/leap/mail/imap/messageparts.py b/mail/src/leap/mail/imap/messageparts.py index 6d8631a..10672ed 100644 --- a/mail/src/leap/mail/imap/messageparts.py +++ b/mail/src/leap/mail/imap/messageparts.py @@ -32,7 +32,7 @@ from leap.common.decorators import memoized_method  from leap.common.mail import get_email_charset  from leap.mail.imap import interfaces  from leap.mail.imap.fields import fields -from leap.mail.utils import empty, first +from leap.mail.utils import empty, first, find_charset  MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id") @@ -40,10 +40,6 @@ MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id")  logger = logging.getLogger(__name__) -# XXX not needed anymoar ... -CHARSET_PATTERN = r"""charset=([\w-]+)""" -CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) -  """  A MessagePartDoc is a light wrapper around the dictionary-like  data that we pass along for message parts. It can be used almost everywhere @@ -363,17 +359,17 @@ class MessagePart(object):              payload = str("")          if payload: -            # XXX use find_charset instead -------------------------- -            # bad rebase???              content_type = self._get_ctype_from_document(phash) -            charset = first(CHARSET_RE.findall(content_type)) +            charset = find_charset(content_type)              logger.debug("Got charset from header: %s" % (charset,)) -            if not charset: +            if charset is None:                  charset = self._get_charset(payload) +                logger.debug("Got charset: %s" % (charset,))              try:                  payload = payload.encode(charset)              except UnicodeError as exc: -                logger.error("Unicode error {0}".format(exc)) +                logger.error( +                    "Unicode error, using 'replace'. {0!r}".format(exc))                  payload = payload.encode(charset, 'replace')          fd.write(payload) diff --git a/mail/src/leap/mail/imap/messages.py b/mail/src/leap/mail/imap/messages.py index 35c07f5..7617fb8 100644 --- a/mail/src/leap/mail/imap/messages.py +++ b/mail/src/leap/mail/imap/messages.py @@ -335,16 +335,18 @@ class LeapMessage(fields, MailParser, MBoxParser):              charset = find_charset(content_type)              logger.debug('got charset from content-type: %s' % charset)              if charset is None: -                # XXX change for find_charset utility                  charset = self._get_charset(body)              try:                  body = body.encode(charset)              except UnicodeError as exc: -                logger.error("Unicode error {0}".format(exc)) +                logger.error( +                    "Unicode error, using 'replace'. {0!r}".format(exc))                  logger.debug("Attempted to encode with: %s" % charset)                  try:                      body = body.encode(charset, 'replace') -                except UnicodeError as exc: + +                # XXX desperate attempt. I've seen things you wouldn't believe +                except UnicodeError:                      try:                          body = body.encode('utf-8', 'replace')                      except: | 
