From f096368cfbc49caab52811ae50388aae74272a1a Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 28 Jan 2014 10:24:04 -0400 Subject: fix find_charset rebase --- src/leap/mail/imap/messageparts.py | 16 ++++++---------- src/leap/mail/imap/messages.py | 8 +++++--- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py index 6d8631a..10672ed 100644 --- a/src/leap/mail/imap/messageparts.py +++ b/src/leap/mail/imap/messageparts.py @@ -32,7 +32,7 @@ from leap.common.decorators import memoized_method from leap.common.mail import get_email_charset from leap.mail.imap import interfaces from leap.mail.imap.fields import fields -from leap.mail.utils import empty, first +from leap.mail.utils import empty, first, find_charset MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id") @@ -40,10 +40,6 @@ MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id") logger = logging.getLogger(__name__) -# XXX not needed anymoar ... -CHARSET_PATTERN = r"""charset=([\w-]+)""" -CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE) - """ A MessagePartDoc is a light wrapper around the dictionary-like data that we pass along for message parts. It can be used almost everywhere @@ -363,17 +359,17 @@ class MessagePart(object): payload = str("") if payload: - # XXX use find_charset instead -------------------------- - # bad rebase??? content_type = self._get_ctype_from_document(phash) - charset = first(CHARSET_RE.findall(content_type)) + charset = find_charset(content_type) logger.debug("Got charset from header: %s" % (charset,)) - if not charset: + if charset is None: charset = self._get_charset(payload) + logger.debug("Got charset: %s" % (charset,)) try: payload = payload.encode(charset) except UnicodeError as exc: - logger.error("Unicode error {0}".format(exc)) + logger.error( + "Unicode error, using 'replace'. {0!r}".format(exc)) payload = payload.encode(charset, 'replace') fd.write(payload) diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py index 35c07f5..7617fb8 100644 --- a/src/leap/mail/imap/messages.py +++ b/src/leap/mail/imap/messages.py @@ -335,16 +335,18 @@ class LeapMessage(fields, MailParser, MBoxParser): charset = find_charset(content_type) logger.debug('got charset from content-type: %s' % charset) if charset is None: - # XXX change for find_charset utility charset = self._get_charset(body) try: body = body.encode(charset) except UnicodeError as exc: - logger.error("Unicode error {0}".format(exc)) + logger.error( + "Unicode error, using 'replace'. {0!r}".format(exc)) logger.debug("Attempted to encode with: %s" % charset) try: body = body.encode(charset, 'replace') - except UnicodeError as exc: + + # XXX desperate attempt. I've seen things you wouldn't believe + except UnicodeError: try: body = body.encode('utf-8', 'replace') except: -- cgit v1.2.3