summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kali Kaneko <kali@leap.se> 2014-01-28 10:24:04 -0400
committer Kali Kaneko <kali@leap.se> 2014-01-28 19:38:46 -0400
commit f096368cfbc49caab52811ae50388aae74272a1a (patch)
tree cb410ba33e2ed86af4067613b56d03091d739b10
parent f5365ae0c2edb8b3e879f876f2f7e42b25f4616a (diff)
fix find_charset rebase
-rw-r--r-- src/leap/mail/imap/messageparts.py 16
-rw-r--r-- src/leap/mail/imap/messages.py 8
2 files changed, 11 insertions, 13 deletions
diff --git a/src/leap/mail/imap/messageparts.py b/src/leap/mail/imap/messageparts.py
index 6d8631a..10672ed 100644
--- a/src/leap/mail/imap/messageparts.py
+++ b/src/leap/mail/imap/messageparts.py
@@ -32,7 +32,7 @@ from leap.common.decorators import memoized_method
from leap.common.mail import get_email_charset
from leap.mail.imap import interfaces
from leap.mail.imap.fields import fields
-from leap.mail.utils import empty, first
+from leap.mail.utils import empty, first, find_charset
MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id")
@@ -40,10 +40,6 @@ MessagePartType = Enum("hdoc", "fdoc", "cdoc", "cdocs", "docs_id")
logger = logging.getLogger(__name__)
-# XXX not needed anymoar ...
-CHARSET_PATTERN = r"""charset=([\w-]+)"""
-CHARSET_RE = re.compile(CHARSET_PATTERN, re.IGNORECASE)
-
"""
A MessagePartDoc is a light wrapper around the dictionary-like
data that we pass along for message parts. It can be used almost everywhere
@@ -363,17 +359,17 @@ class MessagePart(object):
payload = str("")
if payload:
- # XXX use find_charset instead --------------------------
- # bad rebase???
content_type = self._get_ctype_from_document(phash)
- charset = first(CHARSET_RE.findall(content_type))
+ charset = find_charset(content_type)
logger.debug("Got charset from header: %s" % (charset,))
- if not charset:
+ if charset is None:
charset = self._get_charset(payload)
+ logger.debug("Got charset: %s" % (charset,))
try:
payload = payload.encode(charset)
except UnicodeError as exc:
- logger.error("Unicode error {0}".format(exc))
+ logger.error(
+ "Unicode error, using 'replace'. {0!r}".format(exc))
payload = payload.encode(charset, 'replace')
fd.write(payload)
diff --git a/src/leap/mail/imap/messages.py b/src/leap/mail/imap/messages.py
index 35c07f5..7617fb8 100644
--- a/src/leap/mail/imap/messages.py
+++ b/src/leap/mail/imap/messages.py
@@ -335,16 +335,18 @@ class LeapMessage(fields, MailParser, MBoxParser):
charset = find_charset(content_type)
logger.debug('got charset from content-type: %s' % charset)
if charset is None:
- # XXX change for find_charset utility
charset = self._get_charset(body)
try:
body = body.encode(charset)
except UnicodeError as exc:
- logger.error("Unicode error {0}".format(exc))
+ logger.error(
+ "Unicode error, using 'replace'. {0!r}".format(exc))
logger.debug("Attempted to encode with: %s" % charset)
try:
body = body.encode(charset, 'replace')
- except UnicodeError as exc:
+
+ # XXX desperate attempt. I've seen things you wouldn't believe
+ except UnicodeError:
try:
body = body.encode('utf-8', 'replace')
except: