diff options
author | Folker Bernitt <fbernitt@thoughtworks.com> | 2015-10-12 10:52:14 +0200 |
---|---|---|
committer | Folker Bernitt <fbernitt@thoughtworks.com> | 2015-10-12 10:54:00 +0200 |
commit | 048418454631066426e53078740c009e729fd8ae (patch) | |
tree | bdbc12fe9d8c4f5c3d6e667be37e7e945bba6bc0 /service/pixelated/adapter | |
parent | 94fd15faa52a0b789ff1e705e5a8cdab4d279560 (diff) |
Log problem when ignoring characters
- Issue #473
- The previous fix only addressed the symptoms, but we want to be aware that this problem still exists
Diffstat (limited to 'service/pixelated/adapter')
-rw-r--r-- | service/pixelated/adapter/mailstore/body_parser.py | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/service/pixelated/adapter/mailstore/body_parser.py b/service/pixelated/adapter/mailstore/body_parser.py
index 2193b8e8..a6017833 100644
--- a/service/pixelated/adapter/mailstore/body_parser.py
+++ b/service/pixelated/adapter/mailstore/body_parser.py
@@ -16,6 +16,9 @@
 
 from email.parser import Parser
 import re
+import logging
+
+logger = logging.getLogger(__name__)
 
 
 def _parse_charset_header(content_type_and_charset_header, default_charset='us-ascii'):
@@ -56,6 +59,10 @@ class BodyParser(object):
         text += u'\n'
         encoded_text = text.encode(charset)
         if isinstance(self._content, unicode):
-            return encoded_text + self._content.encode(charset, 'ignore')
+            try:
+                return encoded_text + self._content.encode(charset)
+            except UnicodeError, e:
+                logger.warn('Failed to encode content for charset %s. Ignoring invalid chars: %s' % (charset, e))
+                return encoded_text + self._content.encode(charset, 'ignore')
         else:
             return encoded_text + self._content
```