summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Folker Bernitt <fbernitt@thoughtworks.com>, 2015-10-12 10:52:14 +0200
committer: Folker Bernitt <fbernitt@thoughtworks.com>, 2015-10-12 10:54:00 +0200
commit: 048418454631066426e53078740c009e729fd8ae (patch)
tree: bdbc12fe9d8c4f5c3d6e667be37e7e945bba6bc0
parent: 94fd15faa52a0b789ff1e705e5a8cdab4d279560 (diff)
Log problem when ignoring characters
- Issue #473 - Previous fix only fixes symptoms, but we want to be aware that this problem still exists
-rw-r--r-- service/pixelated/adapter/mailstore/body_parser.py | 9
-rw-r--r-- service/test/unit/adapter/mailstore/test_body_parser.py | 9
2 files changed, 17 insertions, 1 deletion
diff --git a/service/pixelated/adapter/mailstore/body_parser.py b/service/pixelated/adapter/mailstore/body_parser.py
index 2193b8e8..a6017833 100644
--- a/service/pixelated/adapter/mailstore/body_parser.py
+++ b/service/pixelated/adapter/mailstore/body_parser.py
@@ -16,6 +16,9 @@
from email.parser import Parser
import re
+import logging
+
+logger = logging.getLogger(__name__)
def _parse_charset_header(content_type_and_charset_header, default_charset='us-ascii'):
@@ -56,6 +59,10 @@ class BodyParser(object):
text += u'\n'
encoded_text = text.encode(charset)
if isinstance(self._content, unicode):
- return encoded_text + self._content.encode(charset, 'ignore')
+ try:
+ return encoded_text + self._content.encode(charset)
+ except UnicodeError, e:
+ logger.warn('Failed to encode content for charset %s. Ignoring invalid chars: %s' % (charset, e))
+ return encoded_text + self._content.encode(charset, 'ignore')
else:
return encoded_text + self._content
diff --git a/service/test/unit/adapter/mailstore/test_body_parser.py b/service/test/unit/adapter/mailstore/test_body_parser.py
index 3c2d17fb..9d58637c 100644
--- a/service/test/unit/adapter/mailstore/test_body_parser.py
+++ b/service/test/unit/adapter/mailstore/test_body_parser.py
@@ -15,6 +15,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with Pixelated. If not, see <http://www.gnu.org/licenses/>.
import unittest
+from mock import patch
from pixelated.adapter.mailstore.body_parser import BodyParser
@@ -46,3 +47,11 @@ class BodyParserTest(unittest.TestCase):
parser = BodyParser('dGVzdCB0ZXh0\n', content_type='text/plain', content_transfer_encoding='base64')
self.assertEqual('test text', parser.parsed_content())
+
+ @patch('pixelated.adapter.mailstore.body_parser.logger')
+ def test_body_parser_logs_problems_and_then_ignores_invalid_chars(self, logger_mock):
+ data = u'unkown char: \ufffd'
+ parser = BodyParser(data, content_type='text/plain; charset=iso-8859-1', content_transfer_encoding='8bit')
+
+ self.assertEqual(u'unkown char: ', parser.parsed_content())
+ logger_mock.warn.assert_called_with(u'Failed to encode content for charset iso-8859-1. Ignoring invalid chars: \'latin-1\' codec can\'t encode character u\'\\ufffd\' in position 13: ordinal not in range(256)')