From 61971193bbaf7c7572a07cbb08add0c04be5ad55 Mon Sep 17 00:00:00 2001 From: Folker Bernitt Date: Tue, 13 Jan 2015 15:23:03 +0100 Subject: Fixed ignoring charset on mail content type when parsing mails. --- service/pixelated/adapter/model/mail.py | 20 ++++++++++++++------ service/test/unit/adapter/mail_test.py | 11 +++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'service') diff --git a/service/pixelated/adapter/model/mail.py b/service/pixelated/adapter/model/mail.py index 7984cb05..f1b7774c 100644 --- a/service/pixelated/adapter/model/mail.py +++ b/service/pixelated/adapter/model/mail.py @@ -76,10 +76,16 @@ class Mail(object): def _charset(self): if 'content_type' in self.headers and 'charset' in self.headers['content_type']: - return re.compile('.*charset=(.*)').match(self.headers['content_type']).group(1) + return self._parse_charset_heade(self.headers['content_type']) else: return 'utf-8' + def _parse_charset_header(self, charset_header, default_charset='utf-8'): + try: + return re.compile('.*charset=(.*)').match(charset_header).group(1) + except: + return default_charset + @property def raw(self): return self._mime_multipart.as_string() @@ -213,14 +219,16 @@ class PixelatedMail(Mail): def _decode_part(self, part): encoding = part['headers'].get('Content-Transfer-Encoding', '') + content_type = self._parse_charset_header(part['headers'].get('Content-Type')) decoding_map = { - 'quoted-printable': lambda content: unicode(content.decode('quopri')), - 'base64': lambda content: content.decode('base64').decode('utf-8') + 'quoted-printable': lambda content, content_type: unicode(content.decode('quopri'), content_type), + 'base64': lambda content, content_type: content.decode('base64').decode('utf-8') } if encoding: - return decoding_map[encoding](part['content']) - return part['content'] + return decoding_map[encoding](part['content'], content_type) + else: + return part['content'] @property def alternatives(self): @@ -228,7 +236,7 @@ class 
PixelatedMail(Mail): @property def text_plain_body(self): - if self.parts and len(self.alternatives) == 1: + if self.parts and len(self.alternatives) >= 1: return self._decode_part(self.alternatives[0]) else: return self.bdoc.content['raw'] # plain diff --git a/service/test/unit/adapter/mail_test.py b/service/test/unit/adapter/mail_test.py index 9dc54e66..be7b731d 100644 --- a/service/test/unit/adapter/mail_test.py +++ b/service/test/unit/adapter/mail_test.py @@ -171,6 +171,17 @@ class TestPixelatedMail(unittest.TestCase): self.assertRegexpMatches(mail.text_plain_body, '([\s\S]*100%)') self.assertRegexpMatches(mail.html_body, '([\s\S]*100%)') + def test_content_type_header_of_mail_part_is_used(self): + plain_headers = {'Content-Type': 'text/plain; charset=utf-8', 'Content-Transfer-Encoding': 'quoted-printable'} + html_headers = {'Content-Type': 'text/html; charset=utf-8', 'Content-Transfer-Encoding': 'quoted-printable'} + parts = {'alternatives': [{'content': 'H=C3=A4llo', 'headers': plain_headers}, {'content': '

<p>H=C3=A4llo</p>

', 'headers': html_headers}]} + + mail = PixelatedMail.from_soledad(None, None, self._create_bdoc(raw='some raw body'), parts=parts, soledad_querier=None) + + self.assertEqual(2, len(mail.alternatives)) + self.assertEquals(u'H\xe4llo', mail.text_plain_body) + self.assertEquals(u'

<p>H\xe4llo</p>

', mail.html_body) + def test_clean_line_breaks_on_address_headers(self): many_recipients = 'One ,\nTwo , Normal ,\nalone@mail.com' headers = {'Cc': many_recipients, -- cgit v1.2.3