From 61971193bbaf7c7572a07cbb08add0c04be5ad55 Mon Sep 17 00:00:00 2001
From: Folker Bernitt <fbernitt@thoughtworks.com>
Date: Tue, 13 Jan 2015 15:23:03 +0100
Subject: Fixed ignoring charset on mail content type when parsing mails.

---
 service/pixelated/adapter/model/mail.py | 20 ++++++++++++++------
 service/test/unit/adapter/mail_test.py  | 11 +++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'service')
diff --git a/service/pixelated/adapter/model/mail.py b/service/pixelated/adapter/model/mail.py
index 7984cb05..f1b7774c 100644
--- a/service/pixelated/adapter/model/mail.py
+++ b/service/pixelated/adapter/model/mail.py
@@ -76,10 +76,16 @@ class Mail(object):
 
     def _charset(self):
         if 'content_type' in self.headers and 'charset' in self.headers['content_type']:
-            return re.compile('.*charset=(.*)').match(self.headers['content_type']).group(1)
+            return self._parse_charset_header(self.headers['content_type'])
         else:
             return 'utf-8'
 
+    def _parse_charset_header(self, charset_header, default_charset='utf-8'):
+        try:
+            return re.search(r'charset="?([^";\s]+)', charset_header).group(1)
+        except (AttributeError, TypeError):  # no charset parameter found, or header is None
+            return default_charset
+
     @property
     def raw(self):
         return self._mime_multipart.as_string()
@@ -213,14 +219,16 @@ class PixelatedMail(Mail):
 
     def _decode_part(self, part):
         encoding = part['headers'].get('Content-Transfer-Encoding', '')
+        content_type = self._parse_charset_header(part['headers'].get('Content-Type'))
 
         decoding_map = {
-            'quoted-printable': lambda content: unicode(content.decode('quopri')),
-            'base64': lambda content: content.decode('base64').decode('utf-8')
+            'quoted-printable': lambda content, content_type: unicode(content.decode('quopri'), content_type),
+            'base64': lambda content, content_type: content.decode('base64').decode(content_type)
         }
         if encoding:
-            return decoding_map[encoding](part['content'])
-        return part['content']
+            return decoding_map[encoding](part['content'], content_type)
+        else:
+            return part['content']
 
     @property
     def alternatives(self):
@@ -228,7 +236,7 @@ class PixelatedMail(Mail):
 
     @property
     def text_plain_body(self):
-        if self.parts and len(self.alternatives) == 1:
+        if self.parts and len(self.alternatives) >= 1:
             return self._decode_part(self.alternatives[0])
         else:
             return self.bdoc.content['raw']  # plain
diff --git a/service/test/unit/adapter/mail_test.py b/service/test/unit/adapter/mail_test.py
index 9dc54e66..be7b731d 100644
--- a/service/test/unit/adapter/mail_test.py
+++ b/service/test/unit/adapter/mail_test.py
@@ -171,6 +171,17 @@ class TestPixelatedMail(unittest.TestCase):
         self.assertRegexpMatches(mail.text_plain_body, '([\s\S]*100%)')
         self.assertRegexpMatches(mail.html_body, '([\s\S]*100%)')
 
+    def test_content_type_header_of_mail_part_is_used(self):
+        plain_headers = {'Content-Type': 'text/plain; charset=utf-8', 'Content-Transfer-Encoding': 'quoted-printable'}
+        html_headers = {'Content-Type': 'text/html; charset=utf-8', 'Content-Transfer-Encoding': 'quoted-printable'}
+        parts = {'alternatives': [{'content': 'H=C3=A4llo', 'headers': plain_headers}, {'content': '<p>H=C3=A4llo</p>', 'headers': html_headers}]}
+
+        mail = PixelatedMail.from_soledad(None, None, self._create_bdoc(raw='some raw body'), parts=parts, soledad_querier=None)
+
+        self.assertEqual(2, len(mail.alternatives))
+        self.assertEquals(u'H\xe4llo', mail.text_plain_body)
+        self.assertEquals(u'<p>H\xe4llo</p>', mail.html_body)
+
     def test_clean_line_breaks_on_address_headers(self):
         many_recipients = 'One <one@mail.com>,\nTwo <two@mail.com>, Normal <normal@mail.com>,\nalone@mail.com'
         headers = {'Cc': many_recipients,
-- 
cgit v1.2.3