From 5c5500f91a520ee363c8e553718a5a0e763257e5 Mon Sep 17 00:00:00 2001 From: Folker Bernitt Date: Tue, 1 Sep 2015 10:21:42 +0200 Subject: Decode encoded mail headers to unicode - Issue #446 --- service/pixelated/adapter/mailstore/leap_mailstore.py | 12 +++++++++++- service/pixelated/adapter/search/__init__.py | 10 +++++----- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'service/pixelated') diff --git a/service/pixelated/adapter/mailstore/leap_mailstore.py b/service/pixelated/adapter/mailstore/leap_mailstore.py index c18748b6..73ce543f 100644 --- a/service/pixelated/adapter/mailstore/leap_mailstore.py +++ b/service/pixelated/adapter/mailstore/leap_mailstore.py @@ -88,9 +88,19 @@ class LeapMail(Mail): return result + def _decoded_header_utf_8(self, header_value): + if isinstance(header_value, list): + return [self._decoded_header_utf_8(v) for v in header_value] + else: + content, encoding = decode_header(header_value)[0] + if encoding: + return unicode(content, encoding=encoding) + else: + return unicode(content, encoding='ascii') + def as_dict(self): return { - 'header': {k.lower(): v for k, v in self.headers.items()}, + 'header': {k.lower(): self._decoded_header_utf_8(v) for k, v in self.headers.items()}, 'ident': self._mail_id, 'tags': self.tags, 'status': list(self.status), diff --git a/service/pixelated/adapter/search/__init__.py b/service/pixelated/adapter/search/__init__.py index 46691926..84e69a9a 100644 --- a/service/pixelated/adapter/search/__init__.py +++ b/service/pixelated/adapter/search/__init__.py @@ -126,8 +126,8 @@ class SearchEngine(object): bounced = mail.bounced if mail.bounced else [''] index_data = { - 'sender': self._unicode_header_field(header.get('from', '')), - 'subject': self._unicode_header_field(header.get('subject', '')), + 'sender': self._empty_string_to_none(header.get('from', '')), + 'subject': self._empty_string_to_none(header.get('subject', '')), 'date': milliseconds(header.get('date', '')), 'to': self._format_recipient(header, 'to'), 'cc': self._format_recipient(header, 'cc'), @@ -146,11 +146,11 @@ class SearchEngine(object): list = headers.get(name, ['']) return u','.join(list) if list else u'' - def _unicode_header_field(self, field_value): + def _empty_string_to_none(self, field_value): if not field_value: return None - - return unicode(field_value.decode('utf-8')) + else: + return field_value def index_mails(self, mails, callback=None): try: -- cgit v1.2.3