diff options
Diffstat (limited to 'service/pixelated/adapter/mailstore/body_parser.py')
-rw-r--r-- | service/pixelated/adapter/mailstore/body_parser.py | 69 |
1 files changed, 0 insertions, 69 deletions
diff --git a/service/pixelated/adapter/mailstore/body_parser.py b/service/pixelated/adapter/mailstore/body_parser.py deleted file mode 100644 index 8cac75cf..00000000 --- a/service/pixelated/adapter/mailstore/body_parser.py +++ /dev/null @@ -1,69 +0,0 @@ -# -# Copyright (c) 2015 ThoughtWorks, Inc. -# -# Pixelated is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Pixelated is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Pixelated. If not, see <http://www.gnu.org/licenses/>. - -import re -from email.parser import Parser - -from twisted.logger import Logger - -logger = Logger() - - -def _parse_charset_header(content_type_and_charset_header, default_charset='us-ascii'): - try: - return re.compile('.*charset="?([a-zA-Z0-9-]+)"?', re.MULTILINE | re.DOTALL).match(content_type_and_charset_header).group(1) - except: - return default_charset - - -class BodyParser(object): - - def __init__(self, content, content_type='text/plain; charset="us-ascii"', content_transfer_encoding=None, charset=None): - self._content = content - self._content_type = content_type - self._content_transfer_encoding = content_transfer_encoding - self._charset = charset - - def parsed_content(self): - charset = self._charset or _parse_charset_header(self._content_type) - text = self._serialize_for_parser(charset) - - decoded_body = self._parse_and_decode(text) - return unicode(decoded_body, charset, errors='replace') - - def _parse_and_decode(self, text): - parsed_body = Parser().parsestr(text) - decoded_body = self._unwrap_content_transfer_encoding(parsed_body) - return decoded_body - - def _unwrap_content_transfer_encoding(self, parsed_body): - return parsed_body.get_payload(decode=True) - - def _serialize_for_parser(self, charset): - text = u'Content-Type: %s\n' % self._content_type - if self._content_transfer_encoding is not None: - text += u'Content-Transfer-Encoding: %s\n' % self._content_transfer_encoding - - text += u'\n' - encoded_text = text.encode(charset) - if isinstance(self._content, unicode): - try: - return encoded_text + self._content.encode(charset) - except UnicodeError, e: - logger.warn('Failed to encode content for charset %s. Ignoring invalid chars: %s' % (charset, e)) - return encoded_text + self._content.encode(charset, 'ignore') - else: - return encoded_text + self._content |