From 91e4481c450eb7eb928debc1cb7fa59bdb63dd7b Mon Sep 17 00:00:00 2001 From: Kali Kaneko Date: Tue, 25 Jul 2017 11:40:11 -0400 Subject: [pkg] packaging and path changes - move all the pixelated python package under src/ - move the pixelated_www package under the leap namespace - allow to set globally the static folder - add hours and minutes to the timestamp in package version, to allow for several releases a day. --- .../src/pixelated/adapter/mailstore/body_parser.py | 69 ++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 service/src/pixelated/adapter/mailstore/body_parser.py (limited to 'service/src/pixelated/adapter/mailstore/body_parser.py') diff --git a/service/src/pixelated/adapter/mailstore/body_parser.py b/service/src/pixelated/adapter/mailstore/body_parser.py new file mode 100644 index 00000000..8cac75cf --- /dev/null +++ b/service/src/pixelated/adapter/mailstore/body_parser.py @@ -0,0 +1,69 @@ +# +# Copyright (c) 2015 ThoughtWorks, Inc. +# +# Pixelated is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pixelated is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Pixelated. If not, see . + +import re +from email.parser import Parser + +from twisted.logger import Logger + +logger = Logger() + + +def _parse_charset_header(content_type_and_charset_header, default_charset='us-ascii'): + try: + return re.compile('.*charset="?([a-zA-Z0-9-]+)"?', re.MULTILINE | re.DOTALL).match(content_type_and_charset_header).group(1) + except: + return default_charset + + +class BodyParser(object): + + def __init__(self, content, content_type='text/plain; charset="us-ascii"', content_transfer_encoding=None, charset=None): + self._content = content + self._content_type = content_type + self._content_transfer_encoding = content_transfer_encoding + self._charset = charset + + def parsed_content(self): + charset = self._charset or _parse_charset_header(self._content_type) + text = self._serialize_for_parser(charset) + + decoded_body = self._parse_and_decode(text) + return unicode(decoded_body, charset, errors='replace') + + def _parse_and_decode(self, text): + parsed_body = Parser().parsestr(text) + decoded_body = self._unwrap_content_transfer_encoding(parsed_body) + return decoded_body + + def _unwrap_content_transfer_encoding(self, parsed_body): + return parsed_body.get_payload(decode=True) + + def _serialize_for_parser(self, charset): + text = u'Content-Type: %s\n' % self._content_type + if self._content_transfer_encoding is not None: + text += u'Content-Transfer-Encoding: %s\n' % self._content_transfer_encoding + + text += u'\n' + encoded_text = text.encode(charset) + if isinstance(self._content, unicode): + try: + return encoded_text + self._content.encode(charset) + except UnicodeError, e: + logger.warn('Failed to encode content for charset %s. Ignoring invalid chars: %s' % (charset, e)) + return encoded_text + self._content.encode(charset, 'ignore') + else: + return encoded_text + self._content -- cgit v1.2.3