diff options
author | Tomás Touceda <chiiph@leap.se> | 2014-01-09 00:22:21 -0300 |
---|---|---|
committer | Tomás Touceda <chiiph@leap.se> | 2014-01-09 00:22:21 -0300 |
commit | c2cccbcd4225049b35ea2c7be4bbe899532136ca (patch) | |
tree | d3ec191d5605d2803535048c84b7b9cec3cd227a /src/leap/mail/imap/parser.py | |
parent | 3bccd3323a36f58357d68465c07595b6807e9015 (diff) | |
parent | 3bd8a7b669ac490b8f8e6c33550963c37069e383 (diff) |
Merge remote-tracking branch 'refs/remotes/kali/feature/split_message_docs' into develop
Diffstat (limited to 'src/leap/mail/imap/parser.py')
-rw-r--r-- | src/leap/mail/imap/parser.py | 113 |
1 files changed, 113 insertions, 0 deletions
# -*- coding: utf-8 -*-
# parser.py
# Copyright (C) 2013 LEAP
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Mail parser mixins.
"""
import cStringIO
import StringIO
import hashlib
import re

from email.message import Message
from email.parser import Parser

from leap.common.check import leap_assert_type

# In-memory string buffer types that must be treated as file-like objects.
# cStringIO.StringIO() returns an OutputType when called without arguments
# and an InputType when called with an initial string, so both are listed.
_STRING_BUFFER_TYPES = (
    cStringIO.InputType,
    cStringIO.OutputType,
    StringIO.StringIO,
)


class MailParser(object):
    """
    Mixin with utility methods to parse raw messages.
    """
    def __init__(self):
        """
        Initializes the mail parser.
        """
        self._parser = Parser()

    def _get_parsed_msg(self, raw, headersonly=False):
        """
        Return a parsed Message.

        :param raw: the raw string to parse
        :type raw: basestring, or StringIO object

        :param headersonly: True for parsing only the headers.
        :type headersonly: bool

        :return: the result of running the matching parser function on raw
        """
        parse = self._get_parser_fun(raw)
        return parse(raw, headersonly=headersonly)

    def _get_hash(self, msg):
        """
        Return the SHA-256 hex digest of the string representation of
        the message, suitable for indexing the immutable pieces.

        The type of msg is checked with leap_assert_type before hashing.

        :param msg: a Message object
        :type msg: Message

        :return: hexadecimal digest of msg.as_string()
        :rtype: str
        """
        leap_assert_type(msg, Message)
        return hashlib.sha256(msg.as_string()).hexdigest()

    def _get_parser_fun(self, o):
        """
        Return the proper parser function for an object.

        String buffers (StringIO.StringIO and both cStringIO flavours)
        are parsed with Parser.parse, which reads from a file-like
        object. Anything else is handed to Parser.parsestr.

        :param o: the object that is going to be parsed
        :type o: object

        :return: a bound method of the internal email.parser.Parser
        :rtype: callable
        """
        if isinstance(o, _STRING_BUFFER_TYPES):
            return self._parser.parse
        # plain strings and, as a fallback, any other object
        return self._parser.parsestr

    def _stringify(self, o):
        """
        Return a string object.

        String buffers are converted with getvalue(); any other object
        is returned unchanged.

        :param o: object
        :type o: object
        """
        # XXX Maybe we don't need no more, we're using
        # msg.as_string()
        if isinstance(o, _STRING_BUFFER_TYPES):
            return o.getvalue()
        return o


class MBoxParser(object):
    """
    Mixin with a utility method to normalize mailbox names.
    """
    INBOX_NAME = "INBOX"
    INBOX_RE = re.compile(INBOX_NAME, re.IGNORECASE)

    def _parse_mailbox_name(self, name):
        """
        Return the mailbox name with an initial, case-insensitive
        "inbox" prefix rewritten as uppercase "INBOX".

        Names that do not start with that prefix are returned untouched.

        :param name: the name of the mailbox
        :type name: unicode

        :rtype: unicode
        """
        # NOTE(review): match() only anchors at the start of the name, so
        # names that merely begin with "inbox" (e.g. "inboxes") are
        # rewritten too -- confirm this is intended.
        if self.INBOX_RE.match(name):
            # ensure initial INBOX is uppercase
            return self.INBOX_NAME + name[len(self.INBOX_NAME):]
        return name