summaryrefslogtreecommitdiff
path: root/src/leap/mail/imap/parser.py
blob: 306dcf0a30ee24ab881f5a5d38a4e6452d0f73db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
# parser.py
# Copyright (C) 2013 LEAP
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
Mail parser mixins.
"""
import cStringIO
import StringIO
import hashlib
import re

from email.message import Message
from email.parser import Parser

from leap.common.check import leap_assert_type


class MailParser(object):
    """
    Mixin with utility methods to parse raw messages.

    Raw messages can be handed over either as strings or as file-like
    buffers (anything exposing a ``read`` method). A single
    ``email.parser.Parser`` instance is created in ``__init__`` and reused
    for every parse.
    """
    def __init__(self):
        """
        Initializes the mail parser.
        """
        self._parser = Parser()

    def _get_parsed_msg(self, raw, headersonly=False):
        """
        Return a parsed Message.

        :param raw: the raw message to parse
        :type raw: basestring, or a file-like object (e.g. StringIO)

        :param headersonly: True for parsing only the headers.
        :type headersonly: bool

        :return: the message built by the parser from ``raw``
        :rtype: Message
        """
        parse = self._get_parser_fun(raw)
        return parse(raw, headersonly=headersonly)

    def _get_hash(self, msg):
        """
        Return a hash of the string representation of the raw message,
        suitable for indexing the immutable pieces.

        :param msg: a Message object
        :type msg: Message

        :return: hexadecimal SHA-256 digest of ``msg.as_string()``
        :rtype: str
        """
        leap_assert_type(msg, Message)
        return hashlib.sha256(msg.as_string()).hexdigest()

    def _get_parser_fun(self, o):
        """
        Return the proper parser function for an object.

        File-like objects are dispatched to ``Parser.parse``; everything
        else is treated as a string and dispatched to ``Parser.parsestr``.

        :param o: the object that is going to be parsed
        :type o: object

        :return: a bound method of the internal parser
        :rtype: callable
        """
        # Duck-type on ``read`` rather than checking for concrete StringIO
        # classes: an exact type check misses read-only cStringIO buffers
        # (cStringIO.InputType, created by cStringIO.StringIO(data)) as well
        # as real file objects, all of which Parser.parse can consume.
        if hasattr(o, "read"):
            return self._parser.parse
        # strings, and fallback for anything else
        return self._parser.parsestr

    def _stringify(self, o):
        """
        Return a string object.

        Buffers exposing ``getvalue`` (the StringIO family) are flattened to
        their contents; any other object is returned untouched.

        :param o: object
        :type o: object

        :return: the buffer contents, or ``o`` itself
        """
        # XXX Possibly not needed anymore, since we're using
        # msg.as_string()
        getvalue = getattr(o, "getvalue", None)
        if callable(getvalue):
            return getvalue()
        return o


class MBoxParser(object):
    """
    Mixin that normalizes mailbox names.
    """
    INBOX_NAME = "INBOX"
    INBOX_RE = re.compile(INBOX_NAME, flags=re.IGNORECASE)

    def _parse_mailbox_name(self, name):
        """
        Return ``name`` with a leading, case-insensitive "inbox" rewritten
        in uppercase.

        Any name that merely *starts* with those five letters is affected
        (the pattern is anchored at the beginning only); the remainder of
        the name is kept as given. Other names are returned unchanged.

        :param name: the name of the mailbox
        :type name: unicode

        :rtype: unicode
        """
        canonical = self.INBOX_NAME
        if self.INBOX_RE.match(name) is None:
            return name
        # swap the matched prefix for its canonical uppercase spelling
        remainder = name[len(canonical):]
        return canonical + remainder