diff options
author | Folker Bernitt <fbernitt@thoughtworks.com> | 2014-10-08 18:06:28 +0200 |
---|---|---|
committer | Folker Bernitt <fbernitt@thoughtworks.com> | 2014-10-09 18:24:41 +0200 |
commit | 85264c9ab3052dd844016d3ec44cb8af43dd5869 (patch) | |
tree | d6a5cb6511039407fdef40804c5338347b71c551 /service/pixelated | |
parent | fb8782d3f66b1068153a9948a01864ee52e39b10 (diff) |
Added whoosh as search engine (issue #94).
Diffstat (limited to 'service/pixelated')
-rw-r--r-- | service/pixelated/adapter/draft_service.py | 19 | ||||
-rw-r--r-- | service/pixelated/adapter/mail_service.py | 26 | ||||
-rw-r--r-- | service/pixelated/adapter/pixelated_mailbox.py | 8 | ||||
-rw-r--r-- | service/pixelated/adapter/pixelated_mailboxes.py | 18 | ||||
-rw-r--r-- | service/pixelated/adapter/search.py | 71 | ||||
-rw-r--r-- | service/pixelated/adapter/soledad_querier.py | 10 | ||||
-rw-r--r-- | service/pixelated/search_query.py | 58 | ||||
-rw-r--r-- | service/pixelated/user_agent.py | 37 |
8 files changed, 136 insertions, 111 deletions
diff --git a/service/pixelated/adapter/draft_service.py b/service/pixelated/adapter/draft_service.py new file mode 100644 index 00000000..d9d6b12f --- /dev/null +++ b/service/pixelated/adapter/draft_service.py @@ -0,0 +1,19 @@ + + +class DraftService(object): + __slots__ = '_mailboxes' + + def __init__(self, mailboxes): + self._mailboxes = mailboxes + + def create_draft(self, input_mail): + self._drafts().add(input_mail) + return input_mail + + def update_draft(self, ident, input_mail): + new_mail = self.create_draft(input_mail) + self._drafts().remove(ident) + return new_mail + + def _drafts(self): + return self._mailboxes.drafts() diff --git a/service/pixelated/adapter/mail_service.py b/service/pixelated/adapter/mail_service.py index 23afb41b..ad494ac8 100644 --- a/service/pixelated/adapter/mail_service.py +++ b/service/pixelated/adapter/mail_service.py @@ -20,18 +20,18 @@ from pixelated.adapter.soledad_querier import SoledadQuerier class MailService: __slots__ = ['leap_session', 'account', 'mailbox_name'] - ALL_MAILS_QUERY = {'tags': ['all']} - def __init__(self, mailboxes, mail_sender, tag_service=TagService.get_instance()): self.tag_service = tag_service self.mailboxes = mailboxes self.querier = SoledadQuerier.get_instance() self.mail_sender = mail_sender - self.tag_service.load_index(self.mails(MailService.ALL_MAILS_QUERY)) + self.tag_service.load_index(self.all_mails()) + + def all_mails(self): + return self.querier.all_mails() - def mails(self, query): - _mails = self.mailboxes.mails_by_tag(query['tags']) if query['tags'] else self.querier.all_mails() - return sorted(_mails or [], key=lambda mail: mail.headers['Date'], reverse=True) + def mails(self, ids): + return self.querier.mails(ids) def update_tags(self, mail_id, new_tags): reserved_words = self.tag_service.extract_reserved(new_tags) @@ -46,19 +46,7 @@ class MailService: def send(self, last_draft_ident, mail): self.mail_sender.sendmail(mail) self.mailboxes.drafts().remove(last_draft_ident) - self.mailboxes.sent().add(mail) - - def create_draft(self, mail): - return self.mailboxes.add_draft(mail) - - def update_draft(self, ident, new_version): - return self.mailboxes.update_draft(ident, new_version) - - def send_draft(self, mail): - pass - - def all_tags(self): - return self.tag_service.all_tags() + return self.mailboxes.sent().add(mail) def thread(self, thread_id): raise NotImplementedError() diff --git a/service/pixelated/adapter/pixelated_mailbox.py b/service/pixelated/adapter/pixelated_mailbox.py index e8c063e8..0b88e07d 100644 --- a/service/pixelated/adapter/pixelated_mailbox.py +++ b/service/pixelated/adapter/pixelated_mailbox.py @@ -45,13 +45,7 @@ class PixelatedMailbox: return message def add(self, mail): - self.querier.create_mail(mail, self.mailbox_name) - - def add_existing(self, mail_ident): - mail = self.querier.mail(mail_ident) - mail.remove_all_tags() - mail.set_mailbox(self.mailbox_name) - mail.save() + return self.querier.create_mail(mail, self.mailbox_name) def remove(self, ident): mail = self.querier.mail(ident) diff --git a/service/pixelated/adapter/pixelated_mailboxes.py b/service/pixelated/adapter/pixelated_mailboxes.py index aa39a5c4..c87b7ab3 100644 --- a/service/pixelated/adapter/pixelated_mailboxes.py +++ b/service/pixelated/adapter/pixelated_mailboxes.py @@ -14,12 +14,14 @@ # You should have received a copy of the GNU Affero General Public License # along with Pixelated. If not, see <http://www.gnu.org/licenses/>. from pixelated.adapter.pixelated_mailbox import PixelatedMailbox +from pixelated.adapter.soledad_querier import SoledadQuerier class PixelatedMailBoxes(): def __init__(self, account): self.account = account + self.querier = SoledadQuerier.get_instance() def _create_or_get(self, mailbox_name): mailbox_name = mailbox_name.upper() @@ -51,18 +53,12 @@ class PixelatedMailBoxes(): return mails - def add_draft(self, mail): - self.drafts().add(mail) - return mail - - def update_draft(self, ident, new_version): - new_mail = self.add_draft(new_version) - self.drafts().remove(ident) - return new_mail - def move_to_trash(self, mail_id): - new_mail_id = self.trash().add_existing(mail_id) - return new_mail_id + mail = self.querier.mail(mail_id) + mail.remove_all_tags() + mail.set_mailbox(self.trash().mailbox_name) + mail.save() + return mail def mail(self, mail_id): for mailbox in self.mailboxes: diff --git a/service/pixelated/adapter/search.py b/service/pixelated/adapter/search.py new file mode 100644 index 00000000..6c64cc40 --- /dev/null +++ b/service/pixelated/adapter/search.py @@ -0,0 +1,71 @@ +from whoosh.fields import * +from whoosh.filedb.filestore import RamStorage +from whoosh.qparser import QueryParser + + +class SearchEngine(object): + __slots__ = '_index' + + def __init__(self): + self._index = self._create_index() + + def _mail_schema(self): + return Schema( + ident=ID(stored=True, unique=True), + sender=ID(stored=False), + to=ID(stored=False), + cc=ID(stored=False), + bcc=ID(stored=False), + subject=TEXT(stored=False), + body=TEXT(stored=False), + tag=KEYWORD(stored=False, commas=True)) + + def _create_index(self): + return RamStorage().create_index(self._mail_schema(), indexname='mails') + + def index_mail(self, mail): + writer = self._index.writer() + self._index_mail(writer, mail) + writer.commit() + + def _index_mail(self, writer, mail): + mdict = mail.as_dict() + header = mdict['header'] + tags = mdict.get('tags', []) + tags.append(mail.mailbox_name.lower()) + index_data = { + 'sender': unicode(header.get('from', '')), + 'subject': unicode(header.get('subject', '')), + 'to': unicode(header.get('to', '')), + 'cc': unicode(header.get('cc', '')), + 'bcc': unicode(header.get('bcc', '')), + 'tag': u','.join(tags), + 'body': unicode(mdict['body']), + 'ident': unicode(mdict['ident']) + } + + writer.update_document(**index_data) + + def index_mails(self, mails): + writer = self._index.writer() + try: + for mail in mails: + self._index_mail(writer, mail) + finally: + writer.commit() + + def search(self, query): + query = query.replace('\"', '') + query = query.replace('-in:', 'AND NOT tag:') + query = query.replace('in:all', '*') + with self._index.searcher() as searcher: + query = QueryParser('body', self._index.schema).parse(query) + results = searcher.search(query) + return [mail['ident'] for mail in results] + + def remove_from_index(self, mail_id): + writer = self._index.writer() + try: + writer.delete_by_term('ident', mail_id) + finally: + writer.commit() diff --git a/service/pixelated/adapter/soledad_querier.py b/service/pixelated/adapter/soledad_querier.py index ab8f6941..43b9891e 100644 --- a/service/pixelated/adapter/soledad_querier.py +++ b/service/pixelated/adapter/soledad_querier.py @@ -47,11 +47,12 @@ class SoledadQuerier: def all_mails_by_mailbox(self, mailbox_name): fdocs_chash = [(fdoc, fdoc.content['chash']) for fdoc in self.soledad.get_from_index('by-type-and-mbox', 'flags', mailbox_name)] - if len(fdocs_chash) == 0: - return [] return self._build_mails_from_fdocs(fdocs_chash) def _build_mails_from_fdocs(self, fdocs_chash): + if len(fdocs_chash) == 0: + return [] + fdocs_hdocs = [(f[0], self.soledad.get_from_index('by-type-and-contenthash', 'head', f[1])[0]) for f in fdocs_chash] fdocs_hdocs_phash = [(f[0], f[1], f[1].content.get('body')) for f in fdocs_hdocs] fdocs_hdocs_bdocs = [(f[0], f[1], self.soledad.get_from_index('by-type-and-payloadhash', 'cnt', f[2])[0]) for f in fdocs_hdocs_phash] @@ -67,6 +68,7 @@ class SoledadQuerier: uid = self._next_uid_for_mailbox(mailbox_name) new_docs = [self.soledad.create_doc(doc) for doc in mail._get_for_save(next_uid=uid, mailbox=mailbox_name)] self._update_index(new_docs) + return self.mail(mail.ident) def mail(self, ident): fdoc = self.soledad.get_from_index('by-type-and-contenthash', 'flags', ident)[0] @@ -75,6 +77,10 @@ class SoledadQuerier: return PixelatedMail.from_soledad(fdoc, hdoc, bdoc, soledad_querier=self) + def mails(self, idents): + fdocs_chash = [(self.soledad.get_from_index('by-type-and-contenthash', 'flags', ident)[0], ident) for ident in idents] + return self._build_mails_from_fdocs(fdocs_chash) + def remove_mail(self, mail): _mail = self.mail(mail.ident) # FIX-ME: Must go through all the part_map phash to delete all the cdocs diff --git a/service/pixelated/search_query.py b/service/pixelated/search_query.py deleted file mode 100644 index 278e1f2f..00000000 --- a/service/pixelated/search_query.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright (c) 2014 ThoughtWorks, Inc. -# -# Pixelated is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Pixelated is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Pixelated. If not, see <http://www.gnu.org/licenses/>. -from scanner import StringScanner, StringRegexp -import re - - -def compile(query): - compiled = {"tags": [], "not_tags": []} - sanitized_query = re.sub(r"['\"]", "", query.encode('utf8')) - scanner = StringScanner(sanitized_query) - first_token = True - - while not scanner.is_eos: - token = scanner.scan(_next_token()) - - if not token: - scanner.skip(_separators()) - continue - - if ":" in token: - compiled = _compile_tag(compiled, token) - elif first_token: - compiled["general"] = token - - if not first_token: - first_token = True - - return compiled - - -def _next_token(): - return StringRegexp('[^\s]+') - - -def _separators(): - return StringRegexp('[\s&]+') - - -def _compile_tag(compiled, token): - tag = token.split(":").pop() - if token[0] == "-": - compiled["not_tags"].append(tag) - else: - compiled["tags"].append(tag) - return compiled diff --git a/service/pixelated/user_agent.py b/service/pixelated/user_agent.py index 03372f23..1bab0281 100644 --- a/service/pixelated/user_agent.py +++ b/service/pixelated/user_agent.py @@ -27,7 +27,6 @@ from flask import Response from pixelated.adapter.pixelated_mail_sender import PixelatedMailSender from pixelated.adapter.pixelated_mailboxes import PixelatedMailBoxes import pixelated.reactor_manager as reactor_manager -import pixelated.search_query as search_query import pixelated.bitmask_libraries.session as LeapSession from pixelated.bitmask_libraries.config import LeapConfig from pixelated.bitmask_libraries.provider import LeapProvider @@ -35,7 +34,9 @@ from pixelated.bitmask_libraries.auth import LeapAuthenticator, LeapCredentials from pixelated.adapter.mail_service import MailService from pixelated.adapter.pixelated_mail import PixelatedMail, InputMail from pixelated.adapter.soledad_querier import SoledadQuerier - +from pixelated.adapter.search import SearchEngine +from pixelated.adapter.tag_service import TagService +from pixelated.adapter.draft_service import DraftService static_folder = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "web-ui", "app")) @@ -70,9 +71,12 @@ def send_mail(): _mail = InputMail.from_dict(request.json) draft_id = request.json.get('ident') if draft_id: - mail_service.send(draft_id, _mail) + _mail = mail_service.send(draft_id, _mail) + search_engine.index_mail(_mail) + search_engine.remove_from_index(draft_id) else: - _mail = mail_service.create_draft(_mail) + _mail = draft_service.create_draft(_mail) + search_engine.index_mail(mail_service.mail(_mail.ident)) return respond_json(_mail.as_dict()) except Exception as error: return respond_json({'message': '\n'.join(list(error.args))}, status_code=500) @@ -81,19 +85,17 @@ def send_mail(): @app.route('/mails', methods=['PUT']) def update_draft(): _mail = InputMail.from_dict(request.json) - new_revision = mail_service.update_draft(request.json['ident'], _mail) + new_revision = draft_service.update_draft(request.json['ident'], _mail) ident = new_revision.ident + search_engine.index_mail(mail_service.mail(ident)) + search_engine.remove_from_index(request.json['ident']) return respond_json({'ident': ident}) @app.route('/mails') def mails(): - query = search_query.compile(request.args.get("q")) if request.args.get("q") else {'tags': {}} - - mails = mail_service.mails(query) - - if "inbox" in query['tags']: - mails = [mail for mail in mails if not mail.has_tag('trash')] + mail_ids = search_engine.search(request.args.get('q')) + mails = mail_service.mails(mail_ids) response = { "stats": { @@ -110,7 +112,8 @@ def mails(): @app.route('/mail/<mail_id>', methods=['DELETE']) def delete_mail(mail_id): - mail_service.delete_mail(mail_id) + trashed_mail = mail_service.delete_mail(mail_id) + search_engine.index_mail(trashed_mail) return respond_json(None) @@ -124,8 +127,6 @@ def delete_mails(): @app.route('/tags') def tags(): - tag_service = mail_service.tag_service - query = request.args.get('q') skipDefaultTags = request.args.get('skipDefaultTags') @@ -150,6 +151,7 @@ def mail_tags(mail_id): new_tags = map(lambda tag: tag.lower(), request.get_json()['newtags']) try: tags = mail_service.update_tags(mail_id, new_tags) + search_engine.index_mail(mail_service.mail(mail_id)) except ValueError as ve: return respond_json(ve.message, 403) return respond_json(list(tags)) @@ -197,6 +199,13 @@ def start_user_agent(debug_enabled): global mail_service mail_service = MailService(pixelated_mailboxes, pixelated_mail_sender) + global search_engine + search_engine = SearchEngine() + search_engine.index_mails(mail_service.all_mails()) + global draft_service + draft_service = DraftService(pixelated_mailboxes) + global tag_service + tag_service = TagService.get_instance() app.run(host=app.config['HOST'], debug=debug_enabled, port=app.config['PORT'], use_reloader=False) |