From 3220f53ff8a23de958db69f188fba95c52fd39a6 Mon Sep 17 00:00:00 2001 From: Folker Bernitt Date: Thu, 29 Oct 2015 12:15:54 +0100 Subject: Filter too short input when generating mails - Subjects with fewer than 3 words caused problems with the Markov chain --- service/pixelated/support/mail_generator.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'service/pixelated/support/mail_generator.py') diff --git a/service/pixelated/support/mail_generator.py b/service/pixelated/support/mail_generator.py index af8dd4cc..e5232370 100644 --- a/service/pixelated/support/mail_generator.py +++ b/service/pixelated/support/mail_generator.py @@ -77,6 +77,10 @@ def search_for_tags(content): return map(lambda tag: tag[0], potential_tags) +def filter_too_short_texts(texts): + return [text for text in texts if text is not None and len(text.split()) >= 3] + + def load_all_mails(mail_list): subjects = set() mail_bodies = [] @@ -91,7 +95,7 @@ def load_all_mails(mail_list): else: raise Exception(mail.get_content_type()) - return subjects, mail_bodies + return filter_too_short_texts(subjects), filter_too_short_texts(mail_bodies) class MailGenerator(object): -- cgit v1.2.3