summary | refs | log | tree | commit | diff
path: root/service/pixelated
diff options
context:
space:
mode:
author: Folker Bernitt <fbernitt@thoughtworks.com> 2015-10-29 12:15:54 +0100
committer: Folker Bernitt <fbernitt@thoughtworks.com> 2015-10-29 12:17:39 +0100
commit: 3220f53ff8a23de958db69f188fba95c52fd39a6 (patch)
tree: 313ce9ea4395abae1d4c50ad0c06c434bb75dba7 /service/pixelated
parent: 8a2916859811c6e9e272723fe60fb336e7c1fdcb (diff)
Filter too-short input when generating mails
- Subjects with fewer than 3 words caused problems with the Markov chain
Diffstat (limited to 'service/pixelated')
-rw-r--r-- service/pixelated/support/mail_generator.py 6
1 file changed, 5 insertions, 1 deletion
diff --git a/service/pixelated/support/mail_generator.py b/service/pixelated/support/mail_generator.py
index af8dd4cc..e5232370 100644
--- a/service/pixelated/support/mail_generator.py
+++ b/service/pixelated/support/mail_generator.py
@@ -77,6 +77,10 @@ def search_for_tags(content):
return map(lambda tag: tag[0], potential_tags)
+def filter_too_short_texts(texts):
+ return [text for text in texts if text is not None and len(text.split()) >= 3]
+
+
def load_all_mails(mail_list):
subjects = set()
mail_bodies = []
@@ -91,7 +95,7 @@ def load_all_mails(mail_list):
else:
raise Exception(mail.get_content_type())
- return subjects, mail_bodies
+ return filter_too_short_texts(subjects), filter_too_short_texts(mail_bodies)
class MailGenerator(object):