diff options
author | Folker Bernitt <fbernitt@thoughtworks.com> | 2015-10-29 12:15:54 +0100 |
---|---|---|
committer | Folker Bernitt <fbernitt@thoughtworks.com> | 2015-10-29 12:17:39 +0100 |
commit | 3220f53ff8a23de958db69f188fba95c52fd39a6 (patch) | |
tree | 313ce9ea4395abae1d4c50ad0c06c434bb75dba7 /service/pixelated | |
parent | 8a2916859811c6e9e272723fe60fb336e7c1fdcb (diff) |
Filter too short input when generating mails
- Subjects with fewer than 3 words caused problems with the Markov chain
Diffstat (limited to 'service/pixelated')
-rw-r--r-- | service/pixelated/support/mail_generator.py | 6 |
1 files changed, 5 insertions, 1 deletions
```diff
diff --git a/service/pixelated/support/mail_generator.py b/service/pixelated/support/mail_generator.py
index af8dd4cc..e5232370 100644
--- a/service/pixelated/support/mail_generator.py
+++ b/service/pixelated/support/mail_generator.py
@@ -77,6 +77,10 @@ def search_for_tags(content):
     return map(lambda tag: tag[0], potential_tags)
 
 
+def filter_too_short_texts(texts):
+    return [text for text in texts if text is not None and len(text.split()) >= 3]
+
+
 def load_all_mails(mail_list):
     subjects = set()
     mail_bodies = []
@@ -91,7 +95,7 @@ def load_all_mails(mail_list):
         else:
             raise Exception(mail.get_content_type())
 
-    return subjects, mail_bodies
+    return filter_too_short_texts(subjects), filter_too_short_texts(mail_bodies)
 
 
 class MailGenerator(object):
```