summary | refs | log | tree | commit | diff
path: root/bin/parse-email-logs
diff options
context:
space:
mode:
Diffstat (limited to 'bin/parse-email-logs')
-rwxr-xr-x bin/parse-email-logs 130
1 files changed, 112 insertions, 18 deletions
diff --git a/bin/parse-email-logs b/bin/parse-email-logs
index 2a24261..b0fd824 100755
--- a/bin/parse-email-logs
+++ b/bin/parse-email-logs
@@ -1,4 +1,4 @@
-#!/usr/bin/ruby
+#!/usr/bin/env ruby
require_relative '../config/initializer'
@@ -9,14 +9,23 @@ end
$input = nil
def parse_command_line
- if ARGV.grep(/-h/).any?
- usage
- end
- if ARGV[0] && File.exist?(ARGV[0])
- $input = File.open(ARGV[0])
- else
- $input = ARGF
+ while ARGV.any? do
+ case ARGV[0]
+ when '-h' then
+ usage
+ when '--help' then
+ usage
+ when '--debug' then
+ require 'byebug'
+ ARGV.shift
+ else
+ if ARGV[0] && File.exist?(ARGV[0])
+ $input = File.open(ARGV[0])
+ end
+ ARGV.shift
+ end
end
+ $input ||= ARGF
end
def usage
@@ -33,9 +42,9 @@ end
def hash_addresses(str)
str.split(',').map {|address|
- address = address.sub(/<.*>/, '')
+ address = address.sub(/.*</, '').sub(/[>\"\']/, '')
address.split('@').map {|segment|
- segment #Digest::HMAC.hexdigest(segment, CONFIG['secret'], Digest::MD5)
+ Digest::HMAC.hexdigest(segment, CONFIG['secret'], Digest::MD5)
}.join('@')
}.join(',')
end
@@ -51,9 +60,16 @@ def get_message(queue_id, timestamp)
end
#
-# if we see this, then it was incoming: "relay=0.0.0.0[0.0.0.0]:25"
+# if we see this:
+# relay=0.0.0.0[0.0.0.0]:25
+# then the message was an incoming message
#
-def do_sent(m, ts, matches)
+# if the queue id is the same, but the recipient is different
+# then duplicate the record.
+#
+def do_sent(m, ts, matches, line)
+ recipient = hash_addresses(matches["to"])
+ return if m.recipient == recipient
if m.recipient.nil?
if matches['relay'] == "relay=0.0.0.0[0.0.0.0]:25"
m.is_outgoing = false
@@ -64,18 +80,38 @@ def do_sent(m, ts, matches)
m.sent_at = m.first_seen_at
m.received_at = parse_timestamp(ts)
else
- # sent_at will be set by the 'Date' header
+ m.sent_at = m.date
m.received_at = m.first_seen_at
end
- m.recipient = hash_addresses(matches["to"])
+ m.recipient = recipient
m.orig_to = hash_addresses(matches["orig_to"]) if matches["orig_to"]
+ m.delay = matches['delay'].to_f
+ m.delays = matches['delays']
+ m.status = "sent"
m.save
else
-
+ new_m = m.dup
+ new_m.received_at = parse_timestamp(ts)
+ new_m.recipient = recipient
+ new_m.orig_to = hash_addresses(matches["orig_to"]) if matches["orig_to"]
+ new_m.delay = matches['delay'].to_f
+ new_m.delays = matches['delays']
+ new_m.status = "sent"
+ new_m.save
end
end
#
+# for status=x where x != sent
+#
+def do_status(m, ts, matches)
+ m.delay = matches['delay'].to_f
+ m.delays = matches['delays']
+ m.status = matches['status']
+ m.save
+end
+
+#
# save the message size and the envelope sender
#
def do_queue(m, ts, matches)
@@ -96,6 +132,48 @@ def do_message_id(m, ts, matches, line)
end
#
+# headers
+#
+def do_subject(m, ts, matches)
+ m.subject_length = matches['subject'].length
+ m.save
+end
+def do_date(m, ts, matches)
+ m.date = matches['date']
+ m.save
+end
+def do_from(m, ts, matches)
+ m.from = hash_addresses(matches['from'])
+ m.save
+end
+def do_to(m, ts, matches)
+ m.to = hash_addresses(matches['to'])
+ m.save
+end
+def do_cc(m, ts, matches)
+ m.cc = hash_addresses(matches['cc'])
+ m.save
+end
+def do_bcc(m, ts, matches)
+ m.bcc = hash_addresses(matches['bcc'])
+ m.save
+end
+def do_list(m, ts, matches)
+ m.is_list = true
+ m.save
+end
+def do_in_reply_to(m, ts, matches)
+ m.re_message_id = hash_addresses(matches['in-reply-to'])
+ m.save
+end
+def do_precedence(m, ts, matches)
+ if matches['precedence'] =~ /(bulk|list)/i
+ m.is_list = true
+ m.save
+ end
+end
+
+#
# the message was rejected, likely because milter scan thinks it is a virus.
# so we remove the record from the database
#
@@ -104,14 +182,19 @@ def do_purge_message(m, ts, matches)
end
def do_error(line)
+ puts
puts "ERROR: unmatched line!"
- puts " " + line
+ puts line
+ puts
end
+FROM_HOST = /(local|[A-Za-z0-9\.\-]+\[0.0.0.0\]);/
+
LINE_PARSE_MAP = {
'postfix/smtp' => {
- /to=<(?<to>.*?)>, (orig_to=<(?<orig_to>.*?)>, )?(?<relay>relay=.*?),.*status=sent/ => method(:do_sent),
- /status=sent/ => :error
+ /to=<(?<to>.*?)>, (orig_to=<(?<orig_to>.*?)>, )?(?<relay>relay=.*?), delay=(?<delay>[0-9\.]+), delays=(?<delays>[0-9\.\/]+), .*status=sent/ => method(:do_sent),
+ /delay=(?<delay>[0-9\.]+), delays=(?<delays>[0-9\.\/]+), .*status=(?<status>[a-z]+) / => method(:do_status),
+ /status=/ => :error
},
'postfix/qmgr' => {
/from=<(?<from>.*)>, size=(?<size>\d+),.*\(queue active\)/ => method(:do_queue),
@@ -119,6 +202,17 @@ LINE_PARSE_MAP = {
},
'postfix/cleanup' => {
/message-id=(?<message_id>.*)$/ => method(:do_message_id),
+ /info: header Subject: (?<subject>.*) from #{FROM_HOST}/i => method(:do_subject),
+ /info: header From: (?<from>.*) from #{FROM_HOST}/i => method(:do_from),
+ /info: header To: (?<to>.*) from #{FROM_HOST}/i => method(:do_to),
+ /info: header Cc: (?<cc>.*) from #{FROM_HOST}/i => method(:do_cc),
+ /info: header Bcc: (?<bcc>.*) from #{FROM_HOST}/i => method(:do_bcc),
+ /info: header In-Reply-To: (?<in-reply-to>.*) from #{FROM_HOST}/i => method(:do_in_reply_to),
+ /info: header Precedence: (?<precedence>.*) from #{FROM_HOST}/i => method(:do_precedence),
+ /info: header List-ID/i => method(:do_list),
+ /info: header Mailing-list/i => method(:do_list),
+ /info: header Date: (?<date>.*) from #{FROM_HOST}/i => method(:do_date),
+ /info: header / => :error,
/milter-reject/ => method(:do_purge_message),
// => :error
}