summaryrefslogtreecommitdiff
path: root/bin/parse-email-logs
diff options
context:
space:
mode:
Diffstat (limited to 'bin/parse-email-logs')
-rwxr-xr-xbin/parse-email-logs170
1 files changed, 170 insertions, 0 deletions
diff --git a/bin/parse-email-logs b/bin/parse-email-logs
new file mode 100755
index 0000000..2a24261
--- /dev/null
+++ b/bin/parse-email-logs
@@ -0,0 +1,170 @@
+#!/usr/bin/ruby
+
+require_relative '../config/initializer'
+
# ActiveRecord model holding one row per mail message seen in the logs.
#
# The inheritance column is pointed at the name 'disabled' instead of the
# ActiveRecord default.
# NOTE(review): presumably this is done so that single-table inheritance
# does not claim a column of the table -- confirm against the schema.
class Message < ActiveRecord::Base
  self.inheritance_column = 'disabled'
end
+
# Stream the log lines are read from; assigned by parse_command_line.
$input = nil

#
# Decide where the log lines come from.
#
# Prints the usage text (and exits, see `usage`) when `-h` or `--help` is
# given as an argument of its own. The flag has to match exactly: a plain
# substring test would also fire on log files such as "mail-host.log".
#
# If the first argument names an existing file that file is opened and
# stored in $input; otherwise $input is ARGF, so piped input keeps working.
#
def parse_command_line
  usage if ARGV.any? { |arg| %w[-h --help].include?(arg) }

  logfile = ARGV[0]
  if logfile && File.exist?(logfile)
    $input = File.open(logfile)
  else
    $input = ARGF
  end
end
+
#
# Print the two supported ways of invoking the script, then terminate the
# process with exit status 0.
#
def usage
  [
    "USAGE: ",
    " option 1: parse-email-logs [LOGFILE]",
    " option 2: cat log | parse-email-logs"
  ].each { |text| puts text }
  exit(0)
end
+
#
# Parse a syslog style timestamp, e.g. "May 20 20:17:14".
#
# The log format carries no year, so the year is taken from +now+. When that
# would put the timestamp more than one day after +now+ (for example December
# entries parsed in January) the previous year is used instead. The one day
# of slack absorbs the difference between the offset-less parsed value and
# the local offset of +now+. Trying the previous year also lets "Feb 29"
# entries parse when the current year is not a leap year.
#
# str - timestamp text in "%b %d %H:%M:%S" form
# now - reference point used to pick the year (defaults to the current time)
#
# Returns a DateTime. Raises ArgumentError if +str+ cannot be parsed.
#
def parse_timestamp(str, now = DateTime.now)
  [now.year, now.year - 1].each do |year|
    begin
      parsed = DateTime.strptime("#{year} #{str}", '%Y %b %d %H:%M:%S')
    rescue ArgumentError
      # not a valid date in this year (or not a timestamp at all)
      next
    end
    return parsed if year < now.year || parsed <= now + 1
  end
  raise ArgumentError, "invalid timestamp: #{str.inspect}"
end
+
#
# Normalise a comma separated list of addresses.
#
# For every entry anything enclosed in <...> is removed, then the entry is
# split on '@' and put back together. The per-segment hashing is currently
# switched off, so each segment passes through unchanged.
#
# Returns the entries joined with ','.
#
def hash_addresses(str)
  cleaned = str.split(',').map do |address|
    segments = address.sub(/<.*>/, '').split('@')
    # hashing disabled: Digest::HMAC.hexdigest(segment, CONFIG['secret'], Digest::MD5)
    segments.map { |segment| segment }.join('@')
  end
  cleaned.join(',')
end
+
#
# Look up the Message for a queue id, creating it on first sight.
#
# A record without first_seen_at is treated as new: a progress dot is
# printed, first_seen_at is set from +timestamp+ and the record is saved.
# The record is only initialised in memory before that, so a new message
# costs a single write and no half-filled row is left behind if the
# timestamp cannot be parsed.
#
# queue_id  - queue id taken from the log line
# timestamp - timestamp text of the log line (see parse_timestamp)
#
# Returns the Message.
#
def get_message(queue_id, timestamp)
  msg = Message.find_or_initialize_by(queue_id: queue_id)
  if msg.first_seen_at.nil?
    putc '.'
    $stdout.flush
    msg.first_seen_at = parse_timestamp(timestamp)
    msg.save
  end
  msg
end
+
#
# Handle a "status=sent" line. Only the first such line for a message is
# used: once the recipient is set, later lines are ignored.
#
# A relay of "relay=0.0.0.0[0.0.0.0]:25" marks the message as incoming,
# any other relay as outgoing.
#
def do_sent(m, ts, matches)
  return unless m.recipient.nil?

  m.is_outgoing = matches['relay'] != "relay=0.0.0.0[0.0.0.0]:25"
  if m.is_outgoing?
    m.sent_at = m.first_seen_at
    m.received_at = parse_timestamp(ts)
  else
    # sent_at will be set by the 'Date' header
    m.received_at = m.first_seen_at
  end
  m.recipient = hash_addresses(matches["to"])
  original_recipient = matches["orig_to"]
  m.orig_to = hash_addresses(original_recipient) if original_recipient
  m.save
end
+
#
# Record the message size and the envelope sender from a "queue active"
# line. Does nothing when the size has already been stored.
#
def do_queue(m, ts, matches)
  return unless m.size.nil?

  m.size = matches['size'].to_i
  m.sender = hash_addresses(matches['from'])
  m.save
end
+
#
# Store the message id, with angle brackets and surrounding whitespace
# removed. Does nothing when the message already has one.
#
def do_message_id(m, ts, matches, line)
  return if m.message_id

  bare_id = matches['message_id'].delete('<>').strip
  m.message_id = hash_addresses(bare_id)
  m.save
end
+
#
# Remove the record of a rejected message from the database. A rejection
# most likely means the milter scan considered the message a virus.
#
# ts and matches are not used; they are accepted so this handler has the
# same three-argument shape as the other line handlers.
#
def do_purge_message(m, ts, matches)
  m.destroy
end
+
#
# Report a log line that none of the specific patterns could parse:
# an error banner followed by the offending line.
#
def do_error(line)
  $stdout.puts("ERROR: unmatched line!")
  $stdout.puts(" " + line)
end
+
#
# Dispatch table: daemon name => { pattern => handler }.
#
# For a given daemon the patterns are tried in the order written and only
# the first one that matches is used, so the specific pattern has to come
# before its catch-all. A handler is either a Method object, or :error to
# report the line as unparseable.
#
# The table is frozen (including the per-daemon hashes) because it is
# shared, read-only configuration.
#
LINE_PARSE_MAP = {
  'postfix/smtp' => {
    /to=<(?<to>.*?)>, (orig_to=<(?<orig_to>.*?)>, )?(?<relay>relay=.*?),.*status=sent/ => method(:do_sent),
    /status=sent/ => :error
  }.freeze,
  'postfix/qmgr' => {
    /from=<(?<from>.*)>, size=(?<size>\d+),.*\(queue active\)/ => method(:do_queue),
    /\(queue active\)/ => :error
  }.freeze,
  'postfix/cleanup' => {
    /message-id=(?<message_id>.*)$/ => method(:do_message_id),
    /milter-reject/ => method(:do_purge_message),
    // => :error
  }.freeze
}.freeze
+
#
# Parse one log line and hand it to the matching handler.
#
# The line is split on whitespace: fields 0-2 form the timestamp, field 4
# is "daemon[pid]:" and field 5 is "queue-id:". Lines with fewer than six
# fields (blank or truncated lines) cannot be handled and are skipped;
# without this check they would raise NoMethodError and abort the import.
#
# The patterns registered for the daemon in LINE_PARSE_MAP are tried in
# order and only the first match is acted upon.
#
def process_line(line)
  fields = line.split(' ')
  return if fields.size < 6

  timestamp = fields[0..2].join(' ')
  daemon = fields[4].split('[').first
  queue_id = fields[5].sub(':', '')
  LINE_PARSE_MAP.fetch(daemon, {}).each do |pattern, handler|
    match = pattern.match(line)
    next unless match

    if handler == :error
      do_error(line)
    elsif handler
      msg = get_message(queue_id, timestamp)
      # handlers take either (msg, timestamp, match) or an extra raw line
      if handler.arity == 3
        handler.call(msg, timestamp, match)
      else
        handler.call(msg, timestamp, match, line)
      end
    end
    break
  end
end
+
#
# Entry point: select the input, feed every line to process_line inside a
# single Message transaction, then print a summary with the elapsed whole
# minutes, the change in the number of Message records and the number of
# lines read.
#
def main
  parse_command_line
  started_at = Time.now
  records_before = Message.count
  line_count = 0
  Message.transaction do
    $input.each_line do |line|
      process_line(line)
      line_count += 1
    end
  end
  finished_at = Time.now
  records_after = Message.count
  elapsed_minutes = (finished_at - started_at).to_i / 60
  puts
  puts "FINISHED"
  puts format(" Time: %s minutes", elapsed_minutes)
  puts format("Records: %s", records_after - records_before)
  puts format(" Lines: %s", line_count)
end

main