diff options
Diffstat (limited to 'bin/parse-email-logs')
-rwxr-xr-x | bin/parse-email-logs | 170 |
1 files changed, 170 insertions, 0 deletions
#!/usr/bin/ruby
# frozen_string_literal: true

#
# bin/parse-email-logs
#
# Reads postfix log lines from a file or from stdin and records one Message
# row per queue id: when it was first seen, its size, envelope sender,
# recipient(s), message-id and whether it was outgoing.
#

require 'date'
require_relative '../config/initializer'

#
# One row per postfix queue id. inheritance_column is pointed at a dummy
# name so ActiveRecord does not use its default inheritance column.
#
class Message < ActiveRecord::Base
  self.inheritance_column = 'disabled'
end

# IO object the log lines are read from; set by parse_command_line.
$input = nil

#
# Decide where the log lines come from: the file named by the first
# argument if it exists, otherwise ARGF. Prints usage and exits when
# -h or --help is given.
#
def parse_command_line
  # match the flag exactly, so that a log file whose name merely contains
  # "-h" (e.g. mail-host.log) is parsed instead of triggering the usage text
  usage if ARGV.any? { |arg| %w[-h --help].include?(arg) }

  log_path = ARGV[0]
  $input = log_path && File.exist?(log_path) ? File.open(log_path) : ARGF
end

#
# print the usage text and exit successfully
#
def usage
  puts "USAGE: "
  puts " option 1: parse-email-logs [LOGFILE]"
  puts " option 2: cat log | parse-email-logs"
  exit(0)
end

#
# Convert a log timestamp, e.g. "May 20 20:17:14", to a DateTime.
#
# NOTE(review): the log format carries no year; presumably strptime fills in
# the current one, which would be wrong for logs spanning a new year -- confirm.
#
def parse_timestamp(str)
  DateTime.strptime(str, "%b %d %H:%M:%S")
end

#
# Normalize a comma separated list of addresses: anything between angle
# brackets is removed from each address. The per-segment hashing is
# currently disabled, so the local part and the domain are kept as they are.
#
def hash_addresses(str)
  cleaned = str.split(',').map do |address|
    segments = address.sub(/<.*>/, '').split('@')
    segments.map do |segment|
      segment # Digest::HMAC.hexdigest(segment, CONFIG['secret'], Digest::MD5)
    end.join('@')
  end
  cleaned.join(',')
end

#
# Find or create the Message for a queue id. The first time a queue id is
# seen, a progress dot is printed and first_seen_at is set from the timestamp
# of the log line.
#
def get_message(queue_id, timestamp)
  msg = Message.find_or_create_by(queue_id: queue_id)
  if msg.first_seen_at.nil?
    putc '.'
    STDOUT.flush
    msg.first_seen_at = parse_timestamp(timestamp)
    msg.save
  end
  msg
end

#
# Record the delivery of a message.
#
# if we see this, then it was incoming: "relay=0.0.0.0[0.0.0.0]:25"
#
def do_sent(m, ts, matches)
  # a recipient is already recorded, so later delivery lines are ignored
  return unless m.recipient.nil?

  m.is_outgoing = matches['relay'] != "relay=0.0.0.0[0.0.0.0]:25"
  if m.is_outgoing?
    m.sent_at = m.first_seen_at
    m.received_at = parse_timestamp(ts)
  else
    # sent_at will be set by the 'Date' header
    m.received_at = m.first_seen_at
  end
  m.recipient = hash_addresses(matches["to"])
  m.orig_to = hash_addresses(matches["orig_to"]) if matches["orig_to"]
  m.save
end

#
# save the message size and the envelope sender
#
def do_queue(m, ts, matches)
  # the size is already recorded, nothing to do
  return unless m.size.nil?

  m.size = matches['size'].to_i
  m.sender = hash_addresses(matches['from'])
  m.save
end

#
# save the message id
#
def do_message_id(m, ts, matches, line)
  return if m.message_id

  m_id = matches['message_id'].gsub(/[<>]/, '').strip
  m.message_id = hash_addresses(m_id)
  m.save
end

#
# the message was rejected, likely because milter scan thinks it is a virus.
# so we remove the record from the database
#
def do_purge_message(m, ts, matches)
  m.destroy
end

#
# report a line that belongs to a known daemon but matched no parse rule
#
def do_error(line)
  puts "ERROR: unmatched line!"
  puts " #{line}"
end

#
# daemon name => ordered list of (pattern => handler). For each line the
# first matching pattern wins; :error means the line looked relevant but
# could not be parsed.
#
LINE_PARSE_MAP = {
  'postfix/smtp' => {
    /to=<(?<to>.*?)>, (orig_to=<(?<orig_to>.*?)>, )?(?<relay>relay=.*?),.*status=sent/ => method(:do_sent),
    /status=sent/ => :error
  },
  'postfix/qmgr' => {
    /from=<(?<from>.*)>, size=(?<size>\d+),.*\(queue active\)/ => method(:do_queue),
    /\(queue active\)/ => :error
  },
  'postfix/cleanup' => {
    /message-id=(?<message_id>.*)$/ => method(:do_message_id),
    /milter-reject/ => method(:do_purge_message),
    // => :error
  }
}.freeze

#
# Parse a single log line and dispatch it to the handler registered for its
# daemon. Whitespace separated fields 0..2 are the timestamp, field 4 is the
# daemon (up to the first '[') and field 5 is the queue id followed by ':'.
# Lines of daemons that are not in LINE_PARSE_MAP are ignored.
#
def process_line(line)
  splits = line.split(' ')
  # blank or truncated lines have no daemon / queue id field; skip them
  # instead of failing on nil
  return if splits.length < 6

  timestamp = splits[0..2].join(' ')
  daemon = splits[4].split('[').first
  queue_id = splits[5].sub(':', '')
  LINE_PARSE_MAP.fetch(daemon, {}).each do |re, handler|
    match = re.match(line)
    next unless match

    if handler == :error
      do_error(line)
    elsif !handler.nil?
      msg = get_message(queue_id, timestamp)
      # handlers take either (message, timestamp, match) or the same
      # arguments plus the raw line
      if handler.arity == 3
        handler.call(msg, timestamp, match)
      else
        handler.call(msg, timestamp, match, line)
      end
    end
    # only the first matching pattern is applied
    break
  end
end

#
# Process every input line inside a single database transaction, then print
# the elapsed time, the number of new records and the number of lines read.
#
def main
  parse_command_line
  start_time = Time.now
  start_msg = Message.count
  line_count = 0
  Message.transaction do
    $input.each_line do |line|
      process_line(line)
      line_count += 1
    end
  end
  end_time = Time.now
  end_msg = Message.count
  puts
  puts "FINISHED"
  puts " Time: %s minutes" % ((end_time - start_time).to_i / 60)
  puts "Records: %s" % (end_msg - start_msg)
  puts " Lines: %s" % line_count
end

main
\ No newline at end of file |