#!/bin/sh
# spamsources - a POSIX shell script to identify where your spam is coming
# from, based on your mail logs
# Author: Bob Apthorpe <apthorpe@cynistar.net>

# Bugs/To-Do:
# - Find a better location for temp files than /tmp
# - Use command-line arguments to return only a list of IP addresses
# - Add Exim, Sendmail, and qmail patterns in addition to Postfix.
# - Do this in one pass with perl and Parse::Syslog

##### Adjust these to match your site #####

# Where are your binaries?
# Each path can be overridden from the environment without editing this file
# (e.g. `GREP=/bin/grep spamsources`); the value after ':-' is used when the
# variable is unset or empty.
RM=${RM:-/bin/rm}
GREP=${GREP:-/usr/bin/grep}
SED=${SED:-/usr/bin/sed}
MKTEMP=${MKTEMP:-/bin/mktemp}
SORT=${SORT:-/usr/bin/sort}
UNIQ=${UNIQ:-/usr/bin/uniq}

# Where is your mail log?
# Override from the environment to analyze a different file, for example a
# rotated log: `MAILLOG=/var/log/mail.0 spamsources`.
MAILLOG=${MAILLOG:-/var/log/mail}

##### Actual guts of code #####

# The log is scanned four times. Each pass turns the lines it matched into a
# pattern file that the next pass feeds to `grep -f`:
#   spamd pids -> message-ids -> Postfix queue ids -> client hosts
#
# Caveat: pass 2 selects every 'processing message' line logged by a pid that
# ever logged 'identified spam', so if one spamd process handles several
# messages, all of them are counted as spam.

# Print "<script>: <message>" on stderr and abort with status 1.
die() {
	echo "$0: $*" >&2
	exit 1
}

# Temp file names stay empty until the file exists (and are emptied again once
# it has been removed) so that cleanup only touches files we really own.
TMP_SPAMD_PIDS=
TMP_MESSAGE_IDS=
TMP_POSTFIX_IDS=

# Honor TMPDIR when it is set; otherwise keep using /tmp.
TMP_TEMPLATE=${TMPDIR:-/tmp}/spamsources.XXXXXX

# Remove any temp files that still exist. Runs on every exit path.
cleanup() {
	for tmpfile in "$TMP_SPAMD_PIDS" "$TMP_MESSAGE_IDS" "$TMP_POSTFIX_IDS"; do
		if [ -n "$tmpfile" ]; then
			"$RM" -f "$tmpfile"
		fi
	done
}
trap cleanup EXIT
# Not every sh runs the EXIT trap when killed by a signal, so turn the common
# signals into a normal exit.
trap 'exit 1' HUP INT TERM

# Fail once, clearly, instead of letting every grep below complain.
[ -r "$MAILLOG" ] || die "Can't read mail log $MAILLOG, exiting..."

##### Pass #1 #####

# Find all the pids of spamd processes that identified spam and store them in a
# file after escaping the square brackets around the pid (ex: 'spamd\[5733\]')

TMP_SPAMD_PIDS=$("$MKTEMP" -q "$TMP_TEMPLATE") \
	|| die "Can't create temp file, exiting..."

# 'sed -n ... p' emits only lines that matched the substitution; a line that
# did not match must not be passed on verbatim as a pattern for the next pass.
# 'sort -u' drops duplicates to keep the pattern file small.
"$GREP" 'identified spam' "$MAILLOG" \
	| "$SED" -n 's/^.*spamd\[\([0-9]*\)\].*/spamd\\[\1\\]/p' \
	| "$SORT" -u > "$TMP_SPAMD_PIDS"

##### Pass #2 #####

# Find all message-ids of spam identified by spamd
# (ex. 'message-id=<llsa--03iev4@p9jbyo1>')

TMP_MESSAGE_IDS=$("$MKTEMP" -q "$TMP_TEMPLATE") \
	|| die "Can't create temp file, exiting..."

"$GREP" -f "$TMP_SPAMD_PIDS" "$MAILLOG" \
	| "$GREP" 'processing message' \
	| "$SED" -n 's/^[^<]*\(<[^>]*>\).*/message-id=\1/p' \
	| "$SORT" -u > "$TMP_MESSAGE_IDS"

"$RM" -f "$TMP_SPAMD_PIDS"
TMP_SPAMD_PIDS=

##### Pass #3 #####

# Postfix-specific! Find all Postfix ids of spam message-ids.
# (ex. 'CFBB674F87')

TMP_POSTFIX_IDS=$("$MKTEMP" -q "$TMP_TEMPLATE") \
	|| die "Can't create temp file, exiting..."

# Message-ids are literal strings that routinely contain regex metacharacters
# ('.', '+', '[', '$'), so match them with -F. The queue id must be at least
# one character long: an empty pattern line would match every log line in
# pass 4.
"$GREP" -F -f "$TMP_MESSAGE_IDS" "$MAILLOG" \
	| "$SED" -n 's/^.*: \([A-Z0-9][A-Z0-9]*\): message-id=.*/\1/p' \
	| "$SORT" -u > "$TMP_POSTFIX_IDS"

"$RM" -f "$TMP_MESSAGE_IDS"
TMP_MESSAGE_IDS=

##### Pass #4 #####

# Postfix-specific! Find the name and IP address of the hosts passing spam to
# Postfix, sorted by number of spams sent from each host.
# (ex. '      1 D40A2138.rev.stofanet.dk[212.10.33.56]')
# The leading number is the amount of spam from this host.
#
# Pipe this output through `| sed 's/^[^\[]*\[//;s/\].*//'` to give just the IP
# addresses, suitable for firewalling.

# Only smtpd lines that actually carry 'client=' are reported; other smtpd
# lines mentioning the same queue id are not host records.
"$GREP" -F -f "$TMP_POSTFIX_IDS" "$MAILLOG" \
	| "$GREP" smtpd \
	| "$SED" -n 's/^.*client=//p' \
	| "$SORT" \
	| "$UNIQ" -c \
	| "$SORT" -rn

# The remaining temp file is removed by the EXIT trap.

# __END__
