Spamassassin: Unterschied zwischen den Versionen
K (...) |
(spamreport) |
||
Zeile 298: | Zeile 298: | ||
= | = Spam-Report<br> = | ||
Plugin für SpamAssassin: FuzzyOcr (als Erweiterung von OcrPlugin). http://www.huschi.net/11_207_de.html<br> http://users.own-hero.net/~decoder/fuzzyocr/ | Script for collect (count) all spam and generate a spamreport via email (from http://www.linux-magazin.de/heft_abo/ausgaben/2007/01/muellsortierung), [http://www.linux-magazin.de/static/listings/magazin/2007/01/IMAP-Spamfilter/spamreport_verbesserte_Version.pl Listing]<br> | ||
<br> | |||
*spamreport.pl | |||
<source lang="perl">#!/usr/bin/perl -w | |||
# | |||
# Script for collect all spam and generate a spamreport | |||
# | |||
# Author: Matthias Jansen | |||
# Version: 0.2, 2007/01/05 | |||
use strict; | |||
use Mail::Sendmail; | |||
use HTML::Entities; | |||
use Fcntl ':flock'; # import LOCK_* constants | |||
# get all possible spam | |||
my @files = `/usr/bin/find /var/vpopmail/domains/ -mtime -1 -a -type f -a \\( -wholename '*Maildir/.Spam/cur/*' -o -wholename '*Maildir/.Spam/new/*' \\) | |||
-a -not -name '*T' -print`; | |||
my %spams = (); | |||
my $lastuser = ''; | |||
my $count = 0; | |||
# collect all the data | |||
for (my $i=0;$i<@files;$i++) { | |||
my($domain, $user) = $files[$i] =~ /\/var\/vpopmail\/domains\/([^\/]+)\/([^\/]+)\//; | |||
my $currentuser = $user."@".$domain; | |||
if ($lastuser ne $currentuser) { | |||
my @temp = (); | |||
$spams{$currentuser} = \@temp; | |||
$count = 0; | |||
} | |||
$lastuser = $currentuser; | |||
if (open(SP,"<".$files[$i])) { | |||
# lock the mail before reading | |||
flock(SP,LOCK_EX); | |||
my $subject = ''; | |||
my $from = ''; | |||
while(defined(my $line = <SP>) && (length($subject) == 0 || ($from eq ''))) { | |||
if ($line =~ /^Subject:\s*(.*)$/i) { $subject = $1; } | |||
elsif ($line =~ /^From:\s*(.*)$/i) { $from = $1; } | |||
} | |||
# encode HTML codes | |||
encode_entities($subject); | |||
encode_entities($from); | |||
my %spam = ('subject' => $subject, 'from' => $from); | |||
$spams{$currentuser}[$count] = \%spam; | |||
$count++; | |||
flock(SP,LOCK_UN); | |||
close(SP); | |||
} | |||
} | |||
# generate the spamreport now | |||
open(TPL,"</root/bin/spamreport.tpl"); | |||
my @a_tpl = <TPL>; | |||
close(TPL); | |||
my $tpl = join("",@a_tpl); | |||
while (my ($user,$data) = each(%spams)) { | |||
(my $text = $tpl) =~ s/###EMAIL###/$user/; | |||
my $spam_text = ''; | |||
for (my $i=0;$i<scalar @$data;$i++) { | |||
$spam_text.= '<tr><td>'.@$data[$i]->{'from'}.'</td><td>'.@$data[$i]->{'subject'}.'</td></tr>'; | |||
} | |||
$text =~ s/###SPAMTEXT###/$spam_text/; | |||
my %mail = ( To => $user, | |||
From => 'Spamreporter <spamreporter@domain.tld>', | |||
Message => $text, | |||
Server => '127.0.0.1', | |||
Subject => 'Spamreport', | |||
'Content-Type' => 'text/html; charset="utf8"' | |||
); | |||
sendmail(%mail); | |||
}</source><br> | |||
<br> | |||
*spamreport.tpl | |||
<source lang="html"><html> | |||
<head> | |||
</head> | |||
<body> | |||
<b>Spamreport von domain.tld für die Emailadresse ###EMAIL###</b> | |||
<br><br> | |||
Neuen Mails im Ordner Spam der letzten 24 Stunden<br><br> | |||
<table border=1 cellspacing=0 cellpadding=5> | |||
<tr><th>Absender</th><th>Betreff</th></tr> | |||
###SPAMTEXT### | |||
</table> | |||
<p>Sie können sich den Inhalt des Spam-Ordners über das <a href="https://domain.tld/mail/">Webinterface</a> anzeigen lassen.</p> | |||
</body> | |||
</html></source> | |||
<br> | |||
= Bilderkennung = | |||
Plugin für SpamAssassin: FuzzyOcr (als Erweiterung von OcrPlugin). http://www.huschi.net/11_207_de.html<br> | |||
*http://users.own-hero.net/~decoder/fuzzyocr/ | |||
*http://wiki.apache.org/spamassassin/FuzzyOcrPlugin | |||
= Statistiken = | = Statistiken = |
Version vom 12. Januar 2009, 00:53 Uhr
siehe auch SendMail
Konfig
/etc/mail/spamassassin/v310.pre
# DCC - perform DCC message checks.
#
# DCC is disabled here because it is not open source. See the DCC
# license for more details.
#
loadplugin Mail::SpamAssassin::Plugin::DCC
Qmail/Vpopmail
# /etc/default/spamassassin
# Duncan Findlay
# WARNING: please read README.spamd before using.
# There may be security risks.
# Change to one to enable spamd
ENABLED=1
# Options
# See man spamd for possible options. The -d option is automatically added.
# NOTE: version 3.0.x has switched to a "preforking" model, so you
# need to make sure --max-children is not set to anything higher than
# 5, unless you know what you're doing.
OPTIONS="-u vpopmail -g vchkpw -v --create-prefs --max-children 5 --helper-home-dir"
# Pid file
# Where should spamd write its PID to file? If you use the -u or
# --username option above, this needs to be writable by that user.
# Otherwise, the init script will not be able to shut spamd down.
PIDFILE="/var/run/spamd.pid"
# Set nice level of spamd
#NICE="--nicelevel 15"
Damit die user-prefs für vpopmail-Accounts richtig geschrieben werden können, muss spamd gepatcht werden, da das Schreiben sonst fehlschlägt (http://issues.apache.org/SpamAssassin/show_bug.cgi?id=4714). Der Patch stammt von http://issues.apache.org/SpamAssassin/attachment.cgi?id=3377:
1750,1757c1750,1774 < $dir = `$vpopdir/bin/vuserinfo -d $username`; < if ($? != 0) { < # < # If vuserinfo failed $username could be an alias < # < $dir = `$vpopdir/bin/valias $username`; < if ($? == 0 && $dir !~ /.+ -> &/) { < $dir =~ s,.+ -> (/.+)/Maildir/,$1,; --- > my $itterations = 0; > my $max_itterations = 20; > my $found = 0; > while(!$found && ( $itterations < $max_itterations ) ) { > $itterations++; > $dir = `$vpopdir/bin/vuserinfo -d $username`; > if ($? != 0) { > # > # If vuserinfo failed $username could be an alias > # > $dir = `$vpopdir/bin/valias $username`; > chomp($dir); > if ($? == 0) { > $dir =~ s,.+ -> (.+),$1,; > $username = "$1"; > $username =~ s,&,,g; > > if ("$dir" =~ /\|/) { > $dir = $vpopdir; > $found = 1; > } > } elsif("$dir" eq "" || "$dir" eq "invalid domain, not in qmail assign file") { > $dir = $vpopdir; > $found = 1; > } 1759c1776 < undef($dir); --- > $found = 1; 1760a1778 > chomp($username);
Ham/Spam-Fütterung via Email
http://www.huschi.net/11_148_de.html
Jeder User hat seine eigene Bayes-Datenbank (sa-learn -u $sender --dbpath $userdir)
#/etc/logrotate.d/sa-learn
/var/log/sa-learn.log {
    weekly
    delaycompress
    rotate 4
    compress
    create 640 vpopmail vchkpw
}
#!/usr/bin/perl -w
#
# sa-wrapper.pl - train a per-user SpamAssassin Bayes database with mail that
# arrives on STDIN and carries the sample(s) as message/rfc822 attachments.
#
# Invocation (e.g. from a .qmail file): sa-wrapper.pl --spam
# The user whose database is trained is taken from the SENDER environment
# variable (the envelope sender of the submitting mail).
use strict;
use MIME::Tools;
use MIME::Parser;

my $DEBUG      = 0;
my $LOGFILE    = '/var/log/sa-learn.log';
my $UNPACK_DIR = '/var/spool/unpack';
my $SA_LEARN   = '/usr/bin/sa-learn';
my @DOMAINS    = qw/gtmp.org winnink.org/;

# First argument is the sa-learn mode switch; the second is accepted but not
# used anywhere in this script.
my ($spamham, $sender) = @ARGV;

# The envelope sender is attacker-controlled input. It ends up in command
# lines and in a filesystem path, so accept only a conservative character set
# and refuse leading dots (no "../" style traversal, no hidden directories).
my $sender2 = $ENV{SENDER};
die "SENDER is not set or is not an acceptable mail address\n"
    unless defined $sender2
    && $sender2 =~ m{\A([A-Za-z0-9][A-Za-z0-9._%+=-]*)\@([A-Za-z0-9][A-Za-z0-9.-]*)\z};
my $username = $1;
my $domain   = $2;

# Ask vpopmail for the domain directory. The list form of the pipe open runs
# vdominfo directly, without handing the domain name to a shell.
open(my $vdominfo, '-|', '/var/vpopmail/bin/vdominfo', '-d', $domain)
    or die "Cannot run vdominfo: $!";
my $domdir = do { local $/; <$vdominfo> };
close($vdominfo);
$domdir = '' unless defined $domdir;
chomp($domdir);

# An unknown domain yields no usable directory; stop instead of building a
# database path below "/".
die "No vpopmail directory found for domain $domain\n"
    unless length $domdir && -d $domdir;

my $userdir = "$domdir"."/$username"."/.spamassassin/";
# recurs($entity)
#
# Walk a MIME entity tree depth-first. Every part of type message/rfc822 is
# written to the STDIN of sa-learn (mode $spamham, user $sender2, database
# $userdir); such a part is not descended into any further. Other entities
# are searched for nested parts.
#
# Dies when the sa-learn child process cannot be created. Returns nothing.
sub recurs
{
    my $ent = shift;

    if ($ent->head->mime_type eq 'message/rfc822') {
        # Build the command as a list so that no value - in particular the
        # envelope sender - is ever interpreted by a shell.
        my @cmd = ($SA_LEARN);
        push @cmd, '-D' if $DEBUG;
        push @cmd, '-u', $sender2, '--dbpath', $userdir;
        # Word-split the mode switch the way the shell used to.
        push @cmd, split(' ', $spamham) if defined $spamham;
        push @cmd, '--single';

        # Forking pipe open: the parent gets a write handle connected to the
        # child's STDIN, the child becomes sa-learn.
        my $pid = open(my $out, '|-');
        die "Cannot pipe $SA_LEARN: $!" unless defined $pid;

        if ($pid == 0) {
            require POSIX;
            if ($DEBUG) {
                # Same effect as the former ">>$LOGFILE 2>&1" redirection.
                open(STDOUT, '>>', $LOGFILE)  or POSIX::_exit(126);
                open(STDERR, '>&', \*STDOUT) or POSIX::_exit(126);
            }
            exec { $cmd[0] } @cmd;
            # Only reached when exec failed; leave without running the
            # parent's cleanup code in the child.
            print STDERR "Cannot exec $SA_LEARN: $!\n";
            POSIX::_exit(127);
        }

        $ent->bodyhandle->print($out);
        # close() waits for the child and reports its exit status in $?.
        close($out)
            or warn "$SA_LEARN did not finish cleanly (status $?)\n";
        return;
    }

    # Not a message itself: look for attached messages in the sub-parts.
    recurs($_) for $ent->parts;
    return;
}
# Optional restriction to known domains (currently disabled):
#my ($domain) = $sender =~ /\@(.*)$/;
#unless (grep { $_ eq $domain } @DOMAINS) {
# die "I don't recognize your domain !";
#}

if ($DEBUG) {
    MIME::Tools->debugging(1);
    # NOTE(review): a fixed file name in /tmp can be pre-created or symlinked
    # by other local users; consider a log location only this user can write.
    # Open the log on its own handle first so STDERR survives a failure.
    if (open(my $errlog, '>>', '/tmp/spam_err.log')) {
        open(STDERR, '>&', $errlog)
            or warn "Cannot redirect STDERR to /tmp/spam_err.log: $!";
    }
    else {
        warn "Cannot open /tmp/spam_err.log: $!";
    }
}

# Parse the mail from STDIN. Nested messages are kept as opaque
# message/rfc822 parts so that recurs() can hand them to sa-learn unchanged.
my $parser = MIME::Parser->new;
$parser->extract_nested_messages(0);
$parser->output_under($UNPACK_DIR);

my $entity;
my $ok = eval {
    $entity = $parser->parse(\*STDIN);
    recurs($entity);
    1;
};
my $error = $@;

# Remove the unpacked files in every case, also after a failure, so that
# aborted runs do not pile up below $UNPACK_DIR.
$parser->filer->purge;
my $output_dir = $parser->output_dir;
rmdir $output_dir if defined $output_dir;

# Re-raise the original error only after cleaning up.
die $error unless $ok;
Aktivierung über .qmail, z. B. /var/vpopmail/domains/fffff.tld/learn-spam/.qmail
|/var/qmail/bin/preline /usr/local/bin/sa-wrapper.pl --spam
Die Ergebnisse dazu finden sich anschließend im qmail-send-Log:
@4000000045e9d8db229274cc new msg 635402 @4000000045e9d8db22927c9c info msg 635402: bytes 6395 from <user@domain.de> qp 6987 uid 64011 @4000000045e9d8db2380c884 starting delivery 1179: msg 635402 to local domain.eu-learn-spam@domain.eu @4000000045e9d8db2380d43c status: local 1/10 remote 0/20 @4000000045e9d8e10f48c5ec delivery 1179: success: Learned_tokens_from_0_message(s)_(1_message(s)_examined)/Learned_tokens_from_0_message(s)_(1_message(s)_examined)/did_0+0+1/ @4000000045e9d8e10f48d58c status: local 0/10 remote 0/20 @4000000045e9d8e10f48dd5c end msg 635402
Ham/Spam-Fütterung via IMAP-Ordner
als Vorlage diente http://www.pqpq.de/2006/01/28/sh-sa-learnsh/ bzw. http://katastrophos.net/andre/blog/2006/10/03/vpopmail-maildrop-sqwebmail-mailfilter-patch/
Das Script sa-learn.sh wird jede Stunde aufgerufen und durchläuft jede User-Mailbox. Gibt es dort einen Ordner (z. B. 'LearnSPAM' bzw. 'LearnHAM') mit E-Mails, so werden diese Mails 'trainiert' und danach gelöscht. Zusätzlich wird eine Info-Mail mit einer kurzen Anleitung erstellt.
#!/bin/bash
# sa-learn.sh - configuration
#
# bash is requested explicitly because the script uses the "function" keyword
# and the "&>" redirection, neither of which a plain POSIX /bin/sh provides.

#conf
# Timestamp of this run, shown in the notice mail.
DATE=`date`
# Maildir file names of the helper mails (":2,S" marks them as already seen).
FILE_LASTSCAN="lastscan:2,S"
FILE_ATTENTION="attention:2,S"
SA_LEARN="/usr/bin/sa-learn"
# Root of all vpopmail domains: $base/<domain>/<user>/Maildir/...
base="/var/vpopmail/domains"
# Folder whose mails are learned as spam / as ham. The names must match the
# learn mode: LearnSPAM holds undetected spam, LearnHAM holds false positives.
spam="Maildir/.Spam.LearnSPAM"
ham="Maildir/.Spam.LearnHAM"
# Sender and default recipient of the notice mail; TO is replaced by the
# mailbox owner's address while the mailboxes are processed.
FROM="sadmin@maildom.tld"
TO="sadmin@maildom.tld"
# create_notices FOLDER
#
# Write the explanatory notice mail into the cur/ directory of FOLDER (a path
# relative to the mailbox, e.g. "Maildir/.Spam.LearnSPAM") of the mailbox
# selected by $base, $domain and $user. Uses $FROM, $TO, $DATE and
# $FILE_ATTENTION from the surrounding script.
create_notices() {
    # RFC 2822 style date of this run; LC_ALL=C keeps day and month names English.
    notice_date=`LC_ALL=C date '+%a, %d %b %Y %H:%M:%S %z'`
    # Fresh Message-ID per run, using the domain part of the sender address.
    notice_msgid="`date '+%Y%m%d%H%M%S'`.$$.qmail@${FROM#*@}"

    # The empty line after the headers is required: without it the body text
    # would be read as (broken) header lines. The body contains UTF-8
    # umlauts, hence 8bit instead of 7bit.
    cat > "$base/$domain/$user/$1/cur/$FILE_ATTENTION" << EOF
Return-Path: <$FROM>
Received: (qmail 4181 invoked by uid 89); 9 Apr 2006 13:26:51 -0000
Message-ID: <$notice_msgid>
From: "Postmaster" <$FROM>
To: "Reporter" <$TO>
Subject: Wichtige Information ueber diesen Ordner ($DATE)
Date: $notice_date
Mime-Version: 1.0
Content-Type: text/plain; format=flowed; charset="utf-8"
Content-Transfer-Encoding: 8bit

Letzter Scan: $DATE
Lieber Benutzer,
die beiden Ordner LearnSPAM bzw. LearnHAM dienen der Verbesserung des SPAM-Filter.
E-Mails, die noch nicht als SPAM erkannt wurden, kommen in den Ordner LearnSPAM.
Versehentlich eingestufte E-Mail kommen in LearnHAM.
Beide Ordner werden stündlich durchsucht. Bei vorhandenen Emails wird der SPAM-Filter aufgerufen, um die Qualität des Filters zu verbessern.
Bitte beachten: Danach werden die E-Mails geloescht!
Bitte E-Mails NICHT in diesen Ordner VERSCHIEBEN, SONDERN KOPIEREN!
viele Grüsse
Ihr Postmaster
+++++++++++++++++++++ english ++++++++++++++++++++++++++++++
Last Scan: $DATE
Dear user,
eMails in this folder are reported to our Spam-database on a regular basis.
Please be aware that all emails in this folder are deleted after they have been successfully scanned.
So in your own interest:
Please DO NOT MOVE mistakenly classified emails to this folder, BUT COPY them here.
Kind regards,
Your Postmaster
EOF
}
# remove_notices FOLDER
#
# Delete the notice mail from the cur/ directory of FOLDER in the mailbox
# selected by $base, $domain and $user. A missing notice is not reported.
# The portable ">/dev/null 2>&1" is used instead of "&>": a POSIX shell reads
# "&>" as "run in background", so rm could still be running when the caller
# goes on to look at the folder.
remove_notices() {
    #rm "$base/$domain/$user/$1/cur/$FILE_LASTSCAN" >/dev/null 2>&1
    rm "$base/$domain/$user/$1/cur/$FILE_ATTENTION" >/dev/null 2>&1
}
# learn_folder MODE FOLDER
#
# Train sa-learn with every mail found in cur/ and new/ of FOLDER (a path
# relative to the current mailbox) using --MODE (spam or ham), delete the
# mails that were learned, and (re)write the notice mail.
# Works on the mailbox selected by $base, $domain, $user and $username.
learn_folder() {
    learn_mode=$1
    learn_name=$2
    learn_dir="$base/$domain/$user/$learn_name"

    [ -d "$learn_dir" ] || return 0

    # Collect the real messages in "$@". The notice mail is skipped so it is
    # neither counted nor learned; an unmatched glob stays a literal pattern
    # and is dropped by the -f test.
    set --
    for learn_msg in "$learn_dir"/cur/* "$learn_dir"/new/*; do
        [ -f "$learn_msg" ] || continue
        case "$learn_msg" in
            */cur/"$FILE_ATTENTION") continue ;;
        esac
        set -- "$@" "$learn_msg"
    done

    # Nothing to learn and the notice is already in place: leave the folder alone.
    if [ $# -eq 0 ] && [ -f "$learn_dir/cur/$FILE_ATTENTION" ]; then
        return 0
    fi

    remove_notices "$learn_name"

    if [ $# -gt 0 ]; then
        #echo " $learn_name found, running sa-learn for $username..."
        # NOTE(review): --dbpath points at the mailbox directory itself here;
        # confirm this is the intended Bayes database location, and that
        # --sync combined with --spam/--ham behaves as wanted.
        # Mails are only deleted after sa-learn reported success, so a failed
        # run is retried on the next invocation.
        if $SA_LEARN -u "$username" --dbpath "$base/$domain/$user/" --sync "--$learn_mode" "$@"; then
            rm -f -- "$@" >/dev/null 2>&1
        fi
    fi

    create_notices "$learn_name"
}

# Visit every mailbox below $base/<domain>/<user>.
for domain_dir in "$base"/*/; do
    [ -d "$domain_dir" ] || continue
    domain=`basename "$domain_dir"`
    #echo "$domain"
    for user_dir in "$domain_dir"*/; do
        [ -d "$user_dir" ] || continue
        user=`basename "$user_dir"`
        username="$user@$domain"
        # The notice mail is addressed to the mailbox owner.
        TO=$username
        #echo " $user"
        learn_folder spam "$spam"
        learn_folder ham "$ham"
    done
done
Spam-Report
Script to collect (count) all spam and generate a spam report via email (from http://www.linux-magazin.de/heft_abo/ausgaben/2007/01/muellsortierung), Listing
- spamreport.pl
#!/usr/bin/perl -w
#
# Script to collect all spam and generate a spam report
#
# For every vpopmail mailbox that received mail in its Spam folder during the
# last day, one HTML mail listing sender and subject of each message is sent
# to the mailbox owner.
#
# Author: Matthias Jansen
# Version: 0.2, 2007/01/05
use strict;
use Mail::Sendmail;
use HTML::Entities;
use Fcntl ':flock'; # import LOCK_* constants

# read_headers($path)
#
# Return a hash reference { subject => ..., from => ... } with the
# HTML-encoded Subject and From header of the mail file $path (empty strings
# for headers that are absent). Returns undef when the file cannot be opened,
# e.g. because it was moved or deleted after it had been listed.
sub read_headers {
    my ($path) = @_;

    open(my $mail_fh, '<', $path) or return;
    # A shared lock is sufficient for reading and, unlike an exclusive lock,
    # is available on a handle opened read-only on every platform.
    flock($mail_fh, LOCK_SH);

    my %header = (subject => '', from => '');
    while (defined(my $line = <$mail_fh>)) {
        # The first empty line ends the header; never pick up "Subject:" or
        # "From:" lines from the message body.
        last if $line =~ /^\r?$/;
        if    ($line =~ /^Subject:\s*(.*?)\r?$/i) { $header{subject} = $1; }
        elsif ($line =~ /^From:\s*(.*?)\r?$/i)    { $header{from}    = $1; }
        last if $header{subject} ne '' && $header{from} ne '';
    }

    flock($mail_fh, LOCK_UN);
    close($mail_fh);

    # encode HTML codes (in place) - the values come from spam mails and are
    # inserted into an HTML document.
    encode_entities($header{subject});
    encode_entities($header{from});
    return \%header;
}

# get all possible spam: regular files changed within the last day in cur/ or
# new/ of a Spam folder, except those whose name ends in "T".
# The command is passed as a list, so it is one single find invocation and
# needs no shell quoting.
my @find_cmd = (
    '/usr/bin/find', '/var/vpopmail/domains/',
    '-mtime', '-1', '-a', '-type', 'f', '-a',
    '(', '-wholename', '*Maildir/.Spam/cur/*',
         '-o', '-wholename', '*Maildir/.Spam/new/*', ')',
    '-a', '-not', '-name', '*T', '-print',
);
open(my $find_fh, '-|', @find_cmd) or die "Cannot run $find_cmd[0]: $!";
chomp(my @files = <$find_fh>);
close($find_fh);

# collect all the data: mail address => list of { subject, from }
my %spams = ();
for my $file (@files) {
    # /var/vpopmail/domains/<domain>/<user>/...
    my ($domain, $user) = $file =~ m{/var/vpopmail/domains/([^/]+)/([^/]+)/}
        or next;
    my $headers = read_headers($file) or next;
    # Appending keeps every entry of a user even when the files of one
    # mailbox are not listed next to each other.
    push @{ $spams{ $user . '@' . $domain } }, $headers;
}

# generate the spamreport now
open(my $tpl_fh, '<', '/root/bin/spamreport.tpl')
    or die "Cannot open /root/bin/spamreport.tpl: $!";
my $tpl = do { local $/; <$tpl_fh> };
close($tpl_fh);
$tpl = '' unless defined $tpl;

for my $user (sort keys %spams) {
    my $user_html = $user;
    encode_entities($user_html);

    my $spam_text = join '',
        map { '<tr><td>'.$_->{'from'}.'</td><td>'.$_->{'subject'}.'</td></tr>' }
        @{ $spams{$user} };

    (my $text = $tpl) =~ s/###EMAIL###/$user_html/g;
    $text =~ s/###SPAMTEXT###/$spam_text/;

    my %mail = ( To => $user,
                 From => 'Spamreporter <spamreporter@domain.tld>',
                 Message => $text,
                 # Mail::Sendmail takes the mail server from the "smtp" key;
                 # any key it does not know is sent as a mail header.
                 smtp => '127.0.0.1',
                 Subject => 'Spamreport',
                 'Content-Type' => 'text/html; charset="utf-8"'
               );
    sendmail(%mail)
        or warn "Cannot send spamreport to $user: $Mail::Sendmail::error\n";
}
- spamreport.tpl
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Spamreport</title>
</head>
<body>
<b>Spamreport von domain.tld für die Emailadresse ###EMAIL###</b>
<br><br>
Neue Mails im Ordner Spam aus den letzten 24 Stunden<br><br>
<table border="1" cellspacing="0" cellpadding="5">
<tr><th>Absender</th><th>Betreff</th></tr>
###SPAMTEXT###
</table>
<p>Sie können sich den Inhalt des Spam-Ordners über das <a href="https://domain.tld/mail/">Webinterface</a> anzeigen lassen.</p>
</body>
</html>
Bilderkennung
Plugin für SpamAssassin: FuzzyOcr (als Erweiterung von OcrPlugin). http://www.huschi.net/11_207_de.html
Statistiken
http://www.rulesemporium.com/programs/sa-stats-1.0.txt
http://software.inl.fr/trac/trac.cgi/wiki/EdenMail/SpamStats?