Spamassassin: Unterschied zwischen den Versionen

Aus crazylinux.de
Zur Navigation springen Zur Suche springen
K (...)
(spamreport)
Zeile 298: Zeile 298:




= Bilderkennung =
= Spam-Report<br>  =


Plugin für SpamAssassin: FuzzyOcr (als Erweiterung von OcrPlugin). http://www.huschi.net/11_207_de.html<br> http://users.own-hero.net/~decoder/fuzzyocr/<br> http://wiki.apache.org/spamassassin/FuzzyOcrPlugin  
Script to collect (count) all spam and send a spam report via email (from http://www.linux-magazin.de/heft_abo/ausgaben/2007/01/muellsortierung), [http://www.linux-magazin.de/static/listings/magazin/2007/01/IMAP-Spamfilter/spamreport_verbesserte_Version.pl Listing]<br>
 
<br>
 
*spamreport.pl
 
<source lang="perl">#!/usr/bin/perl -w                                                                                                                                   
#                                                                                                                                                   
# Script for collect all spam and generate a spamreport                                                                                             
#                                                                                                                                                   
# Author: Matthias Jansen                                                                                                                           
# Version: 0.2, 2007/01/05                                                                                                                           
 
use strict;
use Mail::Sendmail;
use HTML::Entities;
use Fcntl ':flock'; # import LOCK_* constants
 
# Get all possible spam: regular files modified within the last day that
# live in a Maildir ".Spam" folder (cur/ or new/), skipping names that end
# in "T" (presumably the Maildir "trashed" flag -- TODO confirm).
#
# find(1) is started without a shell (list-form pipe open), so parentheses
# and glob patterns need no escaping.  The former backtick command contained
# a line break; the shell therefore ran "-a -not -name '*T' -print" as a
# separate command and that part of the filter never reached find.
my @find_cmd = (
        '/usr/bin/find', '/var/vpopmail/domains/',
        '-mtime', '-1', '-a', '-type', 'f', '-a',
        '(', '-wholename', '*Maildir/.Spam/cur/*',
             '-o', '-wholename', '*Maildir/.Spam/new/*', ')',
        '-a', '-not', '-name', '*T', '-print',
);
open(my $find_fh, '-|', @find_cmd) or die "Cannot run $find_cmd[0]: $!";
my @files = <$find_fh>;         # one path per element, trailing newline kept
close($find_fh);
 
# Spam entries per mailbox:
#   "user@domain" => [ { subject => ..., from => ... }, ... ]
# Subject and sender are stored HTML-encoded.
my %spams = ();

# collect all the data
for my $file (@files) {

        # The list may still carry the line ending of the find output; a
        # three-argument open() does not strip it, so use a chomped copy.
        chomp(my $path = $file);

        # The first two path components below the vpopmail root are taken
        # as domain and mailbox name; skip anything that does not fit.
        my ($domain, $user) = $path =~ m{/var/vpopmail/domains/([^/]+)/([^/]+)/};
        next unless defined $domain && defined $user;

        my $currentuser = $user."@".$domain;

        # Mails that cannot be opened are silently skipped (as before).
        open(my $mail, '<', $path) or next;

        # Lock the mail before reading.  A shared lock is enough for a
        # reader; an exclusive lock may be refused on a read-only handle
        # where flock() is emulated via fcntl().
        flock($mail, LOCK_SH);

        my $subject = '';
        my $from = '';

        while (defined(my $line = <$mail>)) {
                $line =~ s/\r?\n\z//;

                # An empty line ends the header; never pick up "Subject:" or
                # "From:" lines from the message body.
                last if $line eq '';

                if ($line =~ /^Subject:\s*(.*)$/i) { $subject = $1; }
                elsif ($line =~ /^From:\s*(.*)$/i) { $from = $1; }

                # Stop as soon as both values are known.
                last if length($subject) && $from ne '';
        }

        # Closing the handle also releases the lock.
        close($mail);

        # encode HTML codes (in place)
        encode_entities($subject);
        encode_entities($from);

        # Append to the list of this mailbox; this works regardless of the
        # order in which find returned the files.
        push @{ $spams{$currentuser} }, { 'subject' => $subject, 'from' => $from };
}
 
 
# generate the spamreport now

# Read the HTML template in one piece.  Without the template every report
# would be an empty mail, so give up early if it cannot be read.
my $tpl_file = '/root/bin/spamreport.tpl';
open(my $tpl_fh, '<', $tpl_file) or die "Cannot open $tpl_file: $!";
my $tpl = do { local $/; <$tpl_fh> };
close($tpl_fh);
$tpl = '' unless defined $tpl;  # empty template file

# One mail per mailbox: fill in the address and one table row per spam mail.
while (my ($user,$data) = each(%spams)) {
        (my $text = $tpl) =~ s/###EMAIL###/$user/;

        # 'from' and 'subject' were HTML-encoded when they were collected.
        my $spam_text = join('',
                map { '<tr><td>'.$_->{'from'}.'</td><td>'.$_->{'subject'}.'</td></tr>' } @$data);
        $text =~ s/###SPAMTEXT###/$spam_text/;

        # NOTE(review): the registered MIME charset name is "utf-8"; the
        # value below is kept unchanged.
        my %mail = ( To    => $user,
                From    => 'Spamreporter <spamreporter@domain.tld>',
                Message => $text,
                Server  => '127.0.0.1',
                Subject => 'Spamreport',
                'Content-Type' => 'text/html; charset="utf8"'
        );

        # Report delivery problems instead of dropping them silently, but
        # keep going with the remaining mailboxes.
        sendmail(%mail)
                or warn "Could not send spamreport to $user: $Mail::Sendmail::error\n";
}</source><br>
 
<br>
 
*spamreport.tpl
 
<source lang="html"><html>
<head>
</head>
<body>
<b>Spamreport von domain.tld für die Emailadresse ###EMAIL###</b>
<br><br>
Neuen Mails im Ordner Spam der letzten 24 Stunden<br><br>
<table border=1 cellspacing=0 cellpadding=5>
<tr><th>Absender</th><th>Betreff</th></tr>
###SPAMTEXT###
</table>
<p>Sie k&ouml;nnen sich den Inhalt des Spam-Ordners &uuml;ber das <a href="https://domain.tld/mail/">Webinterface</a> anzeigen lassen.</p>
</body>
</html></source>
 
<br>
 
= Bilderkennung  =
 
Plugin für SpamAssassin: FuzzyOcr (als Erweiterung von OcrPlugin). http://www.huschi.net/11_207_de.html<br>  
 
*http://users.own-hero.net/~decoder/fuzzyocr/  
*http://wiki.apache.org/spamassassin/FuzzyOcrPlugin


= Statistiken =
= Statistiken =

Version vom 12. Januar 2009, 00:53 Uhr

siehe auch SendMail

Konfig

/etc/mail/spamassassin/v310.pre

# DCC - perform DCC message checks.
#
# DCC is disabled here because it is not open source.  See the DCC
# license for more details.
#
loadplugin Mail::SpamAssassin::Plugin::DCC


Qmail/Vpopmail

# /etc/default/spamassassin
# Duncan Findlay

# WARNING: please read README.spamd before using.
# There may be security risks.

# Change to one to enable spamd
ENABLED=1

# Options
# See man spamd for possible options. The -d option is automatically added.

# NOTE: version 3.0.x has switched to a "preforking" model, so you
# need to make sure --max-children is not set to anything higher than
# 5, unless you know what you're doing.

OPTIONS="-u vpopmail -g vchkpw -v --create-prefs --max-children 5 --helper-home-dir"

# Pid file
# Where should spamd write its PID to file? If you use the -u or
# --username option above, this needs to be writable by that user.
# Otherwise, the init script will not be able to shut spamd down.
PIDFILE="/var/run/spamd.pid"

# Set nice level of spamd
#NICE="--nicelevel 15"


Damit die user-prefs für vpopmail-Accounts richtig geschrieben werden können, muss spamd gepatcht werden, da es sonst nicht funktioniert (siehe http://issues.apache.org/SpamAssassin/show_bug.cgi?id=4714). Der folgende Patch stammt von http://issues.apache.org/SpamAssassin/attachment.cgi?id=3377

1750,1757c1750,1774
<     $dir = `$vpopdir/bin/vuserinfo -d $username`;
<     if ($? != 0) {
<       #
<       # If vuserinfo failed $username could be an alias
<       #
<       $dir = `$vpopdir/bin/valias $username`;
<       if ($? == 0 && $dir !~ /.+ -> &/) {
<         $dir =~ s,.+ -> (/.+)/Maildir/,$1,;
---
>     my $itterations = 0;
>     my $max_itterations = 20;
>     my $found = 0;
>     while(!$found && ( $itterations < $max_itterations ) ) {
>       $itterations++;
>       $dir = `$vpopdir/bin/vuserinfo -d $username`;
>       if ($? != 0) {
>         #
>         # If vuserinfo failed $username could be an alias
>         #
>         $dir = `$vpopdir/bin/valias $username`;
>         chomp($dir);
>         if ($? == 0) {
>           $dir =~ s,.+ -> (.+),$1,;
>           $username = "$1";
> 	  $username =~ s,&,,g;
> 
>           if ("$dir" =~ /\|/) {
>             $dir = $vpopdir;
>             $found = 1;
>           }
>         } elsif("$dir" eq "" || "$dir" eq "invalid domain, not in qmail assign file") {
>           $dir = $vpopdir;
>           $found = 1;
>         }
1759c1776
<         undef($dir);
---
>         $found = 1;
1760a1778
>       chomp($username);

Ham/Spam-Fütterung via Email

http://www.huschi.net/11_148_de.html

Jeder User hat seine eigene Bayes-Datenbank (sa-learn -u $sender --dbpath $userdir)

#/etc/logrotate.d/sa-learn
/var/log/sa-learn.log {
        weekly
        delaycompress
        rotate 4
        compress
        create 640 vpopmail vchkpw
}


#!/usr/bin/perl -w

# Feeds a mail read from STDIN into sa-learn, using the Bayes database in
# the vpopmail directory of the envelope sender.
#
# The first command line argument is handed to sa-learn unchanged (for
# example "--spam"); the envelope sender is taken from $ENV{SENDER}.

use strict;
use MIME::Tools;
use MIME::Parser;

my $DEBUG = 0;
my $LOGFILE = '/var/log/sa-learn.log';
my $UNPACK_DIR = '/var/spool/unpack';
my $SA_LEARN = '/usr/bin/sa-learn';
my @DOMAINS = qw/gtmp.org winnink.org/;

my ($spamham, $sender) = @ARGV;
my $sender2 = $ENV{SENDER};

# The envelope sender is chosen by whoever sends the mail and ends up in
# command lines and directory names below.  Accept only a conservative
# "local@domain" form: no slashes, no whitespace, no shell metacharacters,
# and no leading "-" or "." in either part.
my ($username, $domain);
if (defined $sender2
    && $sender2 =~ /\A([A-Za-z0-9_][A-Za-z0-9._+=-]*)\@([A-Za-z0-9][A-Za-z0-9.-]*)\z/) {
        ($username, $domain) = ($1, $2);
}
else {
        # Exit code 100 is reported by qmail as a permanent failure, so the
        # message is not retried.
        print STDERR "sa-wrapper: unusable envelope sender\n";
        exit 100;
}

# Ask vpopmail for the directory of the domain.  The list form of open()
# runs vdominfo directly, without a shell.
my $VDOMINFO = '/var/vpopmail/bin/vdominfo';
open(my $vdominfo_fh, '-|', $VDOMINFO, '-d', $domain)
        or die "Cannot run $VDOMINFO: $!";
my $domdir = do { local $/; <$vdominfo_fh> };
close($vdominfo_fh);
$domdir = '' unless defined $domdir;
chomp($domdir);

if ($domdir eq '') {
        # Without a domain directory the database path would point to the
        # file system root.
        print STDERR "sa-wrapper: no vpopmail directory for domain $domain\n";
        exit 100;
}

my $userdir = "$domdir"."/$username"."/.spamassassin/";

# recurs($entity)
#
# Walks a MIME entity and all of its parts.  The body of every part of type
# message/rfc822 is piped into sa-learn (for the sender's database, with the
# mode given on the command line); parts of any other type are only
# searched for further parts.
#
# Reads the file-level settings $DEBUG, $LOGFILE, $SA_LEARN, $sender2,
# $userdir and $spamham.  Dies if the sa-learn process cannot be started.
sub recurs
{
        my $ent = shift;

        if ($ent->head->mime_type eq 'message/rfc822') {
                # Nothing to learn from a part without a body.
                my $body = $ent->bodyhandle;
                return unless defined $body;

                # Argument list instead of a shell command line: neither the
                # sender address nor the path is ever parsed by a shell.
                my @cmd = ($SA_LEARN);
                push @cmd, '-D' if $DEBUG;
                push @cmd, '-u', $sender2, '--dbpath', $userdir;
                push @cmd, $spamham if defined $spamham;
                push @cmd, '--single';

                # Forking open: the parent writes to $out, the child reads
                # the same data from its STDIN.
                my $pid = open(my $out, '|-');
                die "Cannot pipe $SA_LEARN: $!" unless defined $pid;

                if ($pid == 0) {
                        # Child: in debug mode append all output to the log
                        # file, then turn into sa-learn.
                        if ($DEBUG) {
                                #unlink "/tmp/spam.log.$$" if -e "/tmp/spam.log.$$";
                                open(STDOUT, '>>', $LOGFILE) or die "Cannot append to $LOGFILE: $!";
                                open(STDERR, '>&', \*STDOUT) or die "Cannot redirect STDERR: $!";
                        }
                        exec { $cmd[0] } @cmd or die "Cannot pipe $SA_LEARN: $!";
                }

                $body->print($out);

                # close() waits for sa-learn and fails if it exited with an
                # error; report that, but do not abort the delivery.
                close($out)
                        or warn "$SA_LEARN did not finish cleanly (status $?)\n";
                return;
        }

        # Not a message itself: descend into the parts, if there are any.
        recurs($_) for $ent->parts;
}

#my ($domain) = $sender =~ /\@(.*)$/;
#unless (grep { $_ eq $domain } @DOMAINS) {
#       die "I don't recognize your domain&nbsp;!";
#}

# In debug mode let MIME::Tools trace its work and collect everything
# written to STDERR in a log file.
if ($DEBUG) {
        MIME::Tools->debugging(1);
        open(STDERR, '>>', '/tmp/spam_err.log')
                or die "Cannot append to /tmp/spam_err.log: $!";
}

# Nested messages are not extracted by the parser; recurs() looks for the
# message/rfc822 parts itself.  Unpacked parts are stored below $UNPACK_DIR.
my $parser = MIME::Parser->new;
$parser->extract_nested_messages(0);
$parser->output_under($UNPACK_DIR);

# Parse the mail from STDIN and hand it to sa-learn.  Errors are caught
# first so that the unpacked files are removed in every case.
my $entity;
my $ok = eval {
        $entity = $parser->parse(\*STDIN);
        recurs($entity);
        1;
};
my $error = $@;

# Remove the unpacked files and the (then empty) output directory.
$parser->filer->purge;
my $output_dir = $parser->output_dir;
rmdir $output_dir if defined $output_dir;

# Re-raise the original error, as before.
die $error unless $ok;


Aktivierung über .qmail, z.b. /var/vpopmail/domains/fffff.tld/learn-spam/.qmail

|/var/qmail/bin/preline /usr/local/bin/sa-wrapper.pl --spam

und im qmail-send-log finden sich die Ergebnisse dazu.

@4000000045e9d8db229274cc new msg 635402
@4000000045e9d8db22927c9c info msg 635402: bytes 6395 from <user@domain.de> qp 6987 uid 64011
@4000000045e9d8db2380c884 starting delivery 1179: msg 635402 to local domain.eu-learn-spam@domain.eu
@4000000045e9d8db2380d43c status: local 1/10 remote 0/20
@4000000045e9d8e10f48c5ec delivery 1179: success: Learned_tokens_from_0_message(s)_(1_message(s)_examined)/Learned_tokens_from_0_message(s)_(1_message(s)_examined)/did_0+0+1/
@4000000045e9d8e10f48d58c status: local 0/10 remote 0/20
@4000000045e9d8e10f48dd5c end msg 635402

Ham/Spam-Fütterung via IMAP-Ordner

als Vorlage diente http://www.pqpq.de/2006/01/28/sh-sa-learnsh/ bzw. http://katastrophos.net/andre/blog/2006/10/03/vpopmail-maildrop-sqwebmail-mailfilter-patch/

Das Script sa-learn.sh wird jede Stunde aufgerufen und durchläuft jede User-Mailbox. Gibt es dort einen Ordner, z.B. 'LearnSPAM' bzw. 'LearnHAM', der E-Mails enthält, so werden diese Mails 'trainiert' und danach gelöscht. Zusätzlich wird in dem Ordner eine Info-Mail mit einer kurzen Anleitung erstellt.


#!/bin/sh
#conf
# Time stamp shown in the notice mail.
DATE=`date`
# File names of the notice mails inside a folder's cur/ directory.
FILE_LASTSCAN="lastscan:2,S"
FILE_ATTENTION="attention:2,S"
SA_LEARN="/usr/bin/sa-learn"
# Root of the vpopmail domains; mailboxes are $base/<domain>/<user>.
base="/var/vpopmail/domains"
# Folder whose mails are learned as spam (sa-learn --spam) and folder whose
# mails are learned as ham (sa-learn --ham).  The two values used to be
# swapped, which trained the filter with the opposite of what the user
# asked for.
spam="Maildir/.Spam.LearnSPAM"
ham="Maildir/.Spam.LearnHAM"
# Sender and default recipient of the notice mail; TO is replaced by the
# mailbox address while the mailboxes are processed.
FROM="sadmin@maildom.tld"
TO="sadmin@maildom.tld"

# create_notices FOLDER
#
# Writes the notice mail $FILE_ATTENTION into the cur/ directory of FOLDER
# (a path relative to the mailbox, e.g. the value of $spam or $ham) of the
# current $domain/$user.  Uses $FROM, $TO and $DATE for the header and text.
#
# Defined with the portable "name()" syntax so that it also works when
# /bin/sh is not bash.  The body contains non-ASCII characters, so it is
# declared as 8bit.  Lines carry no trailing blanks, because with
# format=flowed a trailing blank marks a soft line break.
#
# NOTE(review): Received, Message-ID and Date are fixed values, so every
# notice carries the same date and id -- confirm that this is intended.
create_notices() {

    cat > "$base/$domain/$user/$1/cur/$FILE_ATTENTION" << EOF
Return-Path: <$FROM>
Received: (qmail 4181 invoked by uid 89); 9 Apr 2006 13:26:51 -0000
Message-ID: <20040409132651.4180.qmail@maildom.tdl>
From: "Postmaster" <$FROM>
To: "Reporter" <$TO>
Subject: Wichtige Information ueber diesen Ordner ($DATE)
Date: Fri, 09 Apr 2007 15:26:51 +0200
Mime-Version: 1.0
Content-Type: text/plain; format=flowed; charset="utf-8"
Content-Transfer-Encoding: 8bit

Letzter Scan: $DATE

Lieber Benutzer,

die beiden Ordner LearnSPAM bzw. LearnHAM dienen der Verbesserung des SPAM-Filter.

E-Mails, die noch nicht als SPAM erkannt wurden, kommen in den Ordner LearnSPAM.
Versehentlich eingestufte E-Mail kommen in LearnHAM.

Beide Ordner werden stündlich durchsucht. Bei vorhandenen Emails wird der SPAM-Filter aufgerufen, um die Qualität des Filters zu verbessern.

Bitte beachten: Danach werden die E-Mails geloescht!


Bitte E-Mails NICHT in diesen Ordner VERSCHIEBEN, SONDERN KOPIEREN!


viele Grüsse
Ihr Postmaster
+++++++++++++++++++++  english  ++++++++++++++++++++++++++++++

Last Scan: $DATE

Dear user,

eMails in this folder are reported to our Spam-database on a regular basis.
Please be aware that all emails in this folder are deleted after they have been successfully scanned.
So in your own interest:

 Please DO NOT MOVE mistakenly classified emails to this folder, BUT COPY them here.

Kind regards,
Your Postmaster
EOF
}

# remove_notices FOLDER
#
# Deletes the notice mail $FILE_ATTENTION from the cur/ directory of FOLDER
# (a path relative to the mailbox) of the current $domain/$user.  A missing
# file is not an error; all output of rm is discarded.
#
# Uses the portable "name()" definition and ">/dev/null 2>&1": in a plain
# POSIX shell "&>" is read as "run in background" followed by a redirection.
remove_notices() {
    #rm -f "$base/$domain/$user/$1/cur/$FILE_LASTSCAN" >/dev/null 2>&1
    rm -f "$base/$domain/$user/$1/cur/$FILE_ATTENTION" >/dev/null 2>&1
}

# learn_folder MODE FOLDER
#
# Trains sa-learn with the mails in FOLDER (a path relative to the mailbox)
# of the current $domain/$user.  MODE is "spam" or "ham" and selects the
# sa-learn option --spam or --ham.  Does nothing if FOLDER does not exist.
#
# A folder counts as "has new mail" when cur/ holds anything but exactly one
# file (normally the notice mail) or when new/ is not empty.  In that case
# the notice is removed, all mails in cur/ and new/ are learned and deleted,
# and a fresh notice is written.
learn_folder() {
    lf_mode=$1
    lf_folder=$2
    lf_dir="$base/$domain/$user/$lf_folder"

    [ -d "$lf_dir" ] || return 0

    lf_cur=`ls "$lf_dir/cur/" | wc -l`
    lf_new=`ls "$lf_dir/new/" | wc -l`

    # The counts are left unquoted on purpose: some wc implementations pad
    # the number with blanks.
    if [ $lf_cur -ne 1 ] || [ $lf_new -ne 0 ]; then
        remove_notices "$lf_folder"
        for lf_sub in cur new; do
            # Expand the file list once; if the directory is empty the
            # pattern stays as it is and there is nothing to learn.
            set -- "$lf_dir/$lf_sub"/*
            [ -e "$1" ] || continue
            "$SA_LEARN" -u "$username" --dbpath "$base/$domain/$user/" --sync "--$lf_mode" "$@"
            rm -f "$@" >/dev/null 2>&1
        done
        create_notices "$lf_folder"
    fi
}

# Visit every mailbox directory $base/<domain>/<user>.  Shell patterns are
# used instead of parsing the output of ls; the trailing slash restricts the
# matches to directories.
for domain_dir in "$base"/*/; do
  [ -d "$domain_dir" ] || continue
  domain=`basename "$domain_dir"`
  for user_dir in "$domain_dir"*/; do
    [ -d "$user_dir" ] || continue
    user=`basename "$user_dir"`
    username="$user@$domain"
    # The notice mail is addressed to the owner of the mailbox.
    TO=$username
    # Folder with mails to be learned as spam.
    learn_folder spam "$spam"
    # Folder with mails to be learned as ham.
    learn_folder ham "$ham"
  done
done


Spam-Report

Script to collect (count) all spam and send a spam report via email (from http://www.linux-magazin.de/heft_abo/ausgaben/2007/01/muellsortierung), Listing


  • spamreport.pl
#!/usr/bin/perl -w                                                                                                                                    
#                                                                                                                                                     
# Script for collect all spam and generate a spamreport                                                                                               
#                                                                                                                                                     
# Author: Matthias Jansen                                                                                                                             
# Version: 0.2, 2007/01/05                                                                                                                            

use strict;
use Mail::Sendmail;
use HTML::Entities;
use Fcntl ':flock'; # import LOCK_* constants

# Get all possible spam: regular files modified within the last day that
# live in a Maildir ".Spam" folder (cur/ or new/), skipping names that end
# in "T" (presumably the Maildir "trashed" flag -- TODO confirm).
#
# find(1) is started without a shell (list-form pipe open), so parentheses
# and glob patterns need no escaping.  The former backtick command contained
# a line break; the shell therefore ran "-a -not -name '*T' -print" as a
# separate command and that part of the filter never reached find.
my @find_cmd = (
        '/usr/bin/find', '/var/vpopmail/domains/',
        '-mtime', '-1', '-a', '-type', 'f', '-a',
        '(', '-wholename', '*Maildir/.Spam/cur/*',
             '-o', '-wholename', '*Maildir/.Spam/new/*', ')',
        '-a', '-not', '-name', '*T', '-print',
);
open(my $find_fh, '-|', @find_cmd) or die "Cannot run $find_cmd[0]: $!";
my @files = <$find_fh>;         # one path per element, trailing newline kept
close($find_fh);

# Spam entries per mailbox:
#   "user@domain" => [ { subject => ..., from => ... }, ... ]
# Subject and sender are stored HTML-encoded.
my %spams = ();

# collect all the data
for my $file (@files) {

        # The list may still carry the line ending of the find output; a
        # three-argument open() does not strip it, so use a chomped copy.
        chomp(my $path = $file);

        # The first two path components below the vpopmail root are taken
        # as domain and mailbox name; skip anything that does not fit.
        my ($domain, $user) = $path =~ m{/var/vpopmail/domains/([^/]+)/([^/]+)/};
        next unless defined $domain && defined $user;

        my $currentuser = $user."@".$domain;

        # Mails that cannot be opened are silently skipped (as before).
        open(my $mail, '<', $path) or next;

        # Lock the mail before reading.  A shared lock is enough for a
        # reader; an exclusive lock may be refused on a read-only handle
        # where flock() is emulated via fcntl().
        flock($mail, LOCK_SH);

        my $subject = '';
        my $from = '';

        while (defined(my $line = <$mail>)) {
                $line =~ s/\r?\n\z//;

                # An empty line ends the header; never pick up "Subject:" or
                # "From:" lines from the message body.
                last if $line eq '';

                if ($line =~ /^Subject:\s*(.*)$/i) { $subject = $1; }
                elsif ($line =~ /^From:\s*(.*)$/i) { $from = $1; }

                # Stop as soon as both values are known.
                last if length($subject) && $from ne '';
        }

        # Closing the handle also releases the lock.
        close($mail);

        # encode HTML codes (in place)
        encode_entities($subject);
        encode_entities($from);

        # Append to the list of this mailbox; this works regardless of the
        # order in which find returned the files.
        push @{ $spams{$currentuser} }, { 'subject' => $subject, 'from' => $from };
}


# generate the spamreport now

# Read the HTML template in one piece.  Without the template every report
# would be an empty mail, so give up early if it cannot be read.
my $tpl_file = '/root/bin/spamreport.tpl';
open(my $tpl_fh, '<', $tpl_file) or die "Cannot open $tpl_file: $!";
my $tpl = do { local $/; <$tpl_fh> };
close($tpl_fh);
$tpl = '' unless defined $tpl;  # empty template file

# One mail per mailbox: fill in the address and one table row per spam mail.
while (my ($user,$data) = each(%spams)) {
        (my $text = $tpl) =~ s/###EMAIL###/$user/;

        # 'from' and 'subject' were HTML-encoded when they were collected.
        my $spam_text = join('',
                map { '<tr><td>'.$_->{'from'}.'</td><td>'.$_->{'subject'}.'</td></tr>' } @$data);
        $text =~ s/###SPAMTEXT###/$spam_text/;

        # NOTE(review): the registered MIME charset name is "utf-8"; the
        # value below is kept unchanged.
        my %mail = ( To    => $user,
                From    => 'Spamreporter <spamreporter@domain.tld>',
                Message => $text,
                Server  => '127.0.0.1',
                Subject => 'Spamreport',
                'Content-Type' => 'text/html; charset="utf8"'
        );

        # Report delivery problems instead of dropping them silently, but
        # keep going with the remaining mailboxes.
        sendmail(%mail)
                or warn "Could not send spamreport to $user: $Mail::Sendmail::error\n";
}



  • spamreport.tpl
<html>
<head>
</head>
<body>
<b>Spamreport von domain.tld für die Emailadresse ###EMAIL###</b>
<br><br>
Neuen Mails im Ordner Spam der letzten 24 Stunden<br><br>
<table border=1 cellspacing=0 cellpadding=5>
<tr><th>Absender</th><th>Betreff</th></tr>
###SPAMTEXT###
</table>
<p>Sie k&ouml;nnen sich den Inhalt des Spam-Ordners &uuml;ber das <a href="https://domain.tld/mail/">Webinterface</a> anzeigen lassen.</p>
</body>
</html>


Bilderkennung

Plugin für SpamAssassin: FuzzyOcr (als Erweiterung von OcrPlugin). http://www.huschi.net/11_207_de.html

Statistiken

http://www.rulesemporium.com/programs/sa-stats-1.0.txt
http://software.inl.fr/trac/trac.cgi/wiki/EdenMail/SpamStats?