|
|
|
#!/usr/bin/perl
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use Mail::IMAPClient;
|
|
|
|
use MIME::Parser;
|
|
|
|
use Getopt::Long;
|
|
|
|
use Data::Dumper;
|
|
|
|
|
|
|
|
# Hide args from process list
|
|
|
|
$0 = 'imap-sa-learn';
|
|
|
|
|
|
|
|
my ($all,$list) = undef;
|
|
|
|
my $server = $ENV{IMAP_SA_LEARN_SERVER};
|
|
|
|
my $port = $ENV{IMAP_SA_LEARN_PORT};
|
|
|
|
my $user = $ENV{IMAP_SA_LEARN_USER};
|
|
|
|
my $password = $ENV{IMAP_SA_LEARN_PASSWORD};
|
|
|
|
my $spamdir = $ENV{IMAP_SA_LEARN_SPAMDIR} || 'Spam';
|
|
|
|
my $hamdir = $ENV{IMAP_SA_LEARN_HAMDIR} || 'Ham';
|
|
|
|
my $security = $ENV{IMAP_SA_LEARN_SECURITY} || 'tls';
|
|
|
|
my $attachment = $ENV{IMAP_SA_LEARN_ATTACHMENT} || 0;
|
|
|
|
my $debug = $ENV{IMAP_SA_LEARN_DEBUG} || 0;
|
|
|
|
my $need_sync = 0;
|
|
|
|
|
|
|
|
GetOptions(
|
|
|
|
'server=s' => \$server,
|
|
|
|
'security=s' => \$security,
|
|
|
|
'port=i' => \$port,
|
|
|
|
'user=s' => \$user,
|
|
|
|
'password=s' => \$password,
|
|
|
|
'spamdir=s' => \$spamdir,
|
|
|
|
'hamdir=s' => \$hamdir,
|
|
|
|
'attachment' => \$attachment,
|
|
|
|
'all' => \$all,
|
|
|
|
'list' => \$list,
|
|
|
|
'debug+' => \$debug
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
|
|
# Set ssl and starttls based on security value
|
|
|
|
my $ssl = (lc $security eq 'tls' or lc $security eq 'ssl') ? 1 : 0;
|
|
|
|
my $starttls = (lc $security eq 'starttls' or lc $security eq 'start_tls') ? 1 : 0;
|
|
|
|
|
|
|
|
# Unless port is explicitely given, set the default based on SSL being enabled
|
|
|
|
$port ||= ($ssl) ? '993' : '143';
|
|
|
|
|
|
|
|
# Check required args are given
|
|
|
|
if (not defined $server or not defined $user or not defined $password){
|
|
|
|
die "You must provide server, user and password";
|
|
|
|
}
|
|
|
|
|
|
|
|
# Check for mutual exclusive options
|
|
|
|
if ($ssl and $starttls){
|
|
|
|
die "ssl and starttls are mutually exclusive";
|
|
|
|
}
|
|
|
|
|
|
|
|
# Create the imap client
|
|
|
|
my $imap = Mail::IMAPClient->new(
|
|
|
|
Server => $server,
|
|
|
|
User => $user,
|
|
|
|
Password => $password,
|
|
|
|
Port => $port,
|
|
|
|
Ssl => $ssl,
|
|
|
|
Starttls => $starttls,
|
|
|
|
);
|
|
|
|
if ($@){
|
|
|
|
die "Can't connect to imap server: $@\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
# If --list is given, just print the list of folders available and exit
|
|
|
|
# This can be used to check how some special characters are encoded so
|
|
|
|
# you can just copy/past it later
|
|
|
|
if ($list){
|
|
|
|
print $_ . "\n" foreach (@{$imap->folders});
|
|
|
|
exit 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (defined $spamdir){
|
|
|
|
crawl_imap_dir($spamdir,'spam');
|
|
|
|
}
|
|
|
|
if (defined $hamdir){
|
|
|
|
crawl_imap_dir($hamdir,'ham');
|
|
|
|
}
|
|
|
|
|
|
|
|
if ($need_sync){
|
|
|
|
qx(sa-learn --sync);
|
|
|
|
}
|
|
|
|
|
|
|
|
$imap->close;
|
|
|
|
exit 0;
|
|
|
|
|
|
|
|
sub crawl_imap_dir {
|
|
|
|
my $dir = shift;
|
|
|
|
my $type = shift;
|
|
|
|
$imap->select($dir);
|
|
|
|
debug("Crawling in folder $dir\n");
|
|
|
|
|
|
|
|
# Default is to only process unseen emails
|
|
|
|
my @list = $imap->unseen;
|
|
|
|
# Unless --all is given, in which case we process also seen ones
|
|
|
|
push @list, $imap->seen if ($all);
|
|
|
|
|
|
|
|
foreach my $id (@list){
|
|
|
|
debug("Found mail id $id", 2);
|
|
|
|
my $raw_mail = $imap->message_string($id);
|
|
|
|
|
|
|
|
# When --attachment is passed, we expect spam/ham to be attached as
|
|
|
|
# message/rfc822. Extract the first matching attachment and feed it to sa-learn
|
|
|
|
# Note that there's no support for nested attachment, but it shouldn't be needed
|
|
|
|
if ($attachment) {
|
|
|
|
my $parser = MIME::Parser->new;
|
|
|
|
my $entity = $parser->parse_data($raw_mail);
|
|
|
|
my $inner_found = 0;
|
|
|
|
foreach my $part ($entity->parts) {
|
|
|
|
if ($part->effective_type eq 'message/rfc822'){
|
|
|
|
# Remove the first 3 lines which presents the raw mail, but as an attachment
|
|
|
|
my @lines = split /\n/, $part->stringify;
|
|
|
|
splice @lines,0,3;
|
|
|
|
sa_learn(join("\n", @lines), $type);
|
|
|
|
$inner_found = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (not $inner_found){
|
|
|
|
debug("Couldn't find a message/rfc822 attachment. Are you sure --attachment is needed ?");
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
sa_learn($raw_mail, $type);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
sub sa_learn {
|
|
|
|
my $data = shift;
|
|
|
|
my $type = shift;
|
|
|
|
open SALEARN, "| sa-learn --no-sync --$type";
|
|
|
|
print SALEARN $data;
|
|
|
|
close SALEARN;
|
|
|
|
$need_sync = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
sub debug {
|
|
|
|
my $msg = shift;
|
|
|
|
my $level = shift;
|
|
|
|
$level ||= 1;
|
|
|
|
if ($debug ge $level){
|
|
|
|
print "$msg\n";
|
|
|
|
}
|
|
|
|
}
|