File: //usr/share/perl5/vendor_perl/Amavis/SpamControl/SpamAssassin.pm
# SPDX-License-Identifier: GPL-2.0-or-later
package Amavis::SpamControl::SpamAssassin;
use strict;
use re 'taint';
use warnings;
use warnings FATAL => qw(utf8 void);
no warnings 'uninitialized';
# use warnings 'extra'; no warnings 'experimental::re_strict'; use re 'strict';
BEGIN {
require Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
$VERSION = '2.412';
@ISA = qw(Exporter);
}
use subs @EXPORT_OK;
use Errno qw(ENOENT EACCES EAGAIN EBADF);
use FileHandle;
use Mail::SpamAssassin;
use Amavis::Conf qw(:platform :confvars :sa $daemon_user c cr ca);
use Amavis::IO::FileHandle;
use Amavis::Lookup qw(lookup lookup2);
use Amavis::ProcControl qw(exit_status_str proc_status_ok
kill_proc run_command run_as_subprocess
collect_results collect_results_structured);
use Amavis::rfc2821_2822_Tools;
use Amavis::Timing qw(section_time get_rusage);
use Amavis::Util qw(ll do_log do_log_safe sanitize_str prolong_timer
add_entropy min max minmax get_deadline
safe_encode_utf8_inplace);
use vars qw(@sa_debug_fac);
sub getCommonSAModules {
my $self = $_[0];
my(@modules) = qw(
Mail::SpamAssassin::Locker
Mail::SpamAssassin::Locker::Flock
Mail::SpamAssassin::Locker::UnixNFSSafe
Mail::SpamAssassin::PersistentAddrList
Mail::SpamAssassin::DBBasedAddrList
Mail::SpamAssassin::AutoWhitelist
Mail::SpamAssassin::BayesStore
Mail::SpamAssassin::BayesStore::DBM
Mail::SpamAssassin::PerMsgLearner
Net::DNS::RR::SOA Net::DNS::RR::NS Net::DNS::RR::MX
Net::DNS::RR::A Net::DNS::RR::AAAA Net::DNS::RR::PTR
Net::DNS::RR::CNAME Net::DNS::RR::DNAME Net::DNS::RR::OPT
Net::DNS::RR::TXT Net::DNS::RR::SPF Net::DNS::RR::NAPTR
Net::DNS::RR::RP Net::DNS::RR::HINFO Net::DNS::RR::AFSDB
Net::CIDR::Lite
URI URI::Escape URI::Heuristic URI::QueryParam URI::Split URI::URL
URI::WithBase URI::_foreign URI::_generic URI::_ldap URI::_login
URI::_query URI::_segment URI::_server URI::_userpass
URI::_idna URI::_punycode URI::data URI::ftp
URI::gopher URI::http URI::https URI::ldap URI::ldapi URI::ldaps
URI::mailto URI::mms URI::news URI::nntp URI::pop URI::rlogin URI::rsync
URI::rtsp URI::rtspu URI::sip URI::sips URI::snews URI::ssh URI::telnet
URI::tn3270 URI::urn URI::urn::oid
URI::file URI::file::Base URI::file::Unix URI::file::Win32
);
# DBD::mysql
# DBI::Const::GetInfo::ANSI DBI::Const::GetInfo::ODBC DBI::Const::GetInfoType
# Mail::SpamAssassin::BayesStore::SQL
# Mail::SpamAssassin::SQLBasedAddrList
# ??? ArchiveIterator Reporter Getopt::Long Sys::Syslog lib
@modules;
}
sub getSA2Modules {
qw(Mail::SpamAssassin::UnixLocker Mail::SpamAssassin::BayesStoreDBM
);
# Mail::SpamAssassin::SpamCopURI
}
sub getSA31Modules {
qw( );
# Mail::SpamAssassin::BayesStore::MySQL
# Mail::SpamAssassin::BayesStore::PgSQL
}
sub getSA32Modules {
qw(Mail::SpamAssassin::Bayes Mail::SpamAssassin::Bayes::CombineChi
Mail::SpamAssassin::Locales Encode::Detect
);
# Mail::SpamAssassin::BayesStore::MySQL
# Mail::SpamAssassin::BayesStore::PgSQL
# /var/db/spamassassin/compiled/.../Mail/SpamAssassin/CompiledRegexps/body_0.pm
}
sub getSAPlugins {
my $self = $_[0];
my(@modules);
my $sa_version_num = $self->{version_num};
push(@modules, qw(Hashcash RelayCountry SPF URIDNSBL)) if $sa_version_num>=3;
push(@modules, qw(DKIM)) if $sa_version_num >= 3.001002;
if ($sa_version_num >= 3.001000) {
push(@modules, qw(
AWL AccessDB AntiVirus AutoLearnThreshold DCC MIMEHeader Pyzor Razor2
ReplaceTags TextCat URIDetail WhiteListSubject));
# 'DomainKeys' plugin fell out of fashion with SA 3.2.0, don't load it
# 'SpamCop' loads Net::SMTP and Net::Cmd, not needed otherwise
}
if ($sa_version_num >= 3.002000) {
push(@modules, qw(
BodyEval DNSEval HTMLEval HeaderEval MIMEEval RelayEval URIEval WLBLEval
ASN Bayes BodyRuleBaseExtractor Check HTTPSMismatch OneLineBodyRuleType
ImageInfo Rule2XSBody Shortcircuit VBounce));
}
if ($sa_version_num >= 3.004000) {
push(@modules, qw(AskDNS));
}
$_ = 'Mail::SpamAssassin::Plugin::'.$_ for @modules;
my(%mod_names) = map(($_,1), @modules);
# add supporting modules
push(@modules, qw(Razor2::Client::Agent))
if $mod_names{'Mail::SpamAssassin::Plugin::Razor2'};
# push(@modules, qw(IP::Country::Fast))
# if $mod_names{'Mail::SpamAssassin::Plugin::RelayCountry'};
push(@modules, qw(Mail::DKIM Mail::DKIM::Verifier Net::DNS::Resolver))
if $mod_names{'Mail::SpamAssassin::Plugin::DKIM'};
push(@modules, qw(Image::Info Image::Info::GIF Image::Info::JPEG
Image::Info::PNG Image::Info::BMP Image::Info::TIFF))
if $mod_names{'Mail::SpamAssassin::Plugin::ImageInfo'};
if ($mod_names{'Mail::SpamAssassin::Plugin::SPF'}) {
if ($sa_version_num < 3.002000) {
# only the old Mail::SPF::Query was supported
push(@modules, qw(Mail::SPF::Query));
} else {
# SA 3.2.0 supports both the newer Mail::SPF and the old Mail::SPF::Query
# but we won't be loading the Mail::SPF::Query
push(@modules, qw(
Mail::SPF Mail::SPF::Server Mail::SPF::Request
Mail::SPF::Mech Mail::SPF::Mech::A Mail::SPF::Mech::PTR
Mail::SPF::Mech::All Mail::SPF::Mech::Exists Mail::SPF::Mech::IP4
Mail::SPF::Mech::IP6 Mail::SPF::Mech::Include Mail::SPF::Mech::MX
Mail::SPF::Mod Mail::SPF::Mod::Exp Mail::SPF::Mod::Redirect
Mail::SPF::SenderIPAddrMech
Mail::SPF::v1::Record Mail::SPF::v2::Record
NetAddr::IP NetAddr::IP::Util
auto::NetAddr::IP::_compV6 auto::NetAddr::IP::short
auto::NetAddr::IP::InetBase::inet_any2n
auto::NetAddr::IP::InetBase::inet_n2ad
auto::NetAddr::IP::InetBase::inet_n2dx
auto::NetAddr::IP::InetBase::inet_ntoa
auto::NetAddr::IP::InetBase::ipv6_aton
auto::NetAddr::IP::InetBase::ipv6_ntoa
));
}
}
if ($mod_names{'Mail::SpamAssassin::Plugin::DomainKeys'} ||
$mod_names{'Mail::SpamAssassin::Plugin::DKIM'}) {
push(@modules, qw(
Crypt::OpenSSL::RSA
auto::Crypt::OpenSSL::RSA::new_public_key
auto::Crypt::OpenSSL::RSA::new_key_from_parameters
auto::Crypt::OpenSSL::RSA::get_key_parameters
auto::Crypt::OpenSSL::RSA::import_random_seed
Digest::SHA Error));
}
# HTML/HeadParser.pm
# do_log(5, "getSAPlugins %s: %s", $sa_version_num, join(', ',@modules));
@modules;
}
# invoked by a parent process before forking and chrooting
#
sub loadSpamAssassinModules {
my $self = $_[0];
my $sa_version_num = $self->{version_num};
my @modules; # modules to be loaded before chroot takes place
push(@modules, $self->getCommonSAModules);
if (!defined($sa_version_num)) {
die "loadSpamAssassinModules: unknown version of Mail::SpamAssassin";
} elsif ($sa_version_num < 3) {
push(@modules, $self->getSA2Modules);
} elsif ($sa_version_num >= 3.001 && $sa_version_num < 3.002) {
push(@modules, $self->getSA31Modules);
} elsif ($sa_version_num >= 3.002) {
push(@modules, $self->getSA32Modules);
}
push(@modules, $self->getSAPlugins);
my $missing;
$missing = Amavis::Boot::fetch_modules('PRE-COMPILE OPTIONAL MODULES', 0,
@modules) if @modules;
do_log(2, 'INFO: SA version: %s, %.6f, no optional modules: %s',
$self->{version}, $sa_version_num, join(' ',@$missing))
if ref $missing && @$missing;
}
# invoked by a parent process before forking but after chrooting
#
sub initializeSpamAssassinLogger {
my $self = $_[0];
local($1,$2,$3,$4,$5,$6); # just in case
if (!Mail::SpamAssassin::Logger->UNIVERSAL::can('add')) {
# old SA?
} elsif (!Mail::SpamAssassin::Logger::add(method => 'Amavislog',
debug => $sa_debug )) {
do_log(-1,"Mail::SpamAssassin::Logger::add failed");
} else { # successfully rigged SpamAssassin with our logger
Mail::SpamAssassin::Logger::remove('stderr'); # remove a default SA logger
if (defined $sa_debug && $sa_debug =~ /[A-Za-z_,-]/) {
# looks like a list of SA debug facilities
push(@sa_debug_fac, split(/[ \t]*,[ \t]*/, $sa_debug));
} else {
unshift(@sa_debug_fac, 'info', $sa_debug ? 'all' : () );
}
}
}
# invoked by a parent process before forking but after chrooting
#
sub new_SpamAssassin_instance {
my($self,$running_as_parent) = @_;
# pick next available number as an instance name
my $sa_instance_name = sprintf('%s', scalar @{$self->{instances}});
do_log(1, "initializing Mail::SpamAssassin (%s)", $sa_instance_name);
my $sa_version_num = $self->{version_num};
my(@new_sa_debug_fac);
for my $fac (@sa_debug_fac) { # handle duplicates and negation: foo,nofoo,x,x
my $bfac = $fac; $bfac =~ s/^none\z/noall/i; $bfac =~ s/^no(?=.)//si;
@new_sa_debug_fac = grep(!/^(no)?\Q$bfac\E\z/si, @new_sa_debug_fac);
push(@new_sa_debug_fac, $fac);
}
do_log(2,"SpamAssassin debug facilities: %s", join(',',@sa_debug_fac));
my $sa_args = {
debug => !@sa_debug_fac ? undef : \@sa_debug_fac,
save_pattern_hits => grep(lc($_) eq 'all', @sa_debug_fac) ? 1 : 0,
dont_copy_prefs => 1,
require_rules => 1,
stop_at_threshold => 0,
need_tags => 'TIMING,LANGUAGES,RELAYCOUNTRY,ASN,ASNCIDR',
local_tests_only => $sa_local_tests_only,
home_dir_for_helpers => $helpers_home,
rules_filename => $sa_configpath,
site_rules_filename => $sa_siteconfigpath,
userprefs_filename => $sa_userprefs_file,
skip_prng_reseeding => 1, # we'll do it ourselves (SA 3.4.0)
# PREFIX => '/usr/local',
# DEF_RULES_DIR => '/usr/local/share/spamassassin',
# LOCAL_RULES_DIR => '/etc/mail/spamassassin',
# LOCAL_STATE_DIR => '/var/lib/spamassassin',
#see Mail::SpamAssassin man page for other options
};
if ($sa_version_num < 3.001005 && !defined $sa_args->{LOCAL_STATE_DIR})
{ $sa_args->{LOCAL_STATE_DIR} = '/var/lib' } # don't ignore sa-update rules
local($1,$2,$3,$4,$5,$6); # avoid Perl bug, $1 gets tainted in compile_now
my $spamassassin_obj = Mail::SpamAssassin->new($sa_args);
# $Mail::SpamAssassin::DEBUG->{rbl}=-3;
# $Mail::SpamAssassin::DEBUG->{rulesrun}=4+64;
if ($running_as_parent) {
# load SA config files and rules, try to preload most modules
$spamassassin_obj->compile_now;
$spamassassin_obj->call_plugins("prefork_init"); # since SA 3.4.0
}
if (ll(2) && !@{$self->{instances}}) {
# created the first/main/only SA instance
if ($spamassassin_obj->UNIVERSAL::can('get_loaded_plugins_list')) {
my(@plugins) = $spamassassin_obj->get_loaded_plugins_list;
do_log(2, "SpamAssassin loaded plugins: %s", join(', ', sort
map { my $n = ref $_; $n =~ s/^Mail::SpamAssassin::Plugin:://; $n }
@plugins));
# printf STDOUT ("%s\n", join(", ",@plugins));
# not in use: AccessDB AntiVirus TextCat; ASN BodyRuleBaseExtractor
# OneLineBodyRuleType Rule2XSBody Shortcircuit
}
}
# provide a default username
my $sa_uname = $spamassassin_obj->{username};
if (!defined $sa_uname || $sa_uname eq '')
{ $spamassassin_obj->{username} = $sa_uname = $daemon_user }
$self->{default_username} = $sa_uname if !defined $self->{default_username};
my $sa_instance = {
instance_name => $sa_instance_name,
spamassassin_obj => $spamassassin_obj,
loaded_user_name => $sa_uname, loaded_user_config => '',
conf_backup => undef, conf_backup_additional => {},
};
# remember some initial settings, like %msa_backup in spamd
for (qw(username user_dir userstate_dir learn_to_journal)) {
if (exists $spamassassin_obj->{$_}) {
$sa_instance->{conf_backup_additional}{$_} = $spamassassin_obj->{$_};
}
}
push(@{$self->{instances}}, $sa_instance);
alarm(0); # seems like SA forgets to clear alarm in some cases
umask($self->{saved_umask}); # restore our umask, SA clobbered it
section_time('SA new');
$sa_instance;
}
sub new {
my($class, $scanner_name,$module,@args) = @_;
my(%options) = @args;
my $self =
bless { scanner_name => $scanner_name, options => \%options }, $class;
$self->{initialized_stage} = 1;
$self->{saved_umask} = umask;
my $sa_version = Mail::SpamAssassin->Version;
local($1,$2,$3);
my $sa_version_num; # turn '3.1.8-pre1' into 3.001008
$sa_version_num = sprintf("%d.%03d%03d", $1,$2,$3)
if $sa_version =~ /^(\d+)\.(\d+)(?:\.(\d+))/; # ignore trailing non-digits
$self->{version} = $sa_version;
$self->{version_num} = $sa_version_num;
$self->{default_username} = undef;
$self->{instances} = [];
$self;
}
sub init_pre_chroot {
my $self = $_[0];
$self->{initialized_stage} == 1
or die "Wrong initialization sequence: " . $self->{initialized_stage};
$self->loadSpamAssassinModules;
$self->{initialized_stage} = 2;
}
sub init_pre_fork {
my $self = $_[0];
$self->{initialized_stage} == 2
or die "Wrong initialization sequence: " . $self->{initialized_stage};
$self->initializeSpamAssassinLogger;
$self->new_SpamAssassin_instance(1) for (1 .. max(1,$sa_num_instances));
$self->{initialized_stage} = 3;
}
sub init_child {
my $self = $_[0];
$self->{initialized_stage} == 3
or die "Wrong initialization sequence: " . $self->{initialized_stage};
for my $sa_instance (@{$self->{instances}}) {
my $spamassassin_obj = $sa_instance->{spamassassin_obj};
next if !$spamassassin_obj;
$spamassassin_obj->call_plugins("spamd_child_init");
umask($self->{saved_umask}); # restore our umask, SA may have clobbered it
}
$self->{initialized_stage} = 4;
}
sub rundown_child {
my $self = $_[0];
for my $sa_instance (@{$self->{instances}}) {
my $spamassassin_obj = $sa_instance->{spamassassin_obj};
next if !$spamassassin_obj;
do_log(3,'SA rundown_child (%s)', $sa_instance->{instance_name});
$spamassassin_obj->call_plugins("spamd_child_post_connection_close");
umask($self->{saved_umask}); # restore our umask, SA may have clobbered it
}
$self->{initialized_stage} = 5;
}
sub call_spamassassin($$$$) {
my($self,$msginfo,$lines,$size_limit) = @_;
my(@result); my($mail_obj,$per_msg_status);
my $which_section = 'SA prepare';
my $saved_pid = $$; my $sa_version_num = $self->{version_num};
my $msg = $msginfo->mail_text; # a file handle or a string ref
my $msg_str_ref = $msginfo->mail_text_str; # have an in-memory copy?
$msg = $msg_str_ref if ref $msg_str_ref;
# pass data to SpamAssassin as ARRAY or GLOB or STRING or STRING_REF
my $data_representation = ref($msg) eq 'SCALAR' ? 'STRING' : 'GLOB';
$data_representation = 'STRING_REF'
if $data_representation eq 'STRING' && $sa_version_num >= 3.004000;
my $data; # this will be passed to SpamAssassin's parser
local(*F);
if ($data_representation eq 'STRING' ||
$data_representation eq 'STRING_REF') {
$which_section = 'SA msg read';
$data = join('', @$lines); # a string to be passed to SpamAssassin
if (!defined $msg) {
# empty mail
} elsif (ref $msg eq 'SCALAR') {
$data .= $$msg;
} elsif ($msg->isa('MIME::Entity')) {
die "passing a MIME::Entity object to SpamAssassin is not implemented";
} else { # read message into memory, yuck
my $file_position = $msginfo->skip_bytes;
$msg->seek($file_position, 0) or die "Can't rewind mail file: $!";
my $nbytes;
while ( $nbytes=$msg->sysread($data, 32768, length $data) ) {
$file_position += $nbytes;
last if defined $size_limit && length($data) > $size_limit;
}
defined $nbytes or die "Error reading: $!";
}
if (defined $size_limit && length($data) > $size_limit) {
substr($data,$size_limit) = "[...]\n";
}
section_time($which_section);
} elsif ($data_representation eq 'ARRAY') {
# read message into memory, yuck - even worse: line-by-line
$which_section = 'SA msg read'; my $ln; my $len = 0;
if (defined $size_limit) { $len += length($_) for @$lines }
$msg->seek($msginfo->skip_bytes, 0) or die "Can't rewind mail file: $!";
for ($! = 0; defined($ln=<$msg>); $! = 0) { # header section
push(@$lines,$ln);
if (defined $size_limit)
{ $len += length($ln); last if $len > $size_limit }
last if $ln eq "\n";
}
defined $ln || $! == 0 or # returning EBADF at EOF is a perl bug
$! == EBADF ? do_log(0,"Error reading mail header section: %s", $!)
: die "Error reading mail header section: $!";
if (!defined $size_limit) {
for ($! = 0; defined($ln=<$msg>); $! = 0) { push(@$lines,$ln) } # body
} else {
for ($! = 0; defined($ln=<$msg>); $! = 0) { # body
push(@$lines,$ln);
$len += length($ln); last if $len > $size_limit;
}
}
defined $ln || $! == 0 or # returning EBADF at EOF is a perl bug
$! == EBADF ? do_log(1,"Error reading mail body: %s", $!)
: die "Error reading mail body: $!";
$data = $lines; # array of lines to be passed to SpamAssassin
section_time($which_section);
}
my($rusage_self_before, $rusage_children_before, @sa_cpu_usage);
my $eval_stat;
$which_section = 'SA prelim';
eval {
if ($data_representation eq 'GLOB') { # pass mail as a GLOB to SpamAssassin
ref($msg) ne 'SCALAR' # expects $msg to be a file handle
or die "panic: data_representation is GLOB, but message is in memory";
do_log(2,"truncating a message passed to SA at %d bytes, orig %d",
$size_limit, $msginfo->msg_size) if defined $size_limit;
# present a virtual file to SA, an original mail file prefixed by @$lines
tie(*F,'Amavis::IO::FileHandle');
open(F, $msg,$lines,$size_limit) or die "Can't open SA virtual file: $!";
binmode(F) or die "Can't set binmode on a SA virtual file: $!";
$data = \*F; # a GLOB to be passed to SpamAssassin
}
$which_section = 'SA userconf';
my $sa_default_username = $self->{default_username};
my $per_recip_data = $msginfo->per_recip_data;
$per_recip_data = [] if !$per_recip_data;
my $uconf_maps_ref = ca('sa_userconf_maps');
my $uname_maps_ref = ca('sa_username_maps');
$uconf_maps_ref = [] if !$uconf_maps_ref;
$uname_maps_ref = [] if !$uname_maps_ref;
my(%uconf_filename_available);
my(%sa_configs_hash); # collects distinct config names and usernames
my $uconf_unsupported = 0;
my $r_ind = 0;
for my $r (@$per_recip_data) {
my($uconf,$uname);
my $recip_addr = $r->recip_addr;
$uconf = lookup2(0, $recip_addr, $uconf_maps_ref) if @$uconf_maps_ref;
$uname = lookup2(0, $recip_addr, $uname_maps_ref) if @$uname_maps_ref;
$uconf = '' if !defined $uconf;
$uname = $sa_default_username if !defined $uname || $uname eq '';
if ($uconf =~ /^sql:/i) {
$uconf = $uname eq $sa_default_username ? '' : 'sql:'.$uname;
}
if ($uconf =~ /^ldap:/i) {
$uconf = $uname eq $sa_default_username ? '' : 'ldap:'.$uname;
}
if ($sa_version_num < 3.003000 && $uconf ne '') {
$uconf = ''; $uconf_unsupported = 1;
}
if ($uconf eq '') {
# ok, no special config required, just using a default
} elsif ($uconf =~ /^sql:/i) {
# assume data is in SQL, possibly an empty set
} elsif ($uconf =~ /^ldap:/i) {
# assume data is in LDAP, possibly an empty set
} else {
$uconf = "$MYHOME/$uconf" if $uconf !~ m{^/};
if ($uconf_filename_available{$uconf}) {
# good, already checked and is available, keep it
} elsif (defined $uconf_filename_available{$uconf}) {
# defined but false, already checked and failed, use a default config
$uconf = '';
} else {
# check for existence of a SA user configuration/preferences file
my(@stat_list) = stat($uconf); # symlinks-friendly
my $errn = @stat_list ? 0 : 0+$!;
my $msg = $errn == ENOENT ? "does not exist"
: $errn ? "is inaccessible: $!"
: -d _ ? "is a directory"
: !-f _ ? "is not a regular file"
: !-r _ ? "is not readable" : undef;
if (defined $msg) {
do_log(1,'SA user config file "%s" %s, ignoring it', $uconf,$msg);
$uconf_filename_available{$uconf} = 0; # defined but false
$uconf = ''; # ignoring it, use a default config
} else {
$uconf_filename_available{$uconf} = 1;
}
}
}
# collect lists of recipient indices for each unique config/user pair
# the %sa_configs_hash is a two-level hash: on $uconf and $uname
my $p = $sa_configs_hash{$uconf};
if (!$p) { $sa_configs_hash{$uconf} = $p = {} }
if (!exists $p->{$uname}) { $p->{$uname} = $r_ind }
else { $p->{$uname} .= ',' . $r_ind }
$r_ind++;
}
if ($uconf_unsupported) {
do_log(5,'SA user config loading unsupported for SA older than 3.3.0');
}
# refresh $sa_instance->{loaded_user_name}, just in case
for my $sa_instance (@{$self->{instances}}) {
my $spamassassin_obj = $sa_instance->{spamassassin_obj};
next if !$spamassassin_obj;
my $sa_uname = $spamassassin_obj->{username};
$sa_instance->{loaded_user_name} = defined $sa_uname ? $sa_uname : '';
}
my $sa_instance = $self->{instances}[0];
my $curr_conf = $sa_instance->{loaded_user_config};
my $curr_user = $sa_instance->{loaded_user_name};
# switching config files is the most expensive, sort to minimize switching
my(@conf_names); # a list of config names for which SA needs to be called;
# sorted: current first, baseline second, then the rest
push(@conf_names, $curr_conf) if exists $sa_configs_hash{$curr_conf};
push(@conf_names, '') if $curr_conf ne '' && exists $sa_configs_hash{''};
push(@conf_names,
grep($_ ne '' && $_ ne $curr_conf, keys %sa_configs_hash));
# call SA checking for each distinct SA userprefs config filename and user
for my $conf_user_pair (map { my $c = $_;
map([$c,$_], keys %{$sa_configs_hash{$c}})
} @conf_names) {
my($uconf,$uname) = @$conf_user_pair;
# comma-separated list of recip indices which use this SA config
my $rind_list = $sa_configs_hash{$uconf}{$uname};
if (ll(5)) {
do_log(5, "SA user config: \"%s\", username: \"%s\", %s, %s",
$uconf, $uname, $rind_list,
join(', ', map("($_)" . $per_recip_data->[$_]->recip_addr,
split(/,/,$rind_list))));
}
my $sa_instance;
if (@{$self->{instances}} <= 1) {
# pick the only choice
$sa_instance = $self->{instances}[0];
} else {
# choosing a suitably-matching SpamAssassin instance
my(@sa_instances_matching_uconf, @sa_instances_matching_both,
@sa_instances_available);
for my $sa_instance (@{$self->{instances}}) {
next if !$sa_instance->{spamassassin_obj};
push(@sa_instances_available, $sa_instance);
if ($sa_instance->{loaded_user_config} eq $uconf) {
push(@sa_instances_matching_uconf, $sa_instance);
if ($sa_instance->{loaded_user_name} eq $uname) {
push(@sa_instances_matching_both, $sa_instance);
}
}
}
my $fit_descr;
if (@sa_instances_matching_both) {
# just pick the first
$sa_instance = $sa_instances_matching_both[0];
$fit_descr = sprintf('exact fit, %d choices',
scalar @sa_instances_matching_both);
} elsif (@sa_instances_matching_uconf) {
# picking one at random
my $j = @sa_instances_matching_uconf <= 1 ? 0
: int(rand(scalar(@sa_instances_matching_uconf)));
$sa_instance = $sa_instances_available[$j];
$fit_descr = sprintf('good fit: same config, other user, %d choices',
scalar @sa_instances_matching_uconf);
} elsif ($uconf eq '') {
# the first instance is a good choice for switching to a dflt config
$sa_instance = $self->{instances}[0];
$fit_descr = 'need a default config, picking first';
} elsif (@sa_instances_available <= 1) {
$sa_instance = $sa_instances_available[0];
$fit_descr = 'different config, picking the only one available';
} elsif (@sa_instances_available == 2) {
$sa_instance = $sa_instances_available[1];
$fit_descr = 'different config, picking the second one';
} else {
# picking one at random, preferably not the first
my $j = 1+int(rand(scalar(@sa_instances_available)-1));
$sa_instance = $sa_instances_available[$j];
$fit_descr = 'different config, picking one at random';
}
do_log(2,'SA user config: instance chosen (%s), %s',
$sa_instance->{instance_name}, $fit_descr);
}
my $curr_conf = $sa_instance->{loaded_user_config};
my $curr_user = $sa_instance->{loaded_user_name};
my $spamassassin_obj = $sa_instance->{spamassassin_obj};
if ($curr_conf ne '' && $curr_conf ne $uconf) {
# revert SA configuration to its initial state
$which_section = 'revert_config';
ref $sa_instance->{conf_backup}
or die "panic, no conf_backup available";
for (qw(username user_dir userstate_dir learn_to_journal)) {
if (exists $sa_instance->{conf_backup_additional}{$_}) {
$spamassassin_obj->{$_} =
$sa_instance->{conf_backup_additional}{$_};
} else {
delete $spamassassin_obj->{$_};
}
}
# config leaks fixed in SpamAssassin 3.3.0, SA bug 6205, 6003, 4179
$spamassassin_obj->copy_config($sa_instance->{conf_backup}, undef)
or die "copy_config: failed to restore";
$sa_instance->{loaded_user_config} = $curr_conf = '';
do_log(5,"SA user config reverted to a saved copy");
section_time($which_section);
}
if ($uconf ne '' && $uconf ne $curr_conf) {
# load SA user configuration/preferences
if (!defined $sa_instance->{conf_backup}) {
$which_section = 'save_config';
do_log(5,"SA user config: saving SA user config");
$sa_instance->{conf_backup} = {};
$spamassassin_obj->copy_config(undef, $sa_instance->{conf_backup})
or die "copy_config: failed to save configuration";
section_time($which_section);
}
$which_section = 'load_config';
# User preferences include scoring options, scores, whitelists
# and blacklists, etc, but do not include rule definitions,
# privileged settings, etc. unless allow_user_rules is enabled;
# and they never include administrator settings
if ($uconf =~ /^sql:/) {
$uconf eq 'sql:'.$uname
or die "panic: loading SA config mismatch: $uname <-> $uconf";
do_log(5,"loading SA user config from SQL %s", $uname);
$spamassassin_obj->load_scoreonly_sql($uname);
} elsif ($uconf =~ /^ldap:/) {
$uconf eq 'ldap:'.$uname
or die "panic: loading SA config mismatch: $uname <-> $uconf";
do_log(5,"loading SA user config from LDAP %s", $uname);
$spamassassin_obj->load_scoreonly_ldap($uname);
} else {
do_log(5,"loading SA user config file %s", $uconf);
$spamassassin_obj->read_scoreonly_config($uconf);
}
$sa_instance->{loaded_user_config} = $curr_conf = $uconf;
section_time($which_section);
}
if ($uname ne $curr_user) {
$which_section = 'SA switch_user';
do_log(5,'SA user config: switching SA (%s) username "%s" -> "%s"',
$sa_instance->{instance_name}, $curr_user, $uname);
$spamassassin_obj->signal_user_changed({ username => $uname });
$sa_instance->{loaded_user_name} = $curr_user = $uname;
section_time($which_section);
}
ll(3) && do_log(3, "calling SA parse (%s), SA vers %s, %.6f, ".
"data as %s, recips_ind [%s]%s%s",
$sa_instance->{instance_name},
$self->{version}, $sa_version_num,
$data_representation, $rind_list,
($uconf eq '' ? '' : ", conf: \"$uconf\""),
($uname eq '' ? '' : ", user: \"$uname\"") );
if ($data_representation eq 'GLOB') {
seek(F,0,0) or die "Can't rewind a SA virtual file: $!";
}
$spamassassin_obj->timer_reset
if $spamassassin_obj->UNIVERSAL::can('timer_reset');
$which_section = 'SA parse';
my($remaining_time, $deadline) = get_deadline('SA check', 1, 5);
my(@mimepart_digests);
for (my(@traversal_stack) = $msginfo->parts_root;
my $part = pop @traversal_stack; ) { # pre-order tree traversal
my $digest = $part->digest;
push(@mimepart_digests, $digest) if defined $digest;
push(@traversal_stack, reverse @{$part->children}) if $part->children;
}
do_log(5,'mimepart digest: %s', $_) for @mimepart_digests;
my(%suppl_attrib) = (
'skip_prng_reseed' => 1, # do not call srand(), we already did it
'return_path' => $msginfo->sender_smtp,
'recipients' => [ map(qquote_rfc2821_local($_->recip_addr),
@$per_recip_data[split(/,/, $rind_list)]) ],
'originating' => $msginfo->originating ? 1 : 0,
'message_size' => $msginfo->msg_size,
'body_size' => $msginfo->orig_body_size,
!@mimepart_digests ? ()
: ('mimepart_digests' => \@mimepart_digests),
!c('enable_dkim_verification') ? ()
: ('dkim_signatures' => $msginfo->dkim_signatures_all),
!defined $deadline ? ()
: ('master_deadline' => $deadline),
'rule_hits' => [
# known attributes: rule, area, score, value, ruletype, tflags, descr
# { rule=>'AM:TEST1', score=>0.11 },
# { rule=>'TESTTEST', defscore=>0.22, descr=>'my test' },
!defined $size_limit ? () :
{ rule=>'__TRUNCATED', score=>-0.1, area=>'RAW: ', tflags=>'nice',
descr=>"Message size truncated to $size_limit B" },
],
'amavis_policy_bank_path' => c('policy_bank_path'),
);
($rusage_self_before, $rusage_children_before) = get_rusage();
$mail_obj = $sa_version_num < 3
? Mail::SpamAssassin::NoMailAudit->new(data=>$data, add_From_line=>0)
: $spamassassin_obj->parse(
$data_representation eq 'STRING_REF' ? \$data : $data,
0, \%suppl_attrib);
section_time($which_section);
$which_section = 'SA check';
if (@conf_names <= 1) {
do_log(4,"CALLING SA check (%s)", $sa_instance->{instance_name});
} else {
do_log(4,"CALLING SA check (%s) for recips: %s",
$sa_instance->{instance_name},
join(", ", @{$suppl_attrib{'recipients'}}));
}
{ local($1,$2,$3,$4,$5,$6); # avoid Perl 5.8.x bug, $1 gets tainted
$per_msg_status = $spamassassin_obj->check($mail_obj);
}
do_log(4,"DONE SA check (%s)", $sa_instance->{instance_name});
section_time($which_section);
$which_section = 'SA collect';
my($spam_level,$spam_report,$spam_summary,%supplementary_info);
{ local($1,$2,$3,$4,$5,$6); # avoid Perl 5.8.x taint bug
if ($sa_version_num < 3) {
$spam_level = $per_msg_status->get_hits;
$supplementary_info{'TESTSSCORES'} = $supplementary_info{'TESTS'} =
$per_msg_status->get_names_of_tests_hit;
} else {
$spam_level = $per_msg_status->get_score;
for my $t (qw(VERSION SUBVERSION RULESVERSION
TESTS TESTSSCORES ADDEDHEADERHAM ADDEDHEADERSPAM
AUTOLEARN AUTOLEARNSCORE SC SCRULE SCTYPE
LANGUAGES RELAYCOUNTRY ASN ASNCIDR DCCB DCCR DCCREP
DKIMDOMAIN DKIMIDENTITY AWLSIGNERMEAN
HAMMYTOKENS SPAMMYTOKENS SUBJPREFIX
CRM114STATUS CRM114SCORE CRM114CACHEID)) {
my $tag_value = $per_msg_status->get_tag($t);
if (defined $tag_value) {
# for some reason tags ASN and ASNCIDR have UTF8 flag on;
# encode any character strings to UTF-8 octets for consistency
safe_encode_utf8_inplace($tag_value); # to octets if not already
$supplementary_info{$t} = $tag_value;
}
}
}
{ # fudge
my $crm114_status = $supplementary_info{'CRM114STATUS'};
my $crm114_score = $supplementary_info{'CRM114SCORE'};
if (defined $crm114_status && defined $crm114_score) {
$supplementary_info{'CRM114STATUS'} =
sprintf("%s ( %s )", $crm114_status,$crm114_score);
}
}
# get_report() taints $1 and $2 !
$spam_summary = $per_msg_status->get_report;
# $spam_summary = $per_msg_status->get_tag('SUMMARY');
$spam_report = $per_msg_status->get_tag('REPORT');
safe_encode_utf8_inplace($spam_summary); # to octets (if not already)
safe_encode_utf8_inplace($spam_report); # to octets (if not already)
# fetch the TIMING tag last:
$supplementary_info{'TIMING'} = $per_msg_status->get_tag('TIMING');
$supplementary_info{'RUSAGE-SA'} = \@sa_cpu_usage; # filled-in later
}
# section_time($which_section); # don't bother reporting separately, short
$which_section = 'SA check finish';
if (defined $per_msg_status)
{ $per_msg_status->finish; undef $per_msg_status }
if (defined $mail_obj)
{ $mail_obj->finish if $sa_version_num >= 3; undef $mail_obj }
# section_time($which_section); # don't bother reporting separately, short
# returning the result as a data structure instead of modifying
# the $msginfo objects directly is used to make it possible to run
# this subroutine as a subprocess; modifications to $msginfo objects
# would be lost if done in a context of a spawned process
push(@result, {
recip_ind_list => $rind_list, user_config => $uconf,
spam_level => $spam_level,
spam_report => $spam_report, spam_summary => $spam_summary,
supplementary_info => \%supplementary_info,
});
}
1;
} or do { $eval_stat = $@ ne '' ? $@ : "errno=$!" };
$which_section = 'SA finish';
if (defined $per_msg_status) # just in case
{ $per_msg_status->finish; undef $per_msg_status }
if (defined $mail_obj) # just in case
{ $mail_obj->finish if $sa_version_num >= 3; undef $mail_obj }
if ($data_representation eq 'GLOB') {
close(F) or die "Can't close SA virtual file: $!";
untie(*F);
}
umask($self->{saved_umask}); # restore our umask, SA may have clobbered it
if ($$ != $saved_pid) {
do_log_safe(-2,"PANIC, SA checking produced a clone process ".
"of [%s], CLONE [%s] SELF-TERMINATING", $saved_pid,$$);
POSIX::_exit(3); # SIGQUIT, avoid END and destructor processing
# POSIX::_exit(6); # SIGABRT, avoid END and destructor processing
}
if ($rusage_self_before && $rusage_children_before) {
my($rusage_self_after, $rusage_children_after) = get_rusage();
@sa_cpu_usage = (
$rusage_self_after->{ru_utime} - $rusage_self_before->{ru_utime},
$rusage_self_after->{ru_stime} - $rusage_self_before->{ru_stime},
$rusage_children_after->{ru_utime} -
$rusage_children_before->{ru_utime},
$rusage_children_after->{ru_stime} -
$rusage_children_before->{ru_stime} );
}
# section_time($which_section);
if (defined $eval_stat) { chomp $eval_stat; die $eval_stat } # resignal
\@result;
}
sub check {
my($self,$msginfo) = @_;
$self->{initialized_stage} == 4
or die "Wrong initialization sequence: " . $self->{initialized_stage};
my $scanner_name = $self->{scanner_name};
my $which_section; my $prefix = '';
my($spam_level,$sa_tests,$spam_report,$spam_summary,$supplementary_info_ref);
my $hdr_edits = $msginfo->header_edits;
my $size_limit;
my $mbsl = $self->{options}->{'mail_body_size_limit'};
$mbsl = c('sa_mail_body_size_limit') if !defined $mbsl;
if (defined $mbsl) {
$size_limit = min(64*1024, $msginfo->orig_header_size) + 1 +
min($mbsl, $msginfo->orig_body_size);
# don't bother if slightly oversized, it's faster without size checks
undef $size_limit if $msginfo->msg_size < $size_limit + 5*1024;
}
# fake a local delivery agent by inserting a Return-Path
$prefix .= sprintf("Return-Path: %s\n", $msginfo->sender_smtp);
$prefix .= sprintf("X-Envelope-To: %s\n",
join(",\n ",qquote_rfc2821_local(@{$msginfo->recips})));
my $os_fp = $msginfo->client_os_fingerprint;
$prefix .= sprintf("X-Amavis-OS-Fingerprint: %s\n",
sanitize_str($os_fp)) if defined($os_fp) && $os_fp ne '';
my(@av_tests);
for my $r (@{$msginfo->per_recip_data}) {
my $spam_tests = $r->spam_tests;
push(@av_tests, grep(/^AV[.:].+=/,
split(/,/, join(',',map($$_,@$spam_tests))))) if $spam_tests;
}
$prefix .= sprintf("X-Amavis-AV-Status: %s\n",
sanitize_str(join(',',@av_tests))) if @av_tests;
$prefix .= sprintf("X-Amavis-PolicyBank: %s\n", c('policy_bank_path'));
$prefix .= sprintf("X-Amavis-MessageSize: %d%s\n", $msginfo->msg_size,
!defined $size_limit ? '' : ", TRUNCATED to $size_limit");
for my $hf_name (qw(
X-CRM114-Status X-CRM114-CacheID X-CRM114-Notice X-CRM114-Action
X-DSPAM-Result X-DSPAM-Class X-DSPAM-Signature X-DSPAM-Processed
X-DSPAM-Confidence X-DSPAM-Probability X-DSPAM-User X-DSPAM-Factors)) {
my $suppl_attr_val = $msginfo->supplementary_info($hf_name);
if (defined $suppl_attr_val && $suppl_attr_val ne '') {
chomp $suppl_attr_val;
$prefix .= sprintf("%s: %s\n", $hf_name, sanitize_str($suppl_attr_val));
}
}
$which_section = 'SA call';
my($proc_fh,$pid); my $eval_stat; my $results_aref;
eval {
# NOTE ON TIMEOUTS: SpamAssassin may use timer for its own purpose,
# disabling it before returning. It seems it only uses timer when
# external tests are enabled.
local $SIG{ALRM} = sub {
my $s = Carp::longmess("SA TIMED OUT, backtrace:");
# crop at some rather arbitrary limit
substr($s,900-3) = '[...]' if length($s) > 900;
do_log(-1,"%s",$s);
};
prolong_timer('spam_scan_sa_pre', 1, 4); # restart the timer
#
# note: array @lines at this point contains only prepended synthesized
# header fields, but may be extended in sub call_spamassassin() by
# reading-in the rest of the message; this may or may not happen in
# a separate process (called through run_as_subprocess or directly);
# each line must be terminated by a \n character, which must be the
# only \n in a line;
#
my(@lines) = split(/^/m, $prefix, -1); $prefix = undef;
if (!$sa_spawned) {
$results_aref = call_spamassassin($self,$msginfo,\@lines,$size_limit);
} else {
($proc_fh,$pid) = run_as_subprocess(\&call_spamassassin,
$self,$msginfo,\@lines,$size_limit);
my($results,$child_stat) =
collect_results_structured($proc_fh,$pid,'spawned SA',200*1024);
$results_aref = $results->[0] if defined $results;
}
1;
} or do { $eval_stat = $@ ne '' ? $@ : "errno=$!" };
section_time($which_section) if $sa_spawned;
$which_section = 'SA done';
prolong_timer('spam_scan_sa'); # restart the timer
if ($results_aref) {
# for each group of recipients using the same SA userconf file
for my $h (@$results_aref) {
my $rind_list = $h->{recip_ind_list};
my(@r_list) = @{$msginfo->per_recip_data}[split(/,/,$rind_list)];
my $uconf = $h->{user_config};
$spam_level = $h->{spam_level};
$spam_report = $h->{spam_report}; $spam_summary = $h->{spam_summary};
$supplementary_info_ref = $h->{supplementary_info};
$supplementary_info_ref = {} if !$supplementary_info_ref;
$sa_tests = $supplementary_info_ref->{'TESTSSCORES'};
add_entropy($spam_level,$sa_tests);
my $score_factor = $self->{options}->{'score_factor'};
if (defined $spam_level && defined $score_factor) {
$spam_level *= $score_factor;
}
do_log(3,"spam_scan: score=%s autolearn=%s tests=[%s] recips=%s",
$spam_level, $supplementary_info_ref->{'AUTOLEARN'},
$sa_tests, $rind_list);
my(%sa_tests_h);
if (defined $sa_tests && $sa_tests ne 'none') {
for my $t (split(/,[ \t]*/, $sa_tests)) {
my($test_name,$score) = split(/=/, $t, 2);
$sa_tests_h{$test_name} = $score;
}
}
my $dkim_adsp_suppress;
if (exists $sa_tests_h{'DKIM_ADSP_DISCARD'}) {
# must honour ADSP 'discardable', suppress a bounce
do_log(2,"spam_scan: dsn_suppress_reason DKIM_ADSP_DISCARD");
$dkim_adsp_suppress = 1;
}
$msginfo->supplementary_info('SCORE-'.$scanner_name, $spam_level);
$msginfo->supplementary_info('VERDICT-'.$scanner_name,
$spam_level >= 5 ? 'Spam' : $spam_level < 1 ? 'Ham' : 'Unknown');
for my $r (@r_list) {
$r->spam_level( ($r->spam_level || 0) + $spam_level );
$r->spam_report($spam_report); $r->spam_summary($spam_summary);
if (!$r->spam_tests) {
$r->spam_tests([ \$sa_tests ]);
} else {
# comes last: here we use push, unlike elsewhere where may do unshift
push(@{$r->spam_tests}, \$sa_tests);
}
if ($dkim_adsp_suppress) {
$r->dsn_suppress_reason('DKIM_ADSP_DISCARD' .
!defined $_ ? '' : ", $_") for $r->dsn_suppress_reason;
}
}
}
}
if (defined($msginfo->spam_report) || defined($msginfo->spam_summary)) {
$spam_report = $msginfo->spam_report . ', ' . $spam_report
if $msginfo->spam_report ne '';
$spam_summary = $msginfo->spam_summary . "\n\n" . $spam_summary
if $msginfo->spam_summary ne '';
}
$msginfo->spam_report($spam_report); $msginfo->spam_summary($spam_summary);
for (keys %$supplementary_info_ref) {
$msginfo->supplementary_info($_, $supplementary_info_ref->{$_});
}
if (defined $eval_stat) { # SA timed out?
kill_proc($pid,'a spawned SA',1,$proc_fh,$eval_stat) if defined $pid;
undef $proc_fh; undef $pid; chomp $eval_stat;
do_log(-2, "SA failed: %s", $eval_stat);
# die "$eval_stat\n" if $eval_stat !~ /timed out\b/;
}
1;
}
1;