File: //usr/share/perl5/vendor_perl/Amavis/SpamControl/ExtProg.pm
# SPDX-License-Identifier: GPL-2.0-or-later
package Amavis::SpamControl::ExtProg;
use strict;
use re 'taint';
use warnings;
use warnings FATAL => qw(utf8 void);
no warnings 'uninitialized';
# use warnings 'extra'; no warnings 'experimental::re_strict'; use re 'strict';
BEGIN {
require Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
$VERSION = '2.412';
@ISA = qw(Exporter);
}
use subs @EXPORT_OK;
use Errno qw(EIO EINTR EAGAIN ECONNRESET EBADF);
use IO::File qw(O_RDONLY O_WRONLY O_RDWR O_APPEND O_CREAT O_EXCL);
use Time::HiRes ();
use Amavis::Conf qw(:platform :confvars :sa c cr ca);
use Amavis::ProcControl qw(exit_status_str proc_status_ok
kill_proc run_command run_command_consumer);
use Amavis::rfc2821_2822_Tools qw(qquote_rfc2821_local);
use Amavis::Timing qw(section_time);
use Amavis::Util qw(ll do_log sanitize_str min max minmax
prolong_timer get_deadline);
sub new {
my($class, $scanner_name,$module,@args) = @_;
my($cmd,$cmdargs,%options) = @args;
return if !defined $cmd || $cmd eq '';
bless {
scanner_name => $scanner_name, command => $cmd, args => $cmdargs,
options => \%options,
}, $class;
}
sub check {
my($self,$msginfo) = @_;
$self->check_or_learn($msginfo,undef);
};
sub auto_learn {
my($self,$msginfo,$learn_as) = @_;
$self->check_or_learn($msginfo,$learn_as);
}
sub can_auto_learn {
my $self = $_[0];
my $opt = $self->{options};
$opt && defined $opt->{'learn_ham'} && defined $opt->{'learn_spam'};
}
# pass a mail message to an external (spam checking) program,
# extract interesting header fields from the result
#
sub check_or_learn {
my($self,$msginfo,$learn_as) = @_;
my $scanner_name = $self->{scanner_name};
my $cmd = $self->{command};
my $cmdargs; my $auto_learning;
if (!defined $learn_as) {
$cmdargs = $self->{args};
} elsif ($learn_as eq 'ham') {
$cmdargs = $self->{options}->{'learn_ham'}; $auto_learning = 1;
} elsif ($learn_as eq 'spam') {
$cmdargs = $self->{options}->{'learn_spam'}; $auto_learning = 1;
}
my $size_limit;
my $mbsl = $self->{options}->{'mail_body_size_limit'};
$mbsl = c('sa_mail_body_size_limit') if !defined $mbsl;
if (defined $mbsl) {
$size_limit = min(64*1024, $msginfo->orig_header_size) + 1 +
min($mbsl, $msginfo->orig_body_size);
# don't bother if slightly oversized, it's faster without size checks
undef $size_limit if $msginfo->msg_size < $size_limit + 5*1024;
}
my $prefix = '';
# fake a local delivery agent by inserting a Return-Path
$prefix .= sprintf("Return-Path: %s\n", $msginfo->sender_smtp);
$prefix .= sprintf("X-Envelope-To: %s\n",
join(",\n ",qquote_rfc2821_local(@{$msginfo->recips})));
my $os_fp = $msginfo->client_os_fingerprint;
$prefix .= sprintf("X-Amavis-OS-Fingerprint: %s\n",
sanitize_str($os_fp)) if defined($os_fp) && $os_fp ne '';
my(@av_tests);
my $per_recip_data = $msginfo->per_recip_data;
$per_recip_data = [] if !$per_recip_data;
for my $r (@$per_recip_data) {
my $spam_tests = $r->spam_tests;
push(@av_tests, grep(/^AV\..+=/,
split(/,/, join(',',map($$_,@$spam_tests))))) if $spam_tests;
}
$prefix .= sprintf("X-Amavis-AV-Status: %s\n",
sanitize_str(join(',',@av_tests))) if @av_tests;
$prefix .= sprintf("X-Amavis-PolicyBank: %s\n", c('policy_bank_path'));
$prefix .= sprintf("X-Amavis-MessageSize: %d%s\n", $msginfo->msg_size,
!defined $size_limit ? '' : ", TRUNCATED to $size_limit");
my $resp_stdout_fh = IO::File->new; # parent reading side of the pipe
my $child_stdout_fh = IO::File->new; # child stdout writing side of a pipe
my $resp_stderr_fh = IO::File->new; # parent reading side of the pipe
my $child_stderr_fh = IO::File->new; # child stderr writing side of a pipe
pipe($resp_stdout_fh, $child_stdout_fh)
or die "$scanner_name: Can't create pipe1: $!";
pipe($resp_stderr_fh, $child_stderr_fh)
or die "$scanner_name: Can't create pipe2: $!";
binmode($resp_stdout_fh) or die "Can't set pipe1 to binmode: $!";
binmode($resp_stderr_fh) or die "Can't set pipe2 to binmode: $!";
my($proc_fh,$pid) = run_command_consumer('&='.fileno($child_stdout_fh),
'&='.fileno($child_stderr_fh),
$cmd, @$cmdargs);
$child_stdout_fh->close
or die "Parent failed to close child side of the pipe1: $!";
$child_stderr_fh->close
or die "Parent failed to close child side of the pipe2: $!";
undef $child_stdout_fh; undef $child_stderr_fh;
my($remaining_time, $deadline) = get_deadline($scanner_name.'_scan', 0.8, 5);
alarm(0); # stop the timer
my $proc_fd = fileno($proc_fh);
my $resp_stdout_fd = fileno($resp_stdout_fh);
my $resp_stderr_fd = fileno($resp_stderr_fh);
my $response = ''; my $response_stderr = ''; my $response_chopped = 0;
my $child_stat; my $bytes_sent = 0; my $err_on_child = 0;
my $msg = $msginfo->mail_text;
my $msg_str_ref = $msginfo->mail_text_str; # have an in-memory copy?
$msg = $msg_str_ref if ref $msg_str_ref;
eval {
if (!defined $msg) {
# empty mail
} elsif (ref $msg ne 'SCALAR' && $msg->isa('MIME::Entity')) {
# $msg->print_body($proc_fh); # flushing the pipe?
die "$scanner_name: reading from MIME::Entity is not implemented";
} else { # handles a message in-memory or on a file
my $file_position = $msginfo->skip_bytes;
if (ref $msg ne 'SCALAR') {
$msg->seek($file_position, 0) or die "Can't rewind mail file: $!";
}
my $data_source = $prefix;
my $eof_on_response = 0;
my $eof_on_msg = 0; my $force_eof_on_msg = 0;
my($rout,$wout,$eout,$rin,$win,$ein); $rin=$win=$ein='';
vec($rin,$resp_stdout_fd,1) = 1;
vec($rin,$resp_stderr_fd,1) = 1;
for (;;) {
vec($win,$proc_fd,1) = 0;
vec($win,$proc_fd,1) = 1 if defined $proc_fh &&
(!$eof_on_msg || $data_source ne '');
$ein = $rin | $win;
my $timeout = max(3, $deadline - Time::HiRes::time);
my($nfound,$timeleft) =
select($rout=$rin, $wout=$win, $eout=$ein, $timeout);
defined $nfound && $nfound >= 0
or die "$scanner_name: select failed: $!";
if (vec($rout,$resp_stderr_fd,1)) {
my $inbuf = ''; $! = 0;
my $nread = sysread($resp_stderr_fh, $inbuf, 16384);
if ($nread) { # successful read
ll(5) && do_log(5, 'rx stderr: %d %s [...]',
$nread, substr($inbuf,0,1000));
$response_stderr .= $inbuf if length($response_stderr) < 10000;
} elsif (defined $nread) { # defined but zero: EOF
# sysread returns 0 at eof
} elsif ($! == EAGAIN || $! == EINTR) {
Time::HiRes::sleep(0.1); # slow down, just in case
} else { # read error
do_log(0,"%s: error reading from pipe2: %s", $scanner_name,$!);
}
}
if (vec($rout,$resp_stdout_fd,1)) {
my $inbuf = ''; $! = 0;
my $nread = sysread($resp_stdout_fh, $inbuf, 16384);
if ($nread) { # successful read
ll(5) && do_log(5, 'rx: %d %s [...]',
$nread, substr($inbuf,0,30));
my $response_l = length($response);
if ($response_chopped || $response_l >= 65536) {
# ignore the rest of input
} else {
$response .= $inbuf;
my $j = $response_l <= 1 ? 0 : $response_l - 1;
# we only need a mail header from the returned text
$response_chopped = 1 if index($response,"\n\n",$j) >= 0;
}
} elsif (defined $nread) { # defined but zero: EOF
$eof_on_response = 1; # sysread returns 0 at eof
} elsif ($! == EAGAIN || $! == EINTR) {
Time::HiRes::sleep(0.1); # slow down, just in case
} else { # read error
$eof_on_response = 1;
die "$scanner_name: error reading from pipe1: $!";
}
}
if (vec($wout,$proc_fd,1)) { # subprocess is ready to receive more
if ($data_source eq '' && !$eof_on_msg) { # get more data
my $nread = 0;
if ($force_eof_on_msg) {
# pretend to already be at eof
} elsif (ref $msg ne 'SCALAR') { # message is on a file
$nread = $msg->read($data_source,32768);
} elsif ($file_position < length($$msg)) { # message in memory
# do it in chunks, saves memory, cache friendly
$data_source = substr($$msg,$file_position,32768);
$nread = length($data_source);
}
if (!$nread) {
$eof_on_msg = 1;
defined $nread or die "$scanner_name: error reading message: $!";
if (defined $proc_fh) { $proc_fh->close or $err_on_child=$! };
undef $proc_fh;
do_log(5,"tx: eof");
}
$file_position += $nread;
if (defined $size_limit) {
my $remaining_room = $size_limit - $bytes_sent;
$remaining_room = 0 if $remaining_room < 0;
if ($nread > $remaining_room) {
substr($data_source, $remaining_room) = '';
do_log(5,"tx: (size limit) %d -> %d", $nread,$remaining_room);
$force_eof_on_msg = 1;
}
}
}
if ($data_source ne '' && defined $proc_fh) {
ll(5) && do_log(5, "tx: %d %s [...]",
length($data_source), substr($data_source,0,30));
# syswrite does a write(2), no need to call $proc_fh->flush
my $nwrite = syswrite($proc_fh, $data_source);
if (!defined($nwrite)) {
if ($! == EAGAIN || $! == EINTR) {
Time::HiRes::sleep(0.1); # slow down, just in case
} else {
$data_source = ''; $eof_on_msg = 1; # simulate an eof
do_log(-1,"%s: error writing to pipe: %s", $scanner_name,$!);
$proc_fh->close or $err_on_child=$!; undef $proc_fh;
do_log(5,"tx: eof (wr err)");
}
} elsif ($nwrite > 0) { # successful write
$bytes_sent += $nwrite;
if ($nwrite < length($data_source)) {
substr($data_source,0,$nwrite) = '';
} else {
$data_source = '';
}
}
}
}
last if $eof_on_response;
if (Time::HiRes::time >= $deadline) {
die "$scanner_name: exceeded allowed time\n";
}
}
}
if (defined $proc_fh) { $proc_fh->close or $err_on_child=$! }
$child_stat = defined $pid && waitpid($pid,0) > 0 ? $? : undef;
undef $proc_fh; undef $pid;
1;
} or do {
my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
do_log(-1,"%s failed: %s", $scanner_name,$eval_stat);
kill_proc($pid,$scanner_name,1,$proc_fh,$eval_stat) if defined $pid;
undef $proc_fh; undef $pid;
};
prolong_timer($scanner_name); # restart timer
substr($response_stderr,2000) = '[...]' if length($response_stderr) > 2000;
if (proc_status_ok($child_stat,$err_on_child)) {
do_log(2, "%s stderr: %s",
$scanner_name,$response_stderr) if $response_stderr ne '';
} else {
do_log(-1,"%s stderr: %s",
$scanner_name,$response_stderr) if $response_stderr ne '';
die "$scanner_name: error running program $cmd: " .
exit_status_str($child_stat,$err_on_child) . "\n";
}
# keep just a header section in $response
if ($response eq '') {
# empty mail
} elsif (substr($response, 0,1) eq "\n") {
$response = ''; # empty header section
} else {
my $ind = index($response,"\n\n"); # find header/body separator
substr($response, $ind+1) = '' if $ind >= 0;
}
my $crm114_score;
if ($cmd =~ /\bcrm/ && $response =~ /^\s*([+-]?\d*(?:\.\d*)?)\s*$/) {
$crm114_score = $1;
$response = ''; # skip the header parsing loop below
}
my(@response_lines) = split(/^/m, $response, -1);
push(@response_lines, "\n", "\n"); # insure a trailing NL and a separator
undef $response;
my(%header_field, @header_field_name, $curr_head);
# scan mail header section retrieved from an external program on its stdout
for my $ln (@response_lines) { # guaranteed to contain header/body separator
if ($ln =~ /^[ \t]/) { # folded
$curr_head .= $ln;
} else { # a new header field, process previous if any
if (defined $curr_head) {
local($1,$2);
if ($curr_head =~ /^ ( (?: X-DSPAM | X-CRM114 | X-Bogosity) [^:]*? )
[ \t]* : [ \t]* (.*) $/xs) {
my($hn,$hb) = ($1,$2); my $hnlc = lc $hn;
push(@header_field_name, $hn) if !exists($header_field{$hnlc});
$header_field{$hnlc} = $hb; # keep last
}
}
$curr_head = $ln;
last if $ln eq "\n";
}
}
my($spam_score, $spam_tests);
my $score_factor = $self->{options}->{'score_factor'};
my $dspam_result = $header_field{lc('X-DSPAM-Result')};
if (defined $dspam_result) {
if ($dspam_result =~ /\b(signature|result|probability|confidence)=.*;/) {
# combined result, split
my(%attribute);
for my $attr (split(/;\s*/, $dspam_result)) {
local($1,$2);
my($n,$v) = ($attr =~ /^([^=]*)=(.*)\z/s) ? ($1,$2) : ('user',$attr);
$v =~ s/^"//; $v =~ s/"\z//; $attribute{$n} = $v;
}
# simulate separate header fields
@header_field_name = qw(X-DSPAM-Result X-DSPAM-Class X-DSPAM-Confidence
X-DSPAM-Probability X-DSPAM-Signature);
for my $hn (@header_field_name) {
my $hnlc = lc $hn; my $name = $hnlc; $name =~ s/^X-DSPAM-//i;
$header_field{$hnlc} = $attribute{$name};
}
}
$dspam_result = $header_field{lc('X-DSPAM-Result')};
my $dspam_signature = $header_field{lc('X-DSPAM-Signature')};
$dspam_result = '' if !defined $dspam_result;
$dspam_signature = '' if !defined $dspam_signature;
chomp($dspam_result); chomp($dspam_signature);
$dspam_signature = '' if $dspam_signature eq 'N/A';
if (!$auto_learning) {
$msginfo->supplementary_info('DSPAMRESULT', $dspam_result);
$msginfo->supplementary_info('DSPAMSIGNATURE', $dspam_signature);
$msginfo->supplementary_info('VERDICT-'.$scanner_name, $dspam_result);
$spam_score = $dspam_result eq 'Spam' ? 10 : -1; # fabricated
$score_factor = 1 if !defined $score_factor;
$spam_score *= $score_factor;
$spam_tests = sprintf("%s.%s=%.3f",
$scanner_name, $dspam_result, $spam_score);
do_log(2,"%s result: %s, score=%.3f, sig=%s",
$scanner_name, $dspam_result, $spam_score, $dspam_signature);
}
}
my $crm114_status = $header_field{lc('X-CRM114-Status')};
if (defined $crm114_score || defined $crm114_status) {
local($1,$2);
if (!defined $crm114_status) { # presumably using --stats_only
# fabricate a Status from score
$crm114_status = !defined $crm114_score ? 'unknown'
: $crm114_score <= -10 ? uc('spam')
: $crm114_score >= +10 ? 'GOOD' : 'UNSURE';
$header_field{lc('X-CRM114-Status')} =
sprintf("%s ( %s )", $crm114_status, $crm114_score);
@header_field_name = qw(X-CRM114-Status);
} elsif ($crm114_status =~ /^([A-Z]+)\s+\(\s+([-\d\.]+)\s+\)/) {
$crm114_status = $1; $crm114_score = $2;
}
my $crm114_cacheid = $header_field{lc('X-CRM114-CacheID')};
if (defined $crm114_cacheid && $crm114_cacheid =~ /^sfid-\s*$/i) {
delete $header_field{lc('X-CRM114-CacheID')}; $crm114_cacheid = undef;
}
s/[ \t\r\n]+\z// for ($crm114_status, $crm114_score, $crm114_cacheid);
$score_factor = -0.10 if !defined $score_factor;
$spam_score = $score_factor * $crm114_score;
$spam_tests = sprintf("%s.%s(%s)=%.3f",
$scanner_name, $crm114_status, $crm114_score, $spam_score);
if (!$auto_learning) {
$msginfo->supplementary_info('VERDICT-'.$scanner_name,
uc $crm114_status eq 'GOOD' ? 'Ham' : $crm114_status);
$msginfo->supplementary_info('CRM114STATUS',
sprintf("%s ( %s )", $crm114_status,$crm114_score));
$msginfo->supplementary_info('CRM114SCORE', $crm114_score);
$msginfo->supplementary_info('CRM114CACHEID', $crm114_cacheid);
do_log(2,"%s result: score=%s (%s), status=%s, cacheid=%s",
$scanner_name, $spam_score,
$crm114_score, $crm114_status, $crm114_cacheid);
}
}
my $bogo_line = $header_field{lc('X-Bogosity')};
my($bogo_status, $bogo_score, $bogo_tests);
if (defined $bogo_line) {
($bogo_status, $bogo_tests, $bogo_score) = split(/,\s*/,$bogo_line);
local($1);
$bogo_score =~ s/^spamicity=([0-9.+-]*).*\z/$1/s;
$spam_score = $bogo_status eq 'Spam' ? 5 : $bogo_status eq 'Ham' ? -5 : 0;
$score_factor = 1 if !defined $score_factor;
$spam_score = $score_factor * $spam_score;
# trim trailing fraction zeroes
$spam_score = 0 + sprintf("%.3f",$spam_score);
$spam_tests = sprintf("%s=%s", $scanner_name, $spam_score);
# $spam_tests = sprintf("%s(%s/%s)=%s",
# $scanner_name, $bogo_status, $bogo_score, $spam_score);
if (!$auto_learning) {
$msginfo->supplementary_info('VERDICT-'.$scanner_name, $bogo_status);
$msginfo->supplementary_info('BOGOSTATUS', sprintf("%s ( %s )",
$bogo_status, $bogo_score));
$msginfo->supplementary_info('BOGOSCORE', $bogo_score);
do_log(2,"%s result: score=%s (%s), status=%s",
$scanner_name, $spam_score, $bogo_score, $bogo_status);
}
}
if (!$auto_learning) {
my $hdr_edits = $msginfo->header_edits;
my $use_our_hdrs = cr('prefer_our_added_header_fields');
my $allowed_hdrs = cr('allowed_added_header_fields');
my $all_local = !grep(!$_->recip_is_local, @$per_recip_data);
for my $hn (@header_field_name) {
my $hnlc = lc $hn; my $hb = $header_field{$hnlc};
if (defined $hb) {
$hb =~ s/[ \t\r\n]+\z//; # trim trailing whitespace and eol
do_log(5,"%s: suppl attr: %s = '%s'", $scanner_name,$hn,$hb);
$msginfo->supplementary_info($hn,$hb);
# add header fields to passed mail for all recipients
if ($all_local && $allowed_hdrs && $allowed_hdrs->{$hnlc} &&
!($use_our_hdrs && $use_our_hdrs->{$hnlc})) {
$hdr_edits->add_header($hn,$hb,2);
}
}
}
if (defined $spam_score) {
$msginfo->supplementary_info('SCORE-'.$scanner_name, $spam_score);
for my $r (@$per_recip_data) {
$r->spam_level( ($r->spam_level || 0) + $spam_score );
if (!$r->spam_tests) {
$r->spam_tests([ \$spam_tests ]);
} else {
push(@{$r->spam_tests}, \$spam_tests);
}
}
}
}
section_time($scanner_name);
}
1;