File: //usr/share/perl5/vendor_perl/Amavis/Out/SQL/Log.pm
# SPDX-License-Identifier: GPL-2.0-or-later
package Amavis::Out::SQL::Log;
use strict;
use re 'taint';
use warnings;
use warnings FATAL => qw(utf8 void);
no warnings 'uninitialized';
# use warnings 'extra'; no warnings 'experimental::re_strict'; use re 'strict';
BEGIN {
require Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
$VERSION = '2.412';
@ISA = qw(Exporter);
import Amavis::Out::SQL::Connection ();
}
use DBI qw(:sql_types);
use Amavis::Conf qw(:platform :confvars c cr ca);
use Amavis::Lookup qw(lookup lookup2);
use Amavis::rfc2821_2822_Tools;
use Amavis::Util qw(ll do_log do_log_safe min max minmax add_entropy
untaint untaint_inplace format_time_interval
truncate_utf_8 orcpt_encode
idn_to_utf8 idn_to_ascii mail_addr_idn_to_ascii
safe_encode safe_encode_utf8 safe_decode_mime
snmp_count ccat_split ccat_maj);
sub new {
my($class,$conn_h) = @_; bless { conn_h=>$conn_h, incarnation=>0 }, $class;
}
sub DESTROY {
my $self = $_[0]; local($@,$!,$_);
do_log_safe(5,"Amavis::Out::SQL::Log DESTROY called");
}
# find an existing e-mail address record or insert one, returning its id;
# may return undef if 'sel_adr' or 'ins_adr' SQL clauses are not defined;
#
sub find_or_save_addr {
my($self,$addr,$partition_tag,$keep_localpart_case) = @_;
my $id; my $existed = 0; my($localpart,$domain);
my $naddr = untaint($addr);
if ($naddr ne '') { # normalize address (lowercase, 7-bit, max 255 ch...)
($localpart,$domain) = split_address($naddr);
$domain = idn_to_ascii($domain);
if (!$keep_localpart_case && !c('localpart_is_case_sensitive')) {
$localpart = lc $localpart;
}
local($1);
$domain = $1 if $domain=~/^\@?(.*?)\.*\z/s; # chop leading @ and tr. dots
$naddr = $localpart.'@'.$domain;
substr($naddr,255) = '' if length($naddr) > 255;
# avoid UTF-8 SQL trouble, legitimate RFC 5321 addresses only need 7 bits
$naddr =~ s/[^\040-\176]/?/gs if !$sql_allow_8bit_address;
# SQL character strings disallow zero octets, and also disallow any other
# octet values and sequences of octet values that are invalid according to
# the database's selected character set encoding
}
my $conn_h = $self->{conn_h}; my $sql_cl_r = cr('sql_clause');
my $sel_adr = $sql_cl_r->{'sel_adr'};
my $ins_adr = $sql_cl_r->{'ins_adr'};
if (!defined($sel_adr) || $sel_adr eq '') {
# no way to query a database, behave as if no record was found
do_log(5,"find_or_save_addr: sel_adr query disabled, %s", $naddr);
} else {
$conn_h->begin_work_nontransaction; #(re)connect if necessary, autocommit
my $datatype = SQL_VARCHAR;
if ($sql_allow_8bit_address) {
my $driver = $conn_h->driver_name; # only available when connected
$datatype = $driver eq 'Pg' ? { pg_type => DBD::Pg::PG_BYTEA() }
: SQL_VARBINARY;
}
$conn_h->execute($sel_adr, $partition_tag, [$naddr,$datatype]);
my($a_ref,$a2_ref);
if (defined($a_ref=$conn_h->fetchrow_arrayref($sel_adr))) { # exists?
$id = $a_ref->[0]; $conn_h->finish($sel_adr);
$existed = 1;
} elsif (!defined($ins_adr) || $ins_adr eq '') {
# record does not exist, insertion is not allowed
do_log(5,"find_or_save_addr: ins_adr insertion disabled, %s", $naddr);
} else { # does not exist, attempt to insert a new e-mail address record
my $invdomain; # domain with reversed fields, chopped to 255 characters
$invdomain = join('.', reverse split(/\./,$domain,-1));
substr($invdomain,255) = '' if length($invdomain) > 255;
$conn_h->begin_work_nontransaction; # (re)connect if not connected
my $eval_stat;
eval { $conn_h->execute($ins_adr, $partition_tag,
[$naddr,$datatype], $invdomain); 1 }
or do { $eval_stat = $@ ne '' ? $@ : "errno=$!" };
# INSERT may have failed because of race condition with other processes;
# try the SELECT again, it will most likely succeed this time;
# SELECT after INSERT also avoids the need for a working last_insert_id()
$conn_h->begin_work_nontransaction; # (re)connect if not connected
# try select again, regardless of the success of INSERT
$conn_h->execute($sel_adr, $partition_tag, [$naddr,$datatype]);
if ( defined($a2_ref=$conn_h->fetchrow_arrayref($sel_adr)) ) {
$id = $a2_ref->[0]; $conn_h->finish($sel_adr);
add_entropy($id);
if (!defined($eval_stat)) { # status of the INSERT
do_log(5,"find_or_save_addr: record inserted, id=%s, %s",
$id,$naddr);
} else {
$existed = 1; chomp $eval_stat;
do_log(5,"find_or_save_addr: found on a second attempt, ".
"id=%s, %s, (first attempt: %s)", $id,$naddr,$eval_stat);
die $eval_stat if $eval_stat =~ /^timed out\b/; # resignal timeout
}
} else { # still does not exist
$id = $existed = undef;
if (defined $eval_stat) { # status of the INSERT
chomp $eval_stat;
die $eval_stat if $eval_stat =~ /^timed out\b/; # resignal timeout
};
die "find_or_save_addr: failed to insert addr $naddr: $eval_stat";
}
}
}
($id, $existed);
}
# find a penpals record which proves that a local user (sid) really sent a
# mail to a recipient (rid) some time ago. Returns an interval time in seconds
# since the last such mail was sent by our local user to a specified recipient
# (or undef if information is not available). If @$message_id_list is a
# nonempty list of Message-IDs as found in References header field, the query
# also finds previous outgoing messages with a matching Message-ID but
# possibly to recipients different from what the mail was originally sent to.
#
sub penpals_find {
my($self, $sid,$rid,$message_id_list, $msginfo) = @_;
my($a_ref,$found,$age,$send_time,$ref_mail_id,$ref_subj,$ref_mid,$ref_rid);
my $conn_h = $self->{conn_h}; my $sql_cl_r = cr('sql_clause');
my $sel_penpals = $sql_cl_r->{'sel_penpals'};
my $sel_penpals_msgid = $sql_cl_r->{'sel_penpals_msgid'};
$message_id_list = [] if !$message_id_list;
if (defined($sel_penpals_msgid) && @$message_id_list && defined($sid)) {
# list of refs to Message-ID is nonempty, try reference or recipient match
my $n = scalar(@$message_id_list); # number of keys
my(@args) = ($sid,$rid); my(@pos_args); local($1);
my $sel_taint = substr($sel_penpals_msgid,0,0); # taintedness
$sel_penpals_msgid =~
s{ ( %m | \? ) } # substitute %m for keys and ? for next arg
{ push(@pos_args,
$1 eq '%m' ? (map { my $s=$_; $s=~s/[^\040-\176]/?/gs; $s }
@$message_id_list)
: shift @args),
$1 eq '%m' ? join(',', ('?') x $n) : '?' }xgse;
# keep original clause taintedness
$sel_penpals_msgid = untaint($sel_penpals_msgid) . $sel_taint;
untaint_inplace($_) for @pos_args; # untaint arguments
do_log(4, "penpals: query args: %s", join(', ',@pos_args));
do_log(4, "penpals: %s", $sel_penpals_msgid);
$conn_h->begin_work_nontransaction; # (re)connect if not connected
$conn_h->execute($sel_penpals_msgid,@pos_args);
snmp_count('PenPalsAttempts'); snmp_count('PenPalsAttemptsMid');
if (!defined($a_ref=$conn_h->fetchrow_arrayref($sel_penpals_msgid))) {
snmp_count('PenPalsMisses');
} else {
($send_time, $ref_mail_id, $ref_subj, $ref_mid, $ref_rid) = @$a_ref;
$found = 1; $conn_h->finish($sel_penpals_msgid);
my $rid_match = defined $ref_rid && defined $rid && $rid eq $ref_rid;
my $mid_match = grep($ref_mid eq $_, @$message_id_list);
my $t = $mid_match && $rid_match ? 'MidRid' :
# $mid_match && !defined($rid) ? 'MidNullRPath' :
$mid_match ? 'Mid' : $rid_match ? 'Rid' : 'none';
snmp_count('PenPalsHits'.$t); snmp_count('PenPalsHits');
ll(4) && do_log(4, "penpals: MATCH ON %s: %s",
$t, join(", ",@$a_ref));
}
}
if (!$found && defined($sel_penpals) && defined($rid) && defined($sid)) {
# list of Message-ID references not given, try matching on recipient only
$conn_h->begin_work_nontransaction; # (re)connect if not connected
$conn_h->execute($sel_penpals, untaint($sid), untaint($rid));
snmp_count('PenPalsAttempts'); snmp_count('PenPalsAttemptsRid');
if (!defined($a_ref=$conn_h->fetchrow_arrayref($sel_penpals))) { # exists?
snmp_count('PenPalsMisses');
} else {
($send_time, $ref_mail_id, $ref_subj) = @$a_ref;
$found = 1; $conn_h->finish($sel_penpals);
snmp_count('PenPalsHitsRid'); snmp_count('PenPalsHits');
ll(4) && do_log(4, "penpals: MATCH ON RID(%s): %s",
$rid, join(", ",@$a_ref));
}
}
if (!$found) {
ll(4) && do_log(4, "penpals: (sql) not found (%s,%s)%s", $sid,$rid,
!@$message_id_list ? '' : ' refs: '.join(", ",@$message_id_list));
} else {
$age = max(0, $msginfo->rx_time - $send_time);
ll(3) && do_log(3, "penpals: (sql) found (%s,%s) %s age %s (%.0f s)",
$sid, $rid, $ref_mail_id,
format_time_interval($age), $age);
}
($age, $ref_mail_id, $ref_subj);
}
sub save_info_preliminary {
my($self, $msginfo) = @_;
my $mail_id = $msginfo->mail_id;
defined $mail_id or die "save_info_preliminary: mail_id still undefined";
my $partition_tag = $msginfo->partition_tag;
my($sid,$existed,$sender_smtp); local($1);
$sender_smtp = $msginfo->sender_smtp; $sender_smtp =~ s/^<(.*)>\z/$1/s;
# find an existing e-mail address record for sender, or insert a new one
($sid,$existed) = $self->find_or_save_addr($sender_smtp,$partition_tag);
if (defined $sid) {
$msginfo->sender_maddr_id($sid);
# there is perhaps 30-50% chance the sender address is already in the db
snmp_count('SqlAddrSenderAttempts');
snmp_count($existed ? 'SqlAddrSenderHits' : 'SqlAddrSenderMisses');
do_log(4,"save_info_preliminary %s, sender id: %s, %s, %s",
$mail_id, $sid, $sender_smtp, $existed ? 'exists' : 'new' );
}
# find existing address records for recipients, or insert them
for my $r (@{$msginfo->per_recip_data}) {
my $addr_smtp = $r->recip_addr_smtp;
if (defined $addr_smtp) {
$addr_smtp =~ s/^<(.*)>\z/$1/s;
$addr_smtp = mail_addr_idn_to_ascii($addr_smtp);
}
my($rid, $o_rid, $existed);
if ($addr_smtp ne '') {
($rid,$existed) = $self->find_or_save_addr($addr_smtp,$partition_tag);
# there is perhaps 90-100% chance the recipient addr is already in the db
if (defined $rid) {
$r->recip_maddr_id($rid);
snmp_count('SqlAddrRecipAttempts');
snmp_count($existed ? 'SqlAddrRecipHits' : 'SqlAddrRecipMisses');
my($addr_type, $addr) = orcpt_encode($r->dsn_orcpt, 1);
ll(4) && do_log(4,"save_info_preliminary %s, recip id: %s, %s%s, %s",
$mail_id, $rid, $addr_smtp,
defined $addr ? " (ORCPT $addr_type;$addr)" : '',
$existed ? 'exists' : 'new');
}
}
}
my $conn_h = $self->{conn_h}; my $sql_cl_r = cr('sql_clause');
my $ins_msg = $sql_cl_r->{'ins_msg'};
if (!defined($ins_msg) || $ins_msg eq '') {
do_log(4,"save_info_preliminary: ins_msg undef, not saving");
} elsif (!defined($sid)) {
do_log(4,"save_info_preliminary: sid undef, not saving");
} else {
$conn_h->begin_work; # SQL transaction starts
eval {
# MySQL does not like a standard iso8601 delimiter 'T' or a timezone
# when data type of msgs.time_iso is TIMESTAMP (instead of a string)
my $time_iso = $timestamp_fmt_mysql && ($conn_h->driver_name eq 'mysql' || $conn_h->driver_name eq 'MariaDB')
? iso8601_utc_timestamp($msginfo->rx_time,1,'')
: iso8601_utc_timestamp($msginfo->rx_time);
# insert a placeholder msgs record with sender information
$conn_h->execute($ins_msg,
$partition_tag, $msginfo->mail_id, $msginfo->secret_id,
$msginfo->log_id, int($msginfo->rx_time), $time_iso,
untaint($sid), c('policy_bank_path'), untaint($msginfo->client_addr),
0+untaint($msginfo->msg_size),
untaint(substr(idn_to_utf8(c('myhostname')),0,255)));
$conn_h->commit; 1;
} or do {
my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
if ($conn_h->in_transaction) {
eval {
$conn_h->rollback;
do_log(1,"save_info_preliminary: rollback done"); 1;
} or do {
$@ = "errno=$!" if $@ eq ''; chomp $@;
do_log(1,"save_info_preliminary: rollback %s", $@);
die $@ if $@ =~ /^timed out\b/; # resignal timeout
};
}
do_log(-1, "WARN save_info_preliminary: %s", $eval_stat);
die $eval_stat if $eval_stat =~ /^timed out\b/; # resignal timeout
return 0;
};
}
1;
}
sub save_info_final {
my($self, $msginfo, $report_ref) = @_;
my $mail_id = $msginfo->mail_id;
defined $mail_id or die "save_info_final: mail_id still undefined";
my $dsn_sent = $msginfo->dsn_sent;
$dsn_sent = !$dsn_sent ? 'N' : $dsn_sent==1 ? 'Y' : $dsn_sent==2 ? 'q' : '?';
my $sid = $msginfo->sender_maddr_id;
my $conn_h = $self->{conn_h}; my($sql_cl_r) = cr('sql_clause');
my $ins_msg = $sql_cl_r->{'ins_msg'};
my $upd_msg = $sql_cl_r->{'upd_msg'};
my $ins_rcp = $sql_cl_r->{'ins_rcp'};
if ($ins_msg eq '' || $upd_msg eq '' || $ins_rcp eq '') {
# updates disabled
} elsif (!defined($sid)) {
# sender not in table maddr, msgs record was not inserted by preliminary
} else {
$conn_h->begin_work; # SQL transaction starts
eval {
my(%ccat_short_name) = ( # as written to a SQL record
CC_VIRUS,'V', CC_BANNED,'B', CC_UNCHECKED,'U',
CC_SPAM,'S', CC_SPAMMY,'Y', CC_BADH.",2",'M', CC_BADH,'H',
CC_OVERSIZED,'O', CC_MTA,'T', CC_CLEAN,'C', CC_CATCHALL,'?');
my($min_spam_level, $max_spam_level) =
minmax(map($_->spam_level, @{$msginfo->per_recip_data}));
# insert per-recipient records into table msgrcpt
my $r_seq_num = 0; # can serve as a component of a primary key
for my $r (@{$msginfo->per_recip_data}) {
$r_seq_num++;
my $rid = $r->recip_maddr_id;
next if !defined $rid; # e.g. always_bcc, or table 'maddr' is disabled
my $o_rid = $r->recip_maddr_id_orig; # may be undef
my $spam_level = $r->spam_level;
my($dest,$resp) = ($r->recip_destiny, $r->recip_smtp_response);
my $d = $resp=~/^4/ ? 'TEMPFAIL'
: ($dest==D_BOUNCE && $resp=~/^5/) ? 'BOUNCE'
: ($dest!=D_BOUNCE && $resp=~/^5/) ? 'REJECT'
: ($dest==D_PASS && ($resp=~/^2/ || !$r->recip_done)) ? 'PASS'
: ($dest==D_DISCARD) ? 'DISCARD' : '?';
my $r_content_type =
$r->setting_by_contents_category(\%ccat_short_name);
for ($r_content_type) { $_ = ' ' if !defined $_ || /^ *\z/ }
substr($resp,255) = '' if length($resp) > 255;
$resp =~ s/[^\040-\176]/?/gs; # just in case, only need 7 bit printbl
# avoid op '?:' on tainted operand in args list, see PR [perl #81028]
my $recip_local_yn = $r->recip_is_local ? 'Y' : 'N';
my $blacklisted_yn = $r->recip_blacklisted_sender ? 'Y' : 'N';
my $whitelisted_yn = $r->recip_whitelisted_sender ? 'Y' : 'N';
$conn_h->execute($ins_rcp,
$msginfo->partition_tag, $mail_id,
$sql_schema_version < 2.007000 ? untaint($rid)
: ($r_seq_num, untaint($rid), $recip_local_yn, $r_content_type),
substr($d,0,1), ' ',
$blacklisted_yn, $whitelisted_yn, 0+untaint($spam_level),
untaint($resp),
);
# untaint(defined $o_rid ? $o_rid : $rid),
# int($msginfo->rx_time),
# untaint($r->user_policy_id),
}
my $q_to = $msginfo->quarantined_to; # ref to a list of quar. locations
if (!defined($q_to) || !@$q_to) { $q_to = undef }
else {
$q_to = $q_to->[0]; # keep only the first quarantine location
$q_to =~ s{^\Q$QUARANTINEDIR\E/}{}; # strip directory name
}
my $m_id = $msginfo->get_header_field_body('message-id');
$m_id = join(' ',parse_message_id($m_id)) if $m_id ne ''; # strip CFWS
my $subj = $msginfo->get_header_field_body('subject');
my $from = $msginfo->get_header_field_body('from'); # raw full field
my $rfc2822_from = $msginfo->rfc2822_from; # undef, scalar or listref
my $rfc2822_sender = $msginfo->rfc2822_sender; # undef or scalar
$rfc2822_from = join(', ',@$rfc2822_from) if ref $rfc2822_from;
my $os_fp = $msginfo->client_os_fingerprint;
$_ = !defined($_) ? '' :untaint($_) for ($subj,$from,$m_id,$q_to,$os_fp);
for ($subj,$from) { # character set decoding, sanitation
chomp; s/\n(?=[ \t])//gs; s/^[ \t]+//s; s/[ \t]+\z//s; # unfold, trim
eval { # decode mime and truncate to 255 bytes
my $chars = safe_decode_mime($_); # to logical characters
substr($chars, 255) = '' if length($chars) > 255;
# DBD::mysql will throw an error with native encoding, while
# DBD::MariaDB and DBD::Pg can cope with native as well as utf8.
# Upgrade to be on the safe side. Suggestion via issue#67.
utf8::upgrade($chars);
$_ = $chars; 1;
} or do {
my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
do_log(1,"save_info_final INFO: header field ".
"not decodable, keeping raw bytes: %s", $eval_stat);
substr($_,255) = '' if length($_) > 255;
die $eval_stat if $eval_stat =~ /^timed out\b/; # resignal timeout
};
}
for ($m_id,$q_to,$os_fp) { # truncate to 255 ch, ensure 7-bit characters
substr($_,255) = '' if length($_) > 255;
s/[^\040-\176]/?/gs; # only use 7 bit printable, compatible with UTF-8
}
my $content_type =
$msginfo->setting_by_contents_category(\%ccat_short_name);
my $checks_performed = $msginfo->checks_performed;
$checks_performed = !ref $checks_performed ? ''
: join('', grep($checks_performed->{$_}, qw(V S H B F P D)));
my $q_type = $msginfo->quar_type;
# only keep the first quarantine type used (e.g. ignore archival quar.)
$q_type = $q_type->[0] if ref $q_type;
for ($q_type,$content_type) { $_ = ' ' if !defined $_ || /^ *\z/ }
$min_spam_level = 0 if !defined $min_spam_level;
$max_spam_level = 0 if !defined $max_spam_level;
my $orig = $msginfo->originating ? 'Y' : 'N';
ll(4) && do_log(4,"save_info_final %s, orig=%s, chks=%s, cont.ty=%s, ".
"q.type=%s, q.to=%s, dsn=%s, score=%s, ".
"Message-ID: %s, From: '%s', Subject: '%s'",
$mail_id, $orig, $checks_performed, $content_type,
$q_type, $q_to, $dsn_sent, $min_spam_level,
$m_id, $from, $subj);
# update message record with additional information
$conn_h->execute($upd_msg,
$content_type, $q_type, $q_to, $dsn_sent,
0+untaint($min_spam_level), $m_id, $from, $subj,
untaint($msginfo->client_addr), # we may have a better info now
$sql_schema_version < 2.007000 ? () : $orig,
$msginfo->partition_tag, $mail_id);
# $os_fp, $rfc2822_sender, $rfc2822_from, $checks_performed, ...
# SQL_CHAR, SQL_VARCHAR, SQL_VARBINARY, SQL_BLOB, SQL_INTEGER, SQL_FLOAT,
# SQL_TIMESTAMP, SQL_TYPE_TIMESTAMP_WITH_TIMEZONE, ...
$conn_h->commit; 1;
} or do {
my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
if ($conn_h->in_transaction) {
eval {
$conn_h->rollback;
do_log(1,"save_info_final: rollback done"); 1;
} or do {
$@ = "errno=$!" if $@ eq ''; chomp $@;
do_log(1,"save_info_final: rollback %s", $@);
die $@ if $@ =~ /^timed out\b/; # resignal timeout
};
}
do_log(-1, "WARN save_info_final: %s", $eval_stat);
die $eval_stat if $eval_stat =~ /^timed out\b/; # resignal timeout
return 0;
};
}
1;
}
1;