File: //usr/share/perl5/vendor_perl/Amavis/Unpackers/Validity.pm
# SPDX-License-Identifier: GPL-2.0-or-later
package Amavis::Unpackers::Validity;
use strict;
use re 'taint';
BEGIN {
require Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
$VERSION = '2.412';
@ISA = qw(Exporter);
@EXPORT_OK = qw(&check_header_validity &check_for_banned_names);
}
use subs @EXPORT_OK;
use Amavis::Conf qw(:platform %banned_rules c cr ca);
use Amavis::Lookup qw(lookup lookup2);
use Amavis::Util qw(ll do_log min max minmax untaint untaint_inplace
is_valid_utf_8 truncate_utf_8);
sub check_header_validity($) {
my $msginfo = $_[0];
my(%field_head_counts, @bad);
my $minor_badh_category = 0;
my $allowed_tests = cr('allowed_header_tests');
my($t_syntax, $t_empty, $t_long, $t_control, $t_8bit, $t_utf8,
$t_missing, $t_multiple) =
!$allowed_tests ? () : @$allowed_tests{qw(syntax empty long control
8bit utf8 missing multiple)};
# minor category: 2: 8-bit char, 3: NUL/CR control, 4: empty line, 5: long,
# 6: syntax, 7: missing, 8: multiple
local($1,$2,$3);
for my $curr_head (@{$msginfo->orig_header}) {#array of hdr fields, not lines
my($field_name,$msg1,$msg2,$pre,$mid,$post);
# obsolete RFC 822 syntax allowed whitespace before colon
$field_name = $1 if $curr_head =~ /^([!-9;-\x7E\x80-\xFF]+)[ \t]*:/s;
$field_head_counts{lc($field_name)}++ if defined $field_name;
if (!defined($field_name) || substr($field_name,0,2) eq '--') {
if ($t_syntax) {
$msg1 = "Invalid header field syntax"; $msg2 = $curr_head;
$minor_badh_category = max(6, $minor_badh_category);
}
} elsif ($t_syntax && $field_name =~ /([^\x00-\x7F])/gs) {
$mid = $1; $msg1 = "Invalid header field name, contains non-ASCII char";
$minor_badh_category = max(6, $minor_badh_category);
} elsif ($t_empty && $curr_head =~ /^([ \t]+)(?=\n|\z)/gms) {
$mid = $1;
$msg1 ="Improper folded header field made up entirely of whitespace";
# note: using //g and pos to avoid deep recursion in regexp
$minor_badh_category = max(4, $minor_badh_category);
} elsif ($t_long && $curr_head =~ /^([^\n]{999,})(?=\n|\z)/gms) {
$msg1 = "Header line longer than 998 characters"; $msg2 = $1;
substr($msg2, 50) = '[...]' if length($msg2) > 55;
$minor_badh_category = max(5, $minor_badh_category);
} elsif ($t_control && $curr_head =~ /([\000\015])/gs) {
$mid = $1; $msg1 = "Improper use of control character";
$minor_badh_category = max(3, $minor_badh_category);
} elsif ($t_8bit && $curr_head =~ /([^\x00-\x7F])/gs) { # non-ASCII
$mid = $1;
if (!is_valid_utf_8($curr_head)) {
$msg1 = 'Non-encoded non-ASCII data (and not UTF-8)';
} elsif ($curr_head =~ /^([\x00-\x08\x0B-\x1F\x7F])/xgs) { # but TAB,NL
$mid = $1; $msg1 = 'UTF-8 string contains C0 Controls';
} elsif ($curr_head =~
/( (?: \xC2 | \xE0 \x82 | \xF0 \x80 \x82 ) [\x80-\x9F] )/xgs) {
# RFC 5198 prohibits "C1 Controls" (U+0080..U+009F) for Net-Unicode
$mid = $1; $msg1 = 'UTF-8 string contains C1 Controls';
} elsif ($msginfo->smtputf8) {
# UTF-8 header bodies (but not field names) are valid with SMTPUTF8
} elsif ($t_utf8) {
$msg1 = 'Non-encoded UTF-8 string in non-EAI mail';
if ($curr_head =~ /( [\xC0-\xDF][\x80-\xBF] |
[\xE0-\xEF][\x80-\xBF]{2} |
[\xF0-\xF4][\x80-\xBF]{3} )/xgs ) {
$mid = $1; # capture the entire first non-ASCII UTF-8 character
}
}
$minor_badh_category = max(2, $minor_badh_category) if defined $msg1;
}
if (defined $msg1) {
$mid = '' if !defined $mid;
if (!defined $msg2) {
$pre = substr($curr_head, 0, pos($curr_head)-length($mid))
if !defined $pre;
$post = substr($curr_head,pos($curr_head)) if !defined $post;
chomp($post);
$mid = truncate_utf_8($mid, 15).'[...]' if length($mid) > 20;
$post = truncate_utf_8($post,15).'[...]' if length($post) > 20;
if (length($pre)-length($field_name)-2 > 50-length($post)) {
$pre = $field_name . ': ...'
. substr($pre, length($pre) - (45-length($post)));
}
$msg2 = $pre . $mid . $post;
}
if ($mid ne '' && length($mid) <= 4) {
$msg1 .= " (char ";
$msg1 .= join(' ', map(sprintf('%02X',ord($_)), split(//,$mid)));
$msg1 .= " hex)";
}
push(@bad, "$msg1: $msg2");
last if @bad >= 100; # some sanity limit
}
}
# RFC 5322 (ex RFC 2822), RFC 2045, RFC 2183
for (qw(Date From Sender Reply-To To Cc Bcc Subject Message-ID References
In-Reply-To MIME-Version Content-Type Content-Transfer-Encoding
Content-ID Content-Description Content-Disposition Auto-Submitted)) {
my $n = $field_head_counts{lc($_)};
if (!$n && $t_missing && /^(?:Date|From)\z/i) {
push(@bad, "Missing required header field: \"$_\"");
$minor_badh_category = max(7, $minor_badh_category);
} elsif ($n > 1 && $t_multiple) {
if ($n == 2) {
push(@bad, "Duplicate header field: \"$_\"");
} else {
push(@bad, sprintf('Header field occurs more than once: "%s" '.
'occurs %d times', $_, $n));
}
$minor_badh_category = max(8, $minor_badh_category);
}
}
for (@bad) { # sanitize C0 controls and non-ASCII
s{ ( [^\x20-\x7E] | \\ (?= x \{ ) ) }
{ sprintf('\\x{%02X}', ord($1)) }xgse if tr/\x00-\x7F//c;
}
if (!@bad) {
do_log(5,"check_header: %d, OK", $minor_badh_category);
} elsif (ll(2)) {
do_log(2,"check_header: %d, %s", $minor_badh_category, $_) for @bad;
}
(\@bad, $minor_badh_category);
}
sub check_for_banned_names($) {
my $msginfo = $_[0];
do_log(3, "Checking for banned types and filenames");
my $bfnmr = ca('banned_filename_maps'); # two-level map: recip, partname
my(@recip_tables); # a list of records describing banned tables for recips
my $any_table_in_recip_tables = 0; my $any_not_bypassed = 0;
for my $r (@{$msginfo->per_recip_data}) {
my $recip = $r->recip_addr;
my(@tables,@tables_m); # list of banned lookup tables for this recipient
if (!$r->bypass_banned_checks) { # not bypassed
$any_not_bypassed = 1;
my($t_ref,$m_ref) = lookup2(1,$recip,$bfnmr);
if (defined $t_ref) {
for my $ti (0..$#$t_ref) { # collect all relevant tables for each recip
my $t = $t_ref->[$ti];
# an entry may be a ref to a list of lookup tables, or a comma- or
# whitespace-separated list of table names (suitable for SQL),
# which are mapped to actual lookup tables through %banned_rules
if (!defined($t)) {
# ignore
} elsif (ref($t) eq 'ARRAY') { # a list of actual lookup tables
push(@tables, @$t);
push(@tables_m, ($m_ref->[$ti]) x @$t);
} else { # a list of rules _names_, to be mapped via %banned_rules
my(@names);
my(@rawnames) = grep(!/^[, ]*\z/,
($t =~ /\G (?: " (?: \\. | [^"\\] ){0,999} "
| [^, ] )+ | [, ]+/xgs));
# in principle quoted strings could be used
# to construct lookup tables on-the-fly (not implemented)
for my $n (@rawnames) { # collect only valid names
if (!exists($banned_rules{$n})) {
do_log(2,"INFO: unknown banned table name %s, recip=%s",
$n,$recip);
} elsif (!defined($banned_rules{$n})) { # ignore undef
} else { push(@names,$n) }
}
ll(3) && do_log(3,"collect banned table[%d]: %s, tables: %s",
$ti,$recip, join(', ',map($_.'=>'.$banned_rules{$_}, @names)));
if (@names) { # any known and valid table names?
push(@tables, map($banned_rules{$_}, @names));
push(@tables_m, ($m_ref->[$ti]) x @names);
}
}
}
}
}
push(@recip_tables, { r => $r, recip => $recip,
tables => \@tables, tables_m => \@tables_m } );
$any_table_in_recip_tables=1 if @tables;
}
my $bnpre = cr('banned_namepath_re');
$bnpre = $$bnpre if ref($bnpre) eq 'REF'; # allow one level of indirection
if (!$any_not_bypassed) {
do_log(3,"skipping banned check: all recipients bypass banned checks");
} elsif (!$any_table_in_recip_tables && !ref($bnpre)) {
do_log(3,"skipping banned check: no applicable lookup tables");
} else {
do_log(4,"starting banned checks - traversing message structure tree");
my $parts_root = $msginfo->parts_root;
my $part;
for (my(@unvisited)=($parts_root);
@unvisited and $part=shift(@unvisited);
push(@unvisited,@{$part->children}))
{ # traverse decomposed parts tree breadth-first
my(@path) = @{$part->path};
next if @path <= 1;
shift(@path); # ignore place-holder root node
next if @{$part->children}; # ignore non-leaf nodes
my(@descr_trad); # a part path: list of predecessors of a message part
my(@descr); # same, but in form suitable for check on banned_namepath_re
for my $p (@path) {
my(@k,$n);
$n = $p->base_name;
if ($n ne '') { $n=~s/[\t\n]/ /g; push(@k,"P=$n") }
$n = $p->mime_placement;
if ($n ne '') { $n=~s/[\t\n]/ /g; push(@k,"L=$n") }
$n = $p->type_declared;
$n = [$n] if !ref($n);
for (@$n) {if ($_ ne ''){my $m=$_; $m=~s/[\t\n]/ /g; push(@k,"M=$m")}}
$n = $p->type_short;
$n = [$n] if !ref($n);
for (@$n) {if (defined($_) && $_ ne '')
{my $m=$_; $m=~s/[\t\n]/ /g; push(@k,"T=$m")} }
$n = $p->name_declared;
$n = [$n] if !ref($n);
for (@$n) {if (defined($_) && $_ ne '')
{my $m=$_; $m=~s/[\t\n]/ /g; push(@k,"N=$m")} }
$n = $p->attributes;
if (defined $n && $n ne '') { push(@k,"A=$_") for split(/ */,$n) }
push(@descr, join("\t",@k));
push(@descr_trad, [map { local($1,$2);
/^([a-zA-Z0-9])=(.*)\z/s; my($key_what,$key_val) = ($1,$2);
$key_what eq 'M' || $key_what eq 'N' ? $key_val
: $key_what eq 'T' ? ('.'.$key_val) # prepend a dot (compatibility)
: $key_what eq 'A' && $key_val eq 'U' ? 'UNDECIPHERABLE' : ()} @k]);
}
# we have obtained a description of a part as a list of its predecessors
# in a message structure including the part itself at the end of the list
my $key_val_str = join(' | ',@descr); $key_val_str =~ s/\t/,/g;
my $key_val_trad_str = join(' | ', map(join(',',@$_), @descr_trad));
# simplified result to be presented in an SMTP response and DSN
my $simple_part_name = join(',', @{$descr_trad[-1]}); # just leaf node
# evaluate current mail component path against each recipients' tables
ll(4) && do_log(4, "check_for_banned (%s) %s",
join(',', map($_->base_name, @path)), $key_val_trad_str);
for my $e (@recip_tables) {
@$e{qw(found result matchk part_descr_attr part_descr_trad part_name)}
= (0, undef, undef, undef, undef, undef);
}
my($result, $matchingkey, $t_ref_old);
for my $e (@recip_tables) { # for each recipient and his tables
my($found,$recip,$t_ref) = @$e{qw(found recip tables)};
if ($t_ref && @$t_ref) {
my $same_as_prev = $t_ref_old && @$t_ref_old==@$t_ref &&
!grep($t_ref_old->[$_] ne $t_ref->[$_], (0..$#$t_ref)) ? 1 : 0;
if ($same_as_prev) {
do_log(4,
"skip banned check for %s, same tables as previous, result => %s",
$recip,$result);
} else {
do_log(5,"doing banned check for %s on %s",
$recip,$key_val_trad_str);
($result,$matchingkey) =
lookup2(0, [map(@$_,@descr_trad)], # check all attribs in one go
[map(ref($_) eq 'ARRAY' ? @$_ : $_, @$t_ref)],
Label=>"check_bann:$recip");
$t_ref_old = $t_ref;
}
if (defined $result) {
@$e{qw(found result matchk
part_descr_attr part_descr_trad part_name)} =
(1, $result, $matchingkey,
$key_val_str, $key_val_trad_str, $simple_part_name);
}
}
}
if (ref $bnpre && grep(!$_->{result}, @recip_tables)) { # any non-true?
# try new style: banned_namepath_re; it is global, not per-recipient
my $descr_str = join("\n",@descr);
if ($] < 5.012003) {
# avoid a [perl #62048] bug in lookup_re():
# Unwarranted "Malformed UTF-8 character" on tainted variable
untaint_inplace($descr_str);
}
my($result,$matchingkey) = lookup2(0, $descr_str, [$bnpre],
Label=>'banned_namepath_re');
if (defined $result) {
for my $e (@recip_tables) {
if (!$e->{found}) {
@$e{qw(found result matchk
part_descr_attr part_descr_trad part_name)} =
(1, $result, $matchingkey,
$key_val_str, $key_val_trad_str, $simple_part_name);
}
}
}
}
my(%esc) = (r => "\r", n => "\n", f => "\f", b => "\b",
e => "\e", a => "\a", t => "\t"); # for pretty-printing
my $ll = grep($_->{result}, @recip_tables) ? 1 : 3; # log level
for my $e (@recip_tables) { # log and store results
my($r, $recip, $result, $matchingkey,
$part_descr_attr, $part_descr_trad, $part_name) =
@$e{qw(r recip result matchk
part_descr_attr part_descr_trad part_name)};
if (ll($ll)) { # only bother with logging when needed
local($1);
my $mk = defined $matchingkey ? $matchingkey : ''; # pretty-print
$mk =~ s{ \\(.) }{ exists($esc{$1}) ? $esc{$1} : '\\'.$1 }xgse;
do_log($result?1:3, 'p.path%s %s: "%s"%s',
!$result?'':" BANNED:$result", $recip, $key_val_str,
!defined $result ? '' : ", matching_key=\"$mk\"");
}
my $a;
if ($result) { # the part being tested is banned for this recipient
$a = $r->banned_parts || [];
push(@$a,$part_descr_trad); $r->banned_parts($a);
$a = $r->banned_parts_as_attr || [];
push(@$a,$part_descr_attr); $r->banned_parts_as_attr($a);
$a = $r->banning_rule_rhs || [];
push(@$a,$result); $r->banning_rule_rhs($a);
$a = $r->banning_rule_key || [];
$matchingkey = "$matchingkey"; # make a plain string out of a qr
push(@$a,$matchingkey); $r->banning_rule_key($a);
my(@comments) = $matchingkey =~ / \( \? \# \s* (.*?) \s* \) /xgs;
$a = $r->banning_rule_comment || [];
push(@$a, @comments ? join(' ',@comments) : $matchingkey);
$r->banning_rule_comment($a);
if (!defined($r->banning_reason_short)) { # just the first
my $s = $part_name;
$s =~ s/[ \t]{6,}/ ... /g; # compact whitespace
$s = join(' ',@comments) . ':' . $s if @comments;
$r->banning_reason_short($s);
}
}
}
# last if !grep(!$_->{result}, @recip_tables); # stop if all recips true
} # endfor: message tree traversal
} # endif: doing parts checking
}
1;