File: //usr/share/perl5/vendor_perl/Amavis/Util.pm
package Amavis::Util;
use strict;
use re 'taint';
BEGIN {
require Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
$VERSION = '2.412';
@ISA = qw(Exporter);
@EXPORT_OK = qw(&untaint &untaint_inplace &min &max &minmax
&unique_list &unique_ref &format_time_interval
&is_valid_utf_8 &truncate_utf_8
&safe_encode &safe_encode_utf8 &safe_encode_utf8_inplace
&safe_decode &safe_decode_utf8 &safe_decode_latin1
&safe_decode_mime &q_encode &orcpt_encode &orcpt_decode
&xtext_encode &xtext_decode &proto_encode &proto_decode
&idn_to_ascii &idn_to_utf8 &clear_idn_cache
&mail_addr_decode &mail_addr_idn_to_ascii
&ll &do_log &do_log_safe &snmp_count &snmp_count64
&snmp_counters_init &snmp_counters_get &snmp_initial_oids
&debug_oneshot &update_current_log_level
&flush_captured_log &reposition_captured_log_to_end
&dump_captured_log &log_capture_enabled
&am_id &new_am_id &stir_random
&add_entropy &fetch_entropy_bytes
&generate_mail_id &make_password
&crunching_start_time &prolong_timer &get_deadline
&waiting_for_client &switch_to_my_time &switch_to_client_time
&sanitize_str &fmt_struct &freeze &thaw
&ccat_split &ccat_maj &cmp_ccat &cmp_ccat_maj
&setting_by_given_contents_category_all
&setting_by_given_contents_category &rmdir_recursively
&read_file &read_text &read_l10n_templates
&read_hash &read_array &dump_hash &dump_array
&dynamic_destination &collect_equal_delivery_recips);
}
use subs @EXPORT_OK;
use Errno qw(ENOENT EACCES EAGAIN ESRCH EBADF);
use IO::File qw(O_RDONLY O_WRONLY O_RDWR O_APPEND O_CREAT O_EXCL);
use Digest::MD5; # 2.22 provides 'clone' method, no longer needed since 2.7.0
use MIME::Base64;
use Encode (); # Perl 5.8 UTF-8 support
use Scalar::Util qw(tainted);
BEGIN {
if (eval { require Net::LibIDN2 }) {
*libidn_to_ascii = \&Net::LibIDN2::idn2_lookup_u8;
*libidn_to_unicode = \&Net::LibIDN2::idn2_to_unicode_88;
} elsif (eval { require Net::LibIDN }) {
*libidn_to_ascii = sub { Net::LibIDN::idn_to_ascii($_[0], 'UTF-8') };
*libidn_to_unicode = sub { Net::LibIDN::idn_to_unicode($_[0], 'UTF-8') };
} else {
die 'Neither Net::LibIDN2 nor Net::LibIDN module found';
}
}
use Amavis::Conf qw(:platform $DEBUG c cr ca $mail_id_size_bits
$myversion $snmp_contact $snmp_location
$trim_trailing_space_in_lookup_result_fields);
use Amavis::DbgLog;
use Amavis::Log qw(amavis_log_id write_log);
use Amavis::rfc2821_2822_Tools;
use Amavis::Timing qw(section_time);
use vars qw($enc_ascii $enc_utf8 $enc_latin1 $enc_w1252 $enc_tainted
$enc_taintsafe $enc_is_utf8_buggy);
BEGIN {
$enc_ascii = Encode::find_encoding('ascii');
$enc_utf8 = Encode::find_encoding('UTF-8'); # same as utf-8-strict
$enc_latin1 = Encode::find_encoding('ISO-8859-1');
$enc_w1252 = Encode::find_encoding('Windows-1252');
$enc_ascii or die "Amavis::Util: unknown encoding 'ascii'";
$enc_utf8 or die "Amavis::Util: unknown encoding 'UTF-8'";
$enc_latin1 or die "Amavis::Util: unknown encoding 'ISO-8859-1'";
$enc_w1252 or warn "Amavis::Util: unknown encoding 'Windows-1252'";
$enc_tainted = substr($ENV{PATH}.$ENV{HOME}.$ENV{AMAVIS_TEST_CONFIG}, 0,0); # tainted empty string
$enc_taintsafe = 1; # guessing
if (!tainted($enc_tainted)) {
warn "Amavis::Util: can't obtain a tainted string";
} else {
# NOTE: [rt.cpan.org #85489] - Encode::encode turns on the UTF8 flag
# on a passed argument. Give it a copy to avoid turning $enc_tainted
# or $enc_ps into a UTF-8 string!
# Encode::is_utf8 is always false on tainted in Perl 5.8, Perl bug #32687
my $enc_ps = "\x{2029}"; # Paragraph Separator, utf8 flag on
if (!Encode::is_utf8("$enc_ps $enc_tainted")) {
$enc_is_utf8_buggy = 1;
warn "Amavis::Util, Encode::is_utf8() fails to detect utf8 on tainted";
}
# test for Encode taint laundering bug [rt.cpan.org #84879], fixed in 2.50
if (!tainted($enc_ascii->encode("$enc_ps $enc_tainted"))) {
$enc_taintsafe = 0;
warn "Amavis::Util, Encode::encode() taint laundering bug, ".
"fixed in Encode 2.50";
} elsif (!tainted($enc_ascii->decode("xx $enc_tainted"))) {
$enc_taintsafe = 0;
warn "Amavis::Util, Encode::decode() taint laundering bug, ".
"fixed in Encode 2.50";
}
utf8::is_utf8("$enc_ps $enc_tainted")
or die "Amavis::Util, utf8::is_utf8() fails to detect utf8 on tainted";
!utf8::is_utf8("\xA0 $enc_tainted")
or die "Amavis::Util, utf8::is_utf8() claims utf8 on tainted";
my $t = "$enc_ps $enc_tainted";
utf8::encode($t);
tainted($t)
or die "Amavis::Util, utf8::encode() taint laundering bug";
!utf8::is_utf8($t)
or die "Amavis::Util, utf8::encode() failed to clear utf8 flag";
}
1;
}
# Return untainted copy of a string (argument can be a string or a string ref)
#
sub untaint($) {
return undef if !defined $_[0]; # must return undef even in a list context!
no re 'taint';
local $1; # avoids Perl taint bug: tainted global $1 propagates taintedness
(ref($_[0]) ? ${$_[0]} : $_[0]) =~ /^(.*)\z/s;
$1;
}
sub untaint_inplace($) {
return undef if !defined $_[0]; # must return undef even in a list context!
no re 'taint';
local $1; # avoid Perl taint bug: tainted global $1 propagates taintedness
$_[0] =~ /^(.*)\z/s;
$_[0] = $1;
}
# Returns the smallest defined number from the list, or undef
#
sub min(@) {
my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_; # accept list, or a list ref
my $m; defined $_ && (!defined $m || $_ < $m) && ($m = $_) for @$r;
$m;
}
# Returns the largest defined number from the list, or undef
#
sub max(@) {
my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_; # accept list, or a list ref
my $m; defined $_ && (!defined $m || $_ > $m) && ($m = $_) for @$r;
$m;
}
# Returns a pair of the smallest and the largest defined number from the list
#
sub minmax(@) {
my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_; # accept list, or a list ref
my $min; my $max;
for (@$r) {
if (defined $_) {
$min = $_ if !defined $min || $_ < $min;
$max = $_ if !defined $max || $_ > $max;
}
}
($min,$max);
}
# Returns a sublist of the supplied list of elements in an unchanged order,
# where only the first occurrence of each defined element is retained
# and duplicates removed
#
sub unique_list(@) {
my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_; # accepts list, or a list ref
my %seen; my(@result) = grep(defined($_) && !$seen{$_}++, @$r);
@result;
}
# same as unique, except that it returns a ref to the resulting list
#
sub unique_ref(@) {
my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_; # accepts list, or a list ref
my %seen; my(@result) = grep(defined($_) && !$seen{$_}++, @$r);
\@result;
}
sub format_time_interval($) {
my $t = $_[0];
return 'undefined' if !defined $t;
my $sign = ''; if ($t < 0) { $sign = '-'; $t = - $t };
my $dd = int($t / (24*3600)); $t = $t - $dd*(24*3600);
my $hh = int($t / 3600); $t = $t - $hh*3600;
my $mm = int($t / 60); $t = $t - $mm*60;
sprintf("%s%d %d:%02d:%02d", $sign, $dd, $hh, $mm, int($t+0.5));
}
# returns true if the provided string of octets represents a syntactically
# valid UTF-8 string, otherwise a false is returned
#
sub is_valid_utf_8($) {
# my $octets = $_[0];
return undef if !defined $_[0];
#
# RFC 6532: UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4
# RFC 3629 section 4: Syntax of UTF-8 Byte Sequences
# UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
# UTF8-1 = %x00-7F
# UTF8-2 = %xC2-DF UTF8-tail
# UTF8-3 = %xE0 %xA0-BF UTF8-tail /
# %xE1-EC 2( UTF8-tail ) /
# %xED %x80-9F UTF8-tail /
# # U+D800..U+DFFF are utf16 surrogates, not legal utf8
# %xEE-EF 2( UTF8-tail )
# UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) /
# %xF1-F3 3( UTF8-tail ) /
# %xF4 %x80-8F 2( UTF8-tail )
# UTF8-tail = %x80-BF
#
# loose variant:
# [\x00-\x7F] |
# [\xC0-\xDF][\x80-\xBF] |
# [\xE0-\xEF][\x80-\xBF]{2} |
# [\xF0-\xF4][\x80-\xBF]{3}
#
$_[0] =~ /^ (?: [\x00-\x7F] |
[\xC2-\xDF] [\x80-\xBF] |
\xE0 [\xA0-\xBF] [\x80-\xBF] |
[\xE1-\xEC] [\x80-\xBF]{2} |
\xED [\x80-\x9F] [\x80-\xBF] |
[\xEE-\xEF] [\x80-\xBF]{2} |
\xF0 [\x90-\xBF] [\x80-\xBF]{2} |
[\xF1-\xF3] [\x80-\xBF]{3} |
\xF4 [\x80-\x8F] [\x80-\xBF]{2} )* \z/xs ? 1 : 0;
}
# cleanly chop a UTF-8 byte sequence to $max_len or less, RFC 3629;
# if $max_len is undefined just chop off any partial last character
#
sub truncate_utf_8($;$) {
my($octets, $max_len) = @_;
return $octets if !defined $octets;
return '' if defined $max_len && $max_len <= 0;
substr($octets,$max_len) = ''
if defined $max_len && length($octets) > $max_len;
# missing one or more UTF8-tail octets? chop the entire last partial char
if ($octets =~ tr/\x00-\x7F//c) { # triage - is non-ASCII
$octets =~ s/[\xC0-\xDF]\z//s
or $octets =~ s/[\xE0-\xEF][\x80-\xBF]{0,1}\z//s
or $octets =~ s/[\xF0-\xF7][\x80-\xBF]{0,2}\z//s
or $octets =~ s/[\xF8-\xFB][\x80-\xBF]{0,3}\z//s # not strictly valid
or $octets =~ s/[\xFC-\xFD][\x80-\xBF]{0,4}\z//s # not strictly valid
or $octets =~ s/ \xFE [\x80-\xBF]{0,5}\z//sx; # not strictly valid
}
$octets;
}
# A wrapper for Encode::encode, avoiding a bug in Perl 5.8.0 which causes
# Encode::encode to loop and fill memory when given a tainted string.
# Also works around a CPAN bug #64642 in module Encode:
# Tainted values have the taint flag cleared when encoded or decoded.
# https://rt.cpan.org/Public/Bug/Display.html?id=64642
# Fixed in Encode 2.50 [rt.cpan.org #84879].
#
sub safe_encode($$;$) {
# my($encoding,$str,$check) = @_;
my $encoding = shift;
return undef if !defined $_[0]; # must return undef even in a list context!
my $enc = Encode::find_encoding($encoding);
$enc or die "safe_encode: unknown encoding '$encoding'";
# the resulting UTF8 flag is always off
return $enc->encode(@_) if $enc_taintsafe || !tainted($_[0]);
# Work around a taint laundering bug in Encode [rt.cpan.org #84879].
# Propagate taintedness across taint-related bugs in module Encode
# ( Encode::encode in Perl 5.8.0 fills up all available memory
# when given a tainted string with a non-encodeable character. )
$enc_tainted . $enc->encode(untaint($_[0]), $_[1]);
}
# Encodes logical characters to UTF-8 octets, or returns a string of octets
# (with utf8 flag off) unchanged. Ensures the result is always a string of
# octets (utf8 flag off). Unlike safe_encode(), a non-ASCII string with
# utf8 flag off will be returned unchanged, so the result may not be a
# valid UTF-8 string!
#
sub safe_encode_utf8($) {
my $str = $_[0];
return undef if !defined $str; # must return undef even in a list context!
utf8::encode($str) if utf8::is_utf8($str);
$str;
}
sub safe_encode_utf8_inplace($) {
return undef if !defined $_[0]; # must return undef even in a list context!
utf8::encode($_[0]) if utf8::is_utf8($_[0]);
}
sub safe_decode_latin1($) {
my $str = $_[0];
return undef if !defined $str; # must return undef even in a list context!
#
# -> http://en.wikipedia.org/wiki/Windows-1252
# Windows-1252 character encoding is a superset of ISO 8859-1, but differs
# from the IANA's ISO-8859-1 by using displayable characters rather than
# control characters in the 80 to 9F (hex) range. [...]
# It is very common to mislabel Windows-1252 text with the charset label
# ISO-8859-1. A common result was that all the quotes and apostrophes
# (produced by "smart quotes" in word-processing software) were replaced
# with question marks or boxes on non-Windows operating systems, making
# text difficult to read. Most modern web browsers and e-mail clients
# treat the MIME charset ISO-8859-1 as Windows-1252 to accommodate
# such mislabeling. This is now standard behavior in the draft HTML 5
# specification, which requires that documents advertised as ISO-8859-1
# actually be parsed with the Windows-1252 encoding.
#
if ($enc_taintsafe || !tainted($str)) {
return ($enc_w1252||$enc_latin1)->decode($str);
} else { # work around bugs in Encode
untaint_inplace($str);
return $enc_tainted . ($enc_w1252||$enc_latin1)->decode($str);
}
}
sub safe_decode_utf8($;$) {
my($str,$check) = @_;
return undef if !defined $str; # must return undef even in a list context!
if ($enc_taintsafe || !tainted($str)) {
return utf8::is_utf8($str) ? $str : $enc_utf8->decode($str, $check||0);
} else {
# Work around a taint laundering bug in Encode [rt.cpan.org #84879].
# Propagate taintedness across taint-related bugs in module Encode.
untaint_inplace($str);
return $enc_tainted .
(utf8::is_utf8($str) ? $str : $enc_utf8->decode($str, $check||0));
}
}
sub safe_decode($$;$) {
my($encoding,$str,$check) = @_;
return undef if !defined $str; # must return undef even in a list context!
my $enc = Encode::find_encoding($encoding);
return $str if !$enc;
# if the $check argument in a call to Encode::decode() is present it must be
# defined to avoid warning "Use of uninitialized value in subroutine entry"
return $enc->decode($str, $check||0) if $enc_taintsafe || !tainted($str);
# Work around a taint laundering bug in Encode [rt.cpan.org #84879].
# Propagate taintedness across taint-related bugs in module Encode.
untaint_inplace($str);
$enc_tainted . $enc->decode($str, $check||0);
}
# Handle Internationalized Domain Names according to IDNA: RFC 5890, RFC 5891.
# Similar to ToASCII (RFC 3490), but does not fail on garbage.
# Takes a domain name (possibly with utf8 flag on) consisting of U-labels
# or A-labels or NR-LDH labels, converting each label to A-label, lowercased.
# Non- IDNA-valid strings are only encoded to UTF-8 octets but are otherwise
# unchanged. Result is in octets regardless of input, taintedness of the
# argument is propagated to the result.
#
my %idn_encode_cache;
sub clear_idn_cache() { %idn_encode_cache = () }
sub idn_to_ascii($) {
# propagate taintedness of the argument, but not its utf8 flag
return tainted($_[0]) ? $idn_encode_cache{$_[0]} . $enc_tainted
: $idn_encode_cache{$_[0]}
if exists $idn_encode_cache{$_[0]};
my $s = $_[0];
my $t = tainted($s); # taintedness of the argument
return undef if !defined $s;
untaint_inplace($s) if $t;
# to octets if needed, not necessarily valid UTF-8
utf8::encode($s) if utf8::is_utf8($s);
if ($s !~ tr/\x00-\x7F//c) { # is all-ASCII (including IP address literal)
$s = lc $s;
} else {
# Net::LibIDN(2) does not like a leading dot (or '@') in a valid domain name,
# but we need it (e.g. in lookups, meaning subdomains are included), so
# we have to carry a prefix across the call to Net::LibIDN::idn_to_ascii() or
# Net::LibIDN2::idn2_lookup_u8() (wrapped in libidn_to_ascii() here).
my $prefix; local($1);
$prefix = $1 if $s =~ s/^([.\@])//s; # strip a leading dot or '@'
# to ASCII-compatible encoding (ACE)
my $sa = libidn_to_ascii($s);
$s = lc $sa if defined $sa;
$s = $prefix.$s if $prefix;
}
$idn_encode_cache{$_[0]} = $s;
$t ? $s.$enc_tainted : $s; # propagate taintedness of the argument
}
# Handle Internationalized Domain Names according to IDNA: RFC 5890, RFC 5891.
# Implements ToUnicode (RFC 3490). ToUnicode always succeeds, because it just
# returns the original string if decoding fails. In particular, this means that
# ToUnicode has no effect on a label that does not begin with the ACE prefix.
# Takes a domain name (as a string of octets or logical characters)
# of "Internationalized labels" (A-labels, U-labels, or NR-LDH labels),
# converting each label to U-label. Result is a string of octets encoded
# as UTF-8 if input was valid.
#
sub idn_to_utf8($) {
my $s = $_[0];
return undef if !defined $s;
safe_encode_utf8_inplace($s); # to octets (if not already)
if ($s =~ /(?: ^ | \. ) xn-- [\x00-\x2D\x2F-\xFF]{0,58} [\x00-\x2C\x2F-\xFF]
(?: \z | \. )/xsi) { # contains XN-label
my $su = libidn_to_unicode(lc $s);
return $su if defined $su;
}
$s;
}
# decode octets found in a mail header field body to a logical chars string
#
sub safe_decode_mime($) {
my $str = $_[0]; # octets
return undef if !defined $str;
my $chars; # logical characters
if ($str !~ tr/\x00-\x7F//c) { # is all-ASCII
# test for any RFC 2047 encoded-words
# encoded-text: Any printable ASCII character other than "?" or SPACE
# permissive: SPACE and other characters can be observed in Q encoded-word
if ($str !~ m{ =\? [^?]* \? (?: [Bb] \? [A-Za-z0-9+/=]*? |
[Qq] \? .*? ) \?= }xs) {
return $str; # good, keep as-is, all-ASCII with no encoded-words
}
# normal, all-ASCII with some encoded-words, try to decode encoded-words
# using Encode::MIME::Header
eval { $chars = safe_decode('MIME-Header',$str); 1 } # RFC 2047
and return $chars;
# give up, is all-ASCII but not MIME, just return as-is
return $str;
}
# contains at least some non-ASCII
if ($str =~ m{ =\? [^?]* \? (?: [Bb] \? [A-Za-z0-9+/=]* |
[Qq] \? [\x20-\x3E\x40-\x7F]* ) \?= }xs) {
# strange/rare, non-ASCII, but also contains RFC 2047 encoded-words !?
# decode any RFC 2047 encoded-words, attempt to decode the rest
# as UTF-8 if valid, or as Windows-1252 (or ISO-8859-1) otherwise
local($1);
$str =~ s{ ( =\? [^?]* \? (?: [Bb] \? [A-Za-z0-9+/=]* |
[Qq] \? [\x20-\x3E\x40-\x7F]* ) \?= ) |
( [^=]* | . )
}{ my $s;
if (defined $1) {
$s = $1; # using Encode::MIME::Header
eval { $s = safe_decode('MIME-Header',$s) };
} else {
$s = $2;
eval { $s = safe_decode_utf8($s, 1|8); 1 }
or do { $s = safe_decode_latin1($s) };
}
$s;
}xgse;
return $str;
}
# contains at least some non-ASCII and no RFC 2047 encoded-words
# non-MIME-encoded KOI8 seems to be pretty common, attempt some guesswork
if (length($str) >= 4 &&
$str !~ tr/\x80-\xA2\xA5\xA8-\xAC\xAE-\xB2\xB5\xB8-\xBC\xBE-\xBF//) {
# does *not* contain UTF8-tail octets (sans KOI8-U letters in that range)
my $koi8_cyr_lett_cnt = # count cyrillic letters
$str =~ tr/\xA3\xA4\xA6\xA7\xAD\xB3\xB4\xB6\xB7\xBD\xC0-\xFF//;
if ($koi8_cyr_lett_cnt >= length($str)*2/3 && # mostly cyrillic letters
($str =~ tr/A-Za-z//) <= 5 && # not many ASCII letters
!is_valid_utf_8($str) ) {
# try decoding as KOI8-U (like KOI8-R but with 8 extra letters)
eval { $chars = safe_decode('KOI8-U',$str,1|8); 1; }
and return $chars; # hopefully the result makes sense
}
}
# contains at least some non-ASCII, no RFC 2047 encoded-words, not KOI8
if ($enc_taintsafe || !tainted($str)) {
# FB_CROAK | LEAVE_SRC
eval { $chars = $enc_utf8->decode($str,1|8); 1; } # try strict UTF-8
and return $chars;
# fallback, assume Windows-1252 or ISO-8859-1
# note that Windows-1252 is a proper superset of ISO-8859-1
return ($enc_w1252||$enc_latin1)->decode($str);
} else { # work around bugs in Encode
untaint_inplace($str);
eval { $chars = $enc_utf8->decode($str,1|8); 1; } # try strict UTF-8
and return $enc_tainted . $chars;
return $enc_tainted . ($enc_w1252||$enc_latin1)->decode($str);
}
}
# Do the Q-encoding manually, the MIME::Words::encode_mimeword does not
# encode spaces and does not limit to 75 ch, which violates the RFC 2047
#
sub q_encode($$$) {
my($octets,$encoding,$charset) = @_;
my $prefix = '=?' . $charset . '?' . $encoding . '?';
my $suffix = '?='; local($1,$2,$3);
# FWS | utext (= NO-WS-CTL|rest of US-ASCII)
$octets =~ /^ ( [\001-\011\013\014\016-\177]* [ \t] )? (.*?)
( [ \t] [\001-\011\013\014\016-\177]* )? \z/xs;
my($head,$rest,$tail) = ($1,$2,$3);
# Q-encode $rest according to RFC 2047 (not for use in comments or phrase)
$rest =~ s{([\000-\037\177\200-\377=?_])}{sprintf('=%02X',ord($1))}gse;
$rest =~ tr/ /_/; # turn spaces into _ (RFC 2047 allows it)
my $s = $head; my $len = 75 - (length($prefix)+length($suffix)) - 2;
while ($rest ne '') {
$s .= ' ' if $s !~ /[ \t]\z/; # encoded words must be separated by FWS
$rest =~ /^ ( .{0,$len} [^=] (?: [^=] | \z ) ) (.*) \z/xs;
$s .= $prefix.$1.$suffix; $rest = $2;
}
$s.$tail;
}
# encode "+", "=" and any character outside the range "!" (33) .. "~" (126)
#
sub xtext_encode($) { # RFC 3461
my $str = $_[0]; local($1);
safe_encode_utf8_inplace($str); # to octets (if not already)
$str =~ s/([^\041-\052\054-\074\076-\176])/sprintf('+%02X',ord($1))/gse;
$str;
}
# decode xtext-encoded string as per RFC 3461
#
sub xtext_decode($) {
my $str = $_[0]; local($1);
$str =~ s/\+([0-9a-fA-F]{2})/pack('C',hex($1))/gse;
$str;
}
sub proto_encode($@) {
my($attribute_name,@strings) = @_; local($1);
for ($attribute_name,@strings) {
# just in case, handle non-octet characters:
s/([^\000-\377])/sprintf('\\x{%04x}',ord($1))/gse and
do_log(-1,'proto_encode: non-octet character encountered: %s', $_);
}
$attribute_name =~ # encode all but alfanumerics, . _ + -
s/([^0-9a-zA-Z._+-])/sprintf('%%%02x',ord($1))/gse;
for (@strings) { # encode % and nonprintables
s/([^\041-\044\046-\176])/sprintf('%%%02x',ord($1))/gse;
}
$attribute_name . '=' . join(' ',@strings);
}
sub proto_decode($) {
my $str = $_[0]; local($1);
$str =~ s/%([0-9a-fA-F]{2})/pack('C',hex($1))/gse;
$str;
}
# Expects an e-mail address as a string of octets, where a local part
# may be encoded as UTF-8, and the domain part may be an international
# domain name (IDN) consisting either of U-labels or A-labels or NR-LDH
# labels. Decodes A-labels to U-labels in domain name. If $result_as_octets
# is false decodes the resulting UTF-8 octets from previous step and returns
# a string of characters. If $result_as_octets is true the subroutine skips
# decoding of UTF-8 octets, the result will be a string of octets, only valid
# as UTF-8 if the provided $addr was a valid UTF-8 (garbage-in/garbage-out).
#
sub mail_addr_decode($;$) {
my($addr, $result_as_octets) = @_;
return undef if !defined $addr;
safe_encode_utf8_inplace($addr); # to octets (if not already)
local($1); my $domain;
my $bracketed = $addr =~ s/^<(.*)>\z/$1/s;
if ($addr =~ s{ \@ ( [^\@]* ) \z}{}xs) {
$domain = $1;
$domain = idn_to_utf8($domain) if $domain =~ /(?:^|\.)xn--/si;
if ($domain !~ tr/\x00-\x7F//c) { # all-ASCII
$domain = lc $domain;
} elsif (!$result_as_octets) { # non-ASCII, attempt decoding UTF-8
# attempt decoding as strict UTF-8, otherwise fall back to Latin1
# Not lowercased.
eval { $domain = safe_decode_utf8($domain, 1|8); 1 }
or do { $domain = safe_decode_latin1($domain) };
}
}
# deal with localpart
if (!$result_as_octets && $addr =~ tr/\x00-\x7F//c) { # non-ASCII
# attempt decoding as strict UTF-8, otherwise fall back to Latin1
eval { $addr = safe_decode_utf8($addr, 1|8); 1 }
or do { $addr = safe_decode_latin1($addr) };
}
$addr .= '@'.$domain if defined $domain; # put back the domain part
$bracketed ? '<'.$addr.'>' : $addr;
}
# Expects an e-mail address as a string of octets or as logical characters
# (with utf8 flag on), where a local part may be encoded as UTF-8, and the
# domain part may be an international domain name (IDN) consisting either
# of U-labels or A-labels or NR-LDH. Leaves the localpart unchanged, encodes
# the domain name to ASCII-compatible encoding (ACE) if it is non-ASCII.
# The result is always in octets (UTF-8), domain part is lowercased.
#
sub mail_addr_idn_to_ascii($) {
my $addr = $_[0];
return undef if !defined $addr;
safe_encode_utf8_inplace($addr); # to octets (if not already)
local($1);
my $bracketed = $addr =~ s/^<(.*)>\z/$1/s;
$addr =~ s{ (\@ [^\@]*) \z }{ idn_to_ascii($1) }xse;
$bracketed ? '<'.$addr.'>' : $addr;
}
# RFC 6533: encode an ORCPT mail address (as obtained from orcpt_decode,
# logical characters (utf8 flag may be on)) into one of the forms:
# utf-8-address, utf-8-addr-unitext, utf-8-addr-xtext, or as a legacy
# xtext (RFC 3461), returning a string of octets
#
sub orcpt_encode($;$$) {
my($str, $smtputf8, $encode_for_smtp) = @_;
return (undef,undef) if !defined $str;
# "Original-Recipient" ":" address-type ";" generic-address
# address-type = atom
# atom = [CFWS] 1*atext [CFWS]
# RFC 3461: Due to limitations in the Delivery Status Notification format,
# the value of the original recipient address prior to encoding as "xtext"
# MUST consist entirely of printable (graphic and white space) characters
# from the US-ASCII [4] repertoire.
my $addr_type = ''; # expected 'rfc822' or 'utf-8', possibly empty
local($1); # get address-type (atom, up to a semicolon) and remove it
if ($str =~ s{^[ \t]*([0-9A-Za-z!\#\$%&'*/=?^_`{|}~+-]*)[ \t]*;[ \t]*}{}s) {
$addr_type = lc $1;
}
ll(5) && do_log(5, 'orcpt_encode %s, %s%s%s%s',
$addr_type, $str,
$smtputf8 ? ', smtputf8' : '',
$encode_for_smtp ? ', encode_for_smtp' : '',
utf8::is_utf8($str) ? ', is_utf8' : '');
$str = $1 if $str =~ /^<(.*)>\z/s;
if ($smtputf8 && utf8::is_utf8($str) &&
($addr_type eq 'utf-8' || $str =~ tr/\x00-\x7F//c)) {
# for use in SMTPUTF8 (RCPT TO) or in message/global-delivery-status
if ($encode_for_smtp && $str =~ tr{\x00-\x20+=\\}{}) {
# contains +,=,\,SP,ctrl -> encode as utf-8-addr-unitext
# HEXPOINT in EmbeddedUnicodeChar is 2 to 6 hexadecimal digits.
$str =~ s{ ( [^\x21-\x2A\x2C-\x3C\x3E-\x5B\x5D-\x7E\x80-\xF4] ) }
{ sprintf('\\x{%02X}', ord($1)) }xgse; # 2..6 uppercase hex!
} else {
# no restricted characters or not for SMTP -> keep as utf-8-address
#
# The utf-8-address form MAY be used in the ORCPT parameter when the
# SMTP server also advertises support for SMTPUTF8 and the address
# doesn't contain any ASCII characters not permitted in the ORCPT
# parameter. It SHOULD be used in a message/global-delivery-status
# "Original-Recipient:" or "Final-Recipient:" DSN field, or in an
# "Original-Recipient:" header field [RFC3798] if the message is a
# SMTPUTF8 message.
}
safe_encode_utf8_inplace($str); # to octets (if not already)
$addr_type = 'utf-8';
} else {
# RFC 6533: utf-8-addr-xtext MUST be used in the ORCPT parameter
# when the SMTP server doesn't advertise support for SMTPUTF8
if ($str =~ tr/\x00-\x7F//c && utf8::is_utf8($str)) {
# non-ASCII UTF-8, encode as utf-8-addr-xtext
# RFC 6533: QCHAR = %x21-2a / %x2c-3c / %x3e-5b / %x5d-7e
# HEXPOINT in EmbeddedUnicodeChar is 2 to 6 hexadecimal digits.
$str =~ s{ ( [^\x21-\x2A\x2C-\x3C\x3E-\x5B\x5D-\x7E] ) }
{ sprintf('\\x{%02X}', ord($1)) }xgse; # 2..6 uppercase hex!
safe_encode_utf8_inplace($str); # to octets (if not already)
$addr_type = 'utf-8';
} else { # encode as legacy RFC 3461 xtext
# encode +, =, \, SP, controls
safe_encode_utf8_inplace($str); # encode to octets first!
$str =~ s{ ( [^\x21-\x2A\x2C-\x3C\x3E-\x5B\x5D-\x7E] ) }
{ sprintf('+%02X', ord($1)) }xgse; # exactly two uppercase hex
$addr_type = 'rfc822';
}
}
($addr_type, $str);
}
# Decode an encoded ORCPT e-mail address (a string of octets, encoded as
# xtext, utf-8-addr-xtext, utf-8-addr-unitext, or utf-8-address) as per
# RFC 3461 and RFC 6533. Result is presumably an RFC 5322 -encoded mail
# address, possibly as utf8-flagged characters string (if valid UTF-8),
# no angle brackets.
#
sub orcpt_decode($;$) {
my($str, $smtputf8) = @_;
return (undef,undef) if !defined $str;
my $addr_type = ''; local($1);
# get address-type (atom, up to a semicolon) and remove it
if ($str =~ s{^[ \t]*([0-9A-Za-z!\#\$%&'*/=?^_`{|}~+-]*)[ \t]*;[ \t]*}{}s) {
$addr_type = lc $1;
}
if ($addr_type eq '') {
# assumed not encoded (e.g. internally generated)
if ($str =~ tr/\x00-\x7F//c && is_valid_utf_8($str) &&
eval { $str = safe_decode_utf8($str, 1|8); 1 }) {
$addr_type = 'utf-8';
} else {
$addr_type = 'rfc822';
}
} elsif ($addr_type ne 'utf-8') { # presumably 'rfc822'
# decode xtext-encoded string as per RFC 3461,
# hexchar = ASCII "+" immediately followed by two UPPER CASE hex digits
$str =~ s{ \+ ( [0-9A-F]{2} ) }{ pack('C',hex($1)) }xgse;
# now have a string of octets, possibly with (invalid) 8bit characters
# we may have a legacy encoding which should really be a utf-8 addr_type
if ($smtputf8 && lc $addr_type eq 'rfc822' &&
$str =~ tr/\x00-\x7F//c && is_valid_utf_8($str) &&
eval { $str = safe_decode_utf8($str, 1|8); 1 }) {
$addr_type = 'utf-8';
}
} elsif ($str !~ tr/\x00-\x7F//c) { # address-type is 'utf-8', is all-ASCII
# Looks like utf-8-addr-xtext or utf-8-addr-unitext.
# Permissive decoding of EmbeddedUnicodeChar, as well as a legacy xtext:
# RFC 6533: UTF-8 address type has 3 forms:
# utf-8-addr-xtext, utf-8-addr-unitext, and utf-8-address.
$str =~ s{ \\ x \{ ( [0-9A-Fa-f]{2,6} ) \} |
\+ ( [0-9A-F]{2} ) }
{ pack('U', hex(defined $1 ? $1 : $2)) }xgse;
# RFC 6533 prohibits <NUL> and surrogates in EmbeddedUnicodeChar,
# as well as encoded printable ASCII chars except xtext-specials +, =, \
} elsif (is_valid_utf_8($str) &&
eval { $str = safe_decode_utf8($str, 1|8); 1 }) {
# Looks like a utf-8-address. Successfully decoded UTF-8 octets to chars.
# permissive decoding of EmbeddedUnicodeChar, as well as a legacy xtext
$str =~ s{ \\ x \{ ( [0-9A-Fa-f]{2,6} ) \} |
\+ ( [0-9A-F]{2} ) }
{ pack('U', hex(defined $1 ? $1 : $2)) }xgse;
} else { # address-type is 'utf-8', non-ASCII, invalid UTF-8 string
# RFC 6533: if an address is labeled with the UTF-8 address type
# but does not conform to utf-8 syntax, then it MUST be copied into
# the message/global-delivery-status field without alteration.
# --> just leave $str unchanged as octets
}
# result in $str is presumably an RFC 5322 -encoded addr,
# possibly as utf8-flagged characters, no angle brackets
($addr_type, $str);
}
# Mostly for debugging and reporting purposes:
# Convert nonprintable characters in the argument
# to \[rnftbe], or hex code, ( and '\' to '\\' ???),
# and Unicode characters to UTF-8, returning a sanitized string.
#
use vars qw(%quote_controls_map);
BEGIN {
%quote_controls_map =
("\r" => '\\r', "\n" => '\\n', "\t" => '\\t', "\\" => '\\\\');
# leave out the <FF>, <BS> and <ESC>, these are too confusing in the log,
# better to just hand them over to hex quoting ( \xHH )
# ("\r" => '\\r', "\n" => '\\n', "\f" => '\\f', "\t" => '\\t',
# "\b" => '\\b', "\e" => '\\e', "\\" => '\\\\');
}
sub sanitize_str {
my($str, $keep_eol) = @_;
return '' if !defined $str;
safe_encode_utf8_inplace($str); # to octets (if not already)
# $str is now in octets, UTF8 flag is off
local($1);
if ($keep_eol) {
# controls except LF, DEL, backslash
$str =~ s/([\x00-\x09\x0B-\x1F\x7F\\])/
$quote_controls_map{$1} || sprintf('\\x%02X', ord($1))/gse;
} else {
# controls, DEL, backslash
$str =~ s/([\x00-\x1F\x7F\\])/
$quote_controls_map{$1} || sprintf('\\x%02X', ord($1))/gse;
}
$str;
}
# Set or get Amavis internal task id (also called: log id).
# This task id performs a similar function as queue-id in MTA responses.
# It may only be used in generating text part of SMTP responses,
# or in generating log entries. It is only unique within a limited timespan.
use vars qw($amavis_task_id); # internal task id
# (accessible via am_id() and later also as $msginfo->log_id)
sub am_id(;$) {
if (@_) { # set, if argument is present
$amavis_task_id = $_[0];
amavis_log_id($amavis_task_id);
$0 = c('myprogram_name') .
(!defined $amavis_task_id ? '' : " ($amavis_task_id)");
}
$amavis_task_id; # return current value
}
sub new_am_id($;$$) {
my($str, $cnt, $seq) = @_;
my $id = defined $str ? $str : sprintf('%05d', $$);
$id .= sprintf('-%02d', $cnt) if defined $cnt;
$id .= '-'.$seq if defined $seq && $seq > 1;
am_id($id);
}
use vars qw($entropy); # MD5 ctx (128 bits, 32 hex digits or 22 base64 chars)
sub add_entropy(@) { # arguments may be strings or array references
$entropy = Digest::MD5->new if !defined $entropy;
my $s = join(',', map((!defined $_ ? 'U' : ref eq 'ARRAY' ? @$_ : $_), @_));
utf8::encode($s) if utf8::is_utf8($s);
# do_log(5,'add_entropy: %s',$s);
$entropy->add($s);
}
sub fetch_entropy_bytes($) {
my $n = $_[0]; # number of bytes to collect
my $result = '';
for (; $n > 0; $n--) {
# collect as few bits per MD5 iteration as possible (RFC 4086 sect 6.2.1)
# let's settle for 8 bits for practical reasons; fewer would be better
my $digest = $entropy->digest; # 16 bytes; also destroys accumulator
$result .= substr($digest,0,1); # take 1 byte
$entropy->reset; $entropy->add($digest); # cycle it back
}
# ll(5) && do_log(5,'fetch_entropy_bytes %s',
# join(' ', map(sprintf('%02x',$_), unpack('C*',$result))));
$result;
}
# read number of bytes from a /dev/urandom device
#
sub read_random_bytes($$) {
# my($buff,$required_bytes) = @_;
$_[0] = '';
my $required_bytes = $_[1];
my $fname = '/dev/urandom'; # nonblocking device!
if ($required_bytes > 0) {
my $fh = IO::File->new;
$fh->open($fname,O_RDONLY) # does a sysopen()
or die "Can't open $fname: $!";
$fh->binmode or die "Can't set $fname to binmode: $!";
my $nbytes = $fh->sysread($_[0], $required_bytes);
defined $nbytes or die "Error reading from $fname: $!";
$nbytes >= $required_bytes or die "Less data read than requested: $!";
$fh->close or die "Error closing $fname: $!";
}
undef;
}
# stir/initialize perl's random generator and our entropy pool;
# to be called at startup of the main process and each child processes
#
sub stir_random() {
my $random_bytes;
eval {
read_random_bytes($random_bytes,16); 1;
} or do {
my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
do_log(0, 'read_random_bytes error: %s', $eval_stat);
undef $random_bytes;
};
srand(); # let perl give it a try first, then stir-in some additional bits
add_entropy($random_bytes, Time::HiRes::gettimeofday, $$, rand());
#
# must prevent all child processes working with the same inherited random
# seed, otherwise modules like File::Temp will step on each other's toes
my $r = unpack('L', fetch_entropy_bytes(4)) ^ int(rand(0xffffffff));
srand($r & 0x7fffffff);
}
# generate a reasonably unique (long-term) id based on collected entropy.
# The result is a pair of a (mostly public) mail_id, and a secret id,
# where mail_id == b64(md5(secret_bin)). The secret id could be used to
# authorize releasing quarantined mail. Both the mail_id and secret id are
# strings of characters [A-Za-z0-9-_], with an additional restriction
# for mail_id which must begin and end with an alphanumeric character.
# The number of bits in a mail_id is configurable through $mail_id_size_bits
# and defaults to 72, yielding a 12-character base64url-encoded string.
# The number of bits must be an integral multiple of 24, so that no base64
# trailing padding characters '=' are needed (RFC 4648).
# Note the difference in base64-like encodings:
# amavisd almost-base64: 62 +, 63 - (old, no longer used since 2.7.0)
# RFC 4648 base64: 62 +, 63 / (not used here)
# RFC 4648 base64url: 62 -, 63 _
# Generally, RFC 5322 controls, SP and specials must be avoided: ()<>[]:;@\,."
# With version 2.7.0 of amavisd we switched from almost-base64 to base64url
# to avoid having to quote a '+' in regular expressions and in URL.
#
sub generate_mail_id() {
my($id_b64, $secret_bin);
# 72 bits = 9 bytes = 12 b64 chars
# 96 bits = 12 bytes = 16 b64 chars
$mail_id_size_bits > 0 &&
$mail_id_size_bits == int $mail_id_size_bits &&
$mail_id_size_bits % 24 == 0
or die "\$mail_id_size_bits ($mail_id_size_bits) must be a multiple of 24";
for (my $j=0; $j<100; $j++) { # provide some sanity loop limit just in case
$secret_bin = fetch_entropy_bytes($mail_id_size_bits/8);
# mail_id is computed as md5(secret), rely on unidirectionality of md5
$id_b64 = Digest::MD5->new->add($secret_bin)->b64digest; # b64(md5(sec))
add_entropy($id_b64,$j); # fold it back into accumulator
substr($id_b64, $mail_id_size_bits/6) = ''; # b64, crop to size
# done if it starts and ends with an alfanumeric character
last if $id_b64 =~ /^[A-Za-z0-9].*[A-Za-z0-9]\z/s;
# retry on less than 7% of cases
do_log(5,'generate_mail_id retry: %s', $id_b64);
}
$id_b64 =~ tr{+/}{-_}; # base64 -> RFC 4648 base64url [A-Za-z0-9-_]
if (!wantarray) { # not interested in secret
$secret_bin = 'X' x length($secret_bin); # can't hurt to wipe out
return $id_b64;
}
my $secret_b64 = encode_base64($secret_bin,''); # $mail_id_size_bits/6 chars
$secret_bin = 'X' x length($secret_bin); # can't hurt to wipe out
$secret_b64 =~ tr{+/}{-_}; # base64 -> RFC 4648 base64url [A-Za-z0-9-_]
# do_log(5,'generate_mail_id: %s %s', $id_b64, $secret_b64);
($id_b64, $secret_b64);
}
# Returns a password that may be used for scrambling of a message being
# released from a quarantine or mangled, with intention of preventing an
# automatic or undesired implicit opening of a potentially dangerous message.
# The first argument may be: a plain string, which is simply passed on
# to the result, or: a code reference (to be evaluated in a scalar context),
# allowing for lazy evaluation of a supplied password generating code,
# or: undef, which causes a generation of a simple 4-digit PIN-like random
# password. The second argument is just passed on unchanged to the supplied
# subroutine and is expected to be a $msginfo object.
#
sub make_password($$) {
my($password,$msginfo) = @_;
if (ref $password eq 'CODE') {
eval {
$password = &$password($msginfo);
chomp $password; $password =~ s/^[ \t]+//; $password =~ s/[ \t]+\z//;
untaint_inplace($password) if $password =~ /^[A-Za-z0-9:._=+-]*\z/;
1;
} or do {
my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
do_log(-1, 'password generating subroutine failed, '.
'supplying a default: %s', $@);
$password = undef;
};
}
if (!defined $password) { # create a 4-digit random string
my $r;
do {
$r = unpack('S',fetch_entropy_bytes(2)); # 0 .. 65535
# ditch useless samples beyond 60000
} until $r < 65536 - (65536 % 10000);
$password = sprintf('%04d', $r % 10000);
$r = 0; # clear the IV field of a scalar (the undef() doesn't do so)
}
$password;
}
use vars qw(@counter_names);
# elements may be counter names (increment is 1), or pairs: [name,increment],
# or triples: [name,value,type], where type can be: C32, C64, INT, TIM or OID
sub snmp_counters_init() { @counter_names = () }
sub snmp_count(@) { push(@counter_names, @_) }
sub snmp_count64(@) { push(@counter_names, map(ref $_ ?$_ :[$_,1,'C64'], @_)) }
sub snmp_counters_get() { \@counter_names }
sub snmp_initial_oids() {
return [
['sysDescr', 'STR', $myversion], # 0..255 octets
['sysObjectID', 'OID', '1.3.6.1.4.1.15312.2'],
# iso.org.dod.internet.private.enterprise.ijs.amavis
['sysUpTime', 'INT', int(time)], # to be converted to TIM
# later it must be converted to timeticks (10ms units since start)
['sysContact', 'STR', safe_encode_utf8($snmp_contact)], # 0..255 octets
# Network Unicode format (Net-Unicode) RFC 5198, instead of NVT ASCII
['sysName', 'STR', idn_to_utf8(c('myhostname'))], # 0..255 octets
['sysLocation', 'STR', safe_encode_utf8($snmp_location)], # 0..255 octets
['sysServices', 'INT', 64], # application
];
}
use vars qw($debug_oneshot);
sub debug_oneshot(;$$) {
if (@_) {
my $new_debug_oneshot = shift;
if (($new_debug_oneshot ? 1 : 0) != ($debug_oneshot ? 1 : 0)) {
do_log(0, 'DEBUG_ONESHOT: TURNED '.($new_debug_oneshot ? 'ON' : 'OFF'));
do_log(0, shift) if @_; # caller-provided extra log entry, usually
# the one that caused debug_oneshot call
}
$debug_oneshot = $new_debug_oneshot;
}
$debug_oneshot;
}
use vars qw($dbg_log);
sub log_capture_enabled(;$) {
if (@_) {
my $new_state = $_[0];
if (!$dbg_log && $new_state) {
$dbg_log = Amavis::DbgLog->new;
} elsif ($dbg_log && !$new_state) {
undef $dbg_log; # calls its destructor
}
}
$dbg_log ? 1 : 0;
}
use vars qw($current_config_log_level
$current_config_syslog_ident
$current_config_syslog_facility);
# keeping current settings avoids the most frequent calls to c()
sub update_current_log_level() {
$current_config_log_level = c('log_level') || 0;
$current_config_syslog_ident = c('syslog_ident');
$current_config_syslog_facility = c('syslog_facility');
}
# is message log level below the current log level (i.e. eligible for logging)?
#
sub ll($) {
(($DEBUG || $debug_oneshot) && $_[0] > 0 ? 0 : $_[0])
<= $current_config_log_level
|| $dbg_log;
}
# write a log entry (optimized, called often)
#
sub do_log($$;@) {
# my($level,$errmsg,@args) = @_;
my $level = $_[0];
# if (ll($level)) { # inlined and reorderd the ll() call for speed
if ( $level <= $current_config_log_level ||
( ($DEBUG || $debug_oneshot) && $level > 0
&& 0 <= $current_config_log_level ) ||
$dbg_log ) {
my $errmsg; # the $_[1] is expected to be ASCII or UTF-8 octets (not char)
if (@_ <= 2) { # no arguments to sprintf
$errmsg = $_[1];
} elsif (@_ == 3) { # a single argument to sprintf, optimized common case
if (utf8::is_utf8($_[2])) {
my $arg1 = $_[2]; utf8::encode($arg1);
$errmsg = sprintf($_[1], $arg1);
} else {
$errmsg = sprintf($_[1], $_[2]);
}
} else {
# treat $errmsg as sprintf format string if additional args are provided;
# encode arguments individually to avoid mojibake when UTF8-flagged and
# non- UTF8-flagged strings are concatenated;
my @args = @_[2..$#_];
for (@args) { utf8::encode($_) if utf8::is_utf8($_) }
$errmsg = sprintf($_[1], @args);
}
local($1);
# protect controls, DEL, and backslash; make sure to leave UTF-8 untouched
$errmsg =~ s/([\x00-\x1F\x7F\\])/
$quote_controls_map{$1} || sprintf('\\x%02X', ord($1))/gse;
$dbg_log->write_dbg_log($level,$errmsg) if $dbg_log;
$level = 0 if ($DEBUG || $debug_oneshot) && $level > 0;
if ($level <= $current_config_log_level) {
write_log($level,$errmsg);
### $Amavis::zmq_obj->write_log($level,$errmsg) if $Amavis::zmq_obj;
}
}
1;
}
# equivalent to do_log, but protected by eval so that it can't bail out
#
sub do_log_safe($$;@) {
# ignore failures while keeping perlcritic happy
eval { do_log(shift,shift,@_) } or 1;
1;
}
sub flush_captured_log() {
$dbg_log->flush
or die "Can't flush debug log file: $!" if $dbg_log;
}
sub reposition_captured_log_to_end() {
$dbg_log->reposition_to_end
or die "Can't reposition debug log file to its end: $!" if $dbg_log;
}
sub dump_captured_log($$) {
my($dump_log_level, $enable_log_capture_dump) = @_;
$dbg_log->dump_captured_log($dump_log_level,
$enable_log_capture_dump && ll($dump_log_level)) if $dbg_log;
}
# $timestamp_of_last_reception: a Unix time stamp when an MTA client send the
# last command to us, the most important of which is the reception of a final
# dot in SMTP session, which is a time when a client started to wait for our
# response; this timestamp, along with a c('child_timeout'), make a deadline
# time for our processing
#
# $waiting_for_client: which timeout is running:
# false: processing is in our courtyard, true: waiting for a client
#
use vars qw($timestamp_of_last_reception $waiting_for_client);
sub waiting_for_client(;$) {
$waiting_for_client = $_[0] if @_;
$waiting_for_client;
}
sub get_deadline(@) {
my($which_section, $allowed_share, $reserve, $max_time) = @_;
# $allowed_share ... factor between 0 and 1 of the remaining time till a
# deadline, to be allocated to the task that follows
# $reserve ... try finishing up $reserve seconds before the deadline;
# $max_time ... upper limit in seconds for the timer interval
my($timer_interval, $timer_deadline, $time_to_deadline);
my $child_t_o = c('child_timeout');
if (!$child_t_o) {
do_log(2, 'get_deadline %s - ignored, child_timeout not set',
$which_section);
} elsif (!defined $timestamp_of_last_reception) {
do_log(2, 'get_deadline %s - ignored, master deadline not known',
$which_section);
} else {
my $now = Time::HiRes::time;
$time_to_deadline = $timestamp_of_last_reception + $child_t_o - $now;
$timer_interval = $time_to_deadline;
if (!defined $allowed_share) {
$allowed_share = 0.6;
$timer_interval *= $allowed_share;
} elsif ($allowed_share <= 0) {
$timer_interval = 0;
} elsif ($allowed_share >= 1) {
# leave it unchanged
} else {
$timer_interval *= $allowed_share;
}
$reserve = 4 if !defined $reserve;
if ($reserve > 0 && $timer_interval > $time_to_deadline - $reserve) {
$timer_interval = $time_to_deadline - $reserve;
}
if ($timer_interval < 8) { # be generous, allow at least 6 seconds
$timer_interval = max(6, min(8,$time_to_deadline));
}
my $j = int($timer_interval);
$timer_interval = $timer_interval > $j ? $j+1 : $j; # ceiling
if (defined $max_time && $max_time > 0 && $timer_interval > $max_time) {
$timer_interval = $max_time;
}
ll(5) && do_log(5,'get_deadline %s - deadline in %.1f s, set to %.3f s',
$which_section, $time_to_deadline, $timer_interval);
$timer_deadline = $now + $timer_interval;
}
!wantarray ? $timer_interval
: ($timer_interval, $timer_deadline, $time_to_deadline);
}
sub prolong_timer($;$$$) {
my($which_section, $allowed_share, $reserve, $max_time) = @_;
my($timer_interval, $timer_deadline, $time_to_deadline) = get_deadline(@_);
if (defined $timer_interval) {
my $prev_timer = alarm($timer_interval); # restart/prolong the timer
ll(5) && do_log(5,'prolong_timer %s: timer %d, was %d, deadline in %.1f s',
$which_section, $timer_interval, $prev_timer, $time_to_deadline);
}
!wantarray ? $timer_interval
: ($timer_interval, $timer_deadline, $time_to_deadline);
}
sub switch_to_my_time($) { # processing is in our courtyard
my $msg = $_[0];
$waiting_for_client = 0;
$timestamp_of_last_reception = Time::HiRes::time;
my $child_t_o = c('child_timeout');
if (!$child_t_o) {
alarm(0);
} else {
prolong_timer( 'switch_to_my_time(' . $msg . ')' );
}
}
sub switch_to_client_time($) { # processing is now in client's hands
my $msg = $_[0];
my $interval = c('smtpd_timeout');
$interval = 5 if $interval < 5;
ll(5) && do_log(5, 'switch_to_client_time %d s, %s', $interval,$msg);
undef $timestamp_of_last_reception;
alarm($interval); $waiting_for_client = 1;
}
# pretty-print a structure for logging purposes: returns a string
#
sub fmt_struct($); # prototype
sub fmt_struct($) {
my $arg = $_[0];
my $r = ref $arg;
!$r ?
(defined($arg) ? '"'.$arg.'"' : 'undef')
: $r eq 'ARRAY' ?
'[' . join(',', map(fmt_struct($_), @$arg)) . ']'
: $r eq 'HASH' ?
'{' . join(',', map($_.'=>'.fmt_struct($arg->{$_}), keys %$arg)) . '}'
: $arg;
};
# used by freeze: protect % and ~, as well as NUL and \200 for good measure
#
sub st_encode($) {
my $str = $_[0]; local($1);
{ # concession on a perl 5.20.0 bug [perl #122148] (fixed in 5.20.1)
# - just warn, do not abort
use warnings NONFATAL => qw(utf8);
$str =~ s/([%~\000\200])/sprintf('%%%02X',ord($1))/gse;
};
$str;
}
# simple Storable::freeze lookalike
#
sub freeze($); # prototype
sub freeze($) {
my $obj = $_[0]; my $ty = ref($obj);
if (!defined($obj)) { 'U' }
elsif (!$ty) { join('~', '', st_encode($obj)) } # string
elsif ($ty eq 'SCALAR') { join('~', 'S', st_encode(freeze($$obj))) }
elsif ($ty eq 'REF') { join('~', 'R', st_encode(freeze($$obj))) }
elsif ($ty eq 'ARRAY') { join('~', 'A', map(st_encode(freeze($_)),@$obj)) }
elsif ($ty eq 'HASH') {
join('~', 'H',
map {(st_encode($_),st_encode(freeze($obj->{$_})))} sort keys %$obj)
} else { die "Can't freeze object type $ty" }
}
# simple Storable::thaw lookalike
#
sub thaw($); # prototype
sub thaw($) {
my $str = $_[0];
return undef if !defined $str; # must return undef even in a list context!
my($ty,@val) = split(/~/,$str,-1);
s/%([0-9a-fA-F]{2})/pack('C',hex($1))/gse for @val;
if ($ty eq 'U') { undef }
elsif ($ty eq '') { $val[0] }
elsif ($ty eq 'S') { my $obj = thaw($val[0]); \$obj }
elsif ($ty eq 'R') { my $obj = thaw($val[0]); \$obj }
elsif ($ty eq 'A') { [map(thaw($_),@val)] }
elsif ($ty eq 'H') {
my $hr = {};
while (@val) { my $k = shift @val; $hr->{$k} = thaw(shift @val) }
$hr;
} else { die "Can't thaw object type $ty" }
}
# accepts either a single contents category (a string: "maj,min" or "maj"),
# or a list of contents categories, in which case only the first element
# is considered; returns a passed pair: (major_ccat, minor_ccat)
#
sub ccat_split($) {
my $ccat = $_[0]; my $major; my $minor;
$ccat = $ccat->[0] if ref $ccat; # pick the first element if given a list
($major,$minor) = split(/,/,$ccat,-1) if defined $ccat;
!wantarray ? $major : ($major,$minor);
}
# accepts either a single contents category (a string: "maj,min" or "maj"),
# or a list of contents categories, in which case only the first element
# is considered; returns major_ccat
#
sub ccat_maj($) {
my $ccat = $_[0]; my $major; my $minor;
$ccat = $ccat->[0] if ref $ccat; # pick the first element if given a list
($major,$minor) = split(/,/,$ccat,-1) if defined $ccat;
$major;
}
# compare numerically two strings of the form "maj,min" or just "maj", where
# maj and min are numbers, representing major and minor contents category
#
sub cmp_ccat($$) {
my($a_maj,$a_min) = split(/,/, $_[0], -1);
my($b_maj,$b_min) = split(/,/, $_[1], -1);
$a_maj == $b_maj ? $a_min <=> $b_min : $a_maj <=> $b_maj;
}
# similar to cmp_ccat, but consider only the major category of both arguments
#
sub cmp_ccat_maj($$) {
my($a_maj,$a_min) = split(/,/, $_[0], -1);
my($b_maj,$b_min) = split(/,/, $_[1], -1);
$a_maj <=> $b_maj;
}
# get a list of settings corresponding to all listed contents categories,
# ordered from the most important category to the least; @ccat is a list of
# relevant contents categories for which a query is made, it MUST already be
# sorted in descending order; this is a classical subroutine, not a method!
#
sub setting_by_given_contents_category_all($@) {
my($ccat,@settings_href_list) = @_; my(@r);
if (@settings_href_list) {
for my $e ((!defined $ccat ? () : ref $ccat ?@$ccat :$ccat), CC_CATCHALL) {
if (grep(defined($_) && exists($_->{$e}), @settings_href_list)) {
# supports lazy evaluation (a setting may be a subroutine)
my(@slist) = map { !defined($_) || !exists($_->{$e}) ? undef :
do { my $s = $_->{$e}; ref($s) eq 'CODE' ? &$s : $s}
} @settings_href_list;
push(@r, [$e,@slist]); # a tuple: [corresponding ccat, settings list]
}
}
}
@r; # a list of tuples
}
# similar to setting_by_given_contents_category_all(), but only the first
# (the most relevant) setting is returned, without a corresponding ccat
#
sub setting_by_given_contents_category($@) {
my($ccat,@settings_href_list) = @_; my(@slist);
if (@settings_href_list) {
for my $e ((!defined $ccat ? () : ref $ccat ?@$ccat :$ccat), CC_CATCHALL) {
if (grep(defined($_) && exists($_->{$e}), @settings_href_list)) {
# supports lazy evaluation (setting may be a subroutine)
@slist = map { !defined($_) || !exists($_->{$e}) ? undef :
do { my $s = $_->{$e}; ref($s) eq 'CODE' ? &$s : $s }
} @settings_href_list;
last;
}
}
}
!wantarray ? $slist[0] : @slist; # only the first entry
}
# Removes a directory, along with its contents
#
# The readdir() is entitled to fail if the directory changes underneath,
# so do the deletions by chunks: read a limited set of filenames into
# memory, close directory, delete these files, and repeat.
# The current working directory must not be within directories which are
# to be deleted, otherwise rmdir can fail with 'Invalid argument' (e.g.
# on Solaris 10).
#
sub rmdir_recursively($;$); # prototype
sub rmdir_recursively($;$) {
my($dir, $exclude_itself) = @_;
ll(4) && do_log(4, 'rmdir_recursively: %s, excl=%s', $dir,$exclude_itself);
my($f, @rmfiles, @rmdirs); my $more = 1; my $dir_chmoded = 0;
while ($more) {
local(*DIR); $more = 0;
my $errn = opendir(DIR,$dir) ? 0 : 0+$!;
if ($errn == EACCES && !$dir_chmoded) {
# relax protection on directory, then try again
do_log(3,'rmdir_recursively: enabling read access to directory %s',$dir);
chmod(0750,$dir)
or do_log(-1, "Can't change protection-1 on dir %s: %s", $dir, $!);
$dir_chmoded = 1;
$errn = opendir(DIR,$dir) ? 0 : 0+$!; # try again
}
if ($errn) { die "Can't open directory $dir: $!" }
my $cnt = 0;
# avoid slurping the whole directory contents into memory
while (defined($f = readdir(DIR))) {
next if $f eq '.' || $f eq '..';
my $fname = $dir . '/' . $f;
$errn = lstat($fname) ? 0 : 0+$!;
if ($errn == EACCES && !$dir_chmoded) {
# relax protection on the directory and retry
do_log(3,'rmdir_recursively: enabling access to files in dir %s',$dir);
chmod(0750,$dir)
or do_log(-1, "Can't change protection-2 on dir %s: %s", $dir, $!);
$dir_chmoded = 1;
$errn = lstat($fname) ? 0 : 0+$!; # try again
}
if ($errn) { do_log(-1, "Can't access file \"%s\": $!", $fname,$!) }
if (-d _) { push(@rmdirs,$f) } else { push(@rmfiles,$f) }
$cnt++;
if ($cnt >= 1000) {
do_log(3,'rmdir_recursively: doing %d files and %d dirs for now in %s',
scalar(@rmfiles), scalar(@rmdirs), $dir);
$more = 1;
last;
}
}
# fixed by perl5.20: readdir() now only sets $! on error. $! is no longer
# set to EBADF when then terminating undef is read from the directory
# unless the system call sets $!. [perl #118651]
closedir(DIR) or die "Error closing directory $dir: $!";
my $cntf = scalar(@rmfiles);
for my $f (@rmfiles) {
my $fname = $dir . '/' . untaint($f);
if (unlink($fname)) {
# ok
} elsif ($! == EACCES && !$dir_chmoded) {
# relax protection on the directory, then try again
do_log(3,'rmdir_recursively: enabling write access to dir %s',$dir);
my $what = -l _ ? 'symlink' :-d _ ? 'directory' :'non-regular file';
chmod(0750,$dir)
or do_log(-1, "Can't change protection-3 on dir %s: %s", $dir, $!);
$dir_chmoded = 1;
unlink($fname) or die "Can't remove $what $fname: $!";
}
}
undef @rmfiles;
section_time("unlink-$cntf-files") if $cntf > 0;
for my $d (@rmdirs) {
rmdir_recursively($dir . '/' . untaint($d));
}
undef @rmdirs;
}
if (!$exclude_itself) {
rmdir($dir) or die "rmdir_recursively: Can't remove directory $dir: $!";
section_time('rmdir');
}
1;
}
# efficiently read a file (in binmode) into a provided string;
# either an open file handle may be given, or a filename
#
sub read_file($$) {
my($fname,$strref) = @_;
my($fh, $file_size, $nbytes);
if (ref $fname) {
$fh = $fname; # assume a file handle was given
} else { # a filename
$fh = IO::File->new;
$fh->open($fname,O_RDONLY) # does a sysopen
or die "Can't open file $fname for reading: $!";
$fh->binmode or die "Can't set file $fname to binmode: $!";
}
my(@stat_list) = stat($fh);
@stat_list or die "Failed to access file: $!";
$file_size = -s _ if -f _;
if ($file_size) {
# preallocate exact storage size, avoids realloc/copying while growing
$$strref = ''; vec($$strref, $file_size + 32768, 8) = 0;
}
$$strref = '';
#*** handle EINTR
while ( $nbytes=sysread($fh, $$strref, 32768, length $$strref) ) { }
defined $nbytes or die "Error reading from $fname: $!";
if (!ref $fname) { $fh->close or die "Error closing $fname: $!" }
$strref;
}
# read a text file, returning its contents as a string - suitable for
# calling from amavisd.conf
#
sub read_text($;$) {
my($fname, $encoding) = @_;
my $fh = IO::File->new;
$fh->open($fname,'<') or die "Can't open file $fname for reading: $!";
if (defined($encoding) && $encoding ne '') {
binmode($fh, ":encoding($encoding)")
or die "Can't set :encoding($encoding) on file $fname: $!";
}
my $nbytes; my $str = '';
while (($nbytes = $fh->read($str, 16384, length($str))) > 0) { }
defined $nbytes or die "Error reading from $fname: $!";
$fh->close or die "Error closing $fname: $!";
my $result = $str; undef $str; # shrink allocated storage to actual size
$result;
}
# attempt to read all user-visible replies from a l10n dir
# This function auto-fills $notify_sender_templ, $notify_virus_sender_templ,
# $notify_virus_admin_templ, $notify_virus_recips_templ,
# $notify_spam_sender_templ and $notify_spam_admin_templ from files named
# template-dsn.txt, template-virus-sender.txt, template-virus-admin.txt,
# template-virus-recipient.txt, template-spam-sender.txt,
# template-spam-admin.txt. If this is available, it uses the charset
# file to do automatic charset conversion. Used by the Debian distribution.
#
sub read_l10n_templates($;$) {
my $dir = $_[0];
if (@_ > 1) # compatibility with Debian
{ my($l10nlang, $l10nbase) = @_; $dir = "$l10nbase/$l10nlang" }
my $file_chset = Amavis::Util::read_text("$dir/charset");
local($1,$2);
if ($file_chset =~ m{^(?:\#[^\n]*\n)*([^./\n\s]+)(\s*[\#\n].*)?$}s) {
$file_chset = untaint("$1");
} else {
die "Invalid charset $file_chset\n";
}
$Amavis::Conf::notify_sender_templ =
Amavis::Util::read_text("$dir/template-dsn.txt", $file_chset);
$Amavis::Conf::notify_virus_sender_templ =
Amavis::Util::read_text("$dir/template-virus-sender.txt", $file_chset);
$Amavis::Conf::notify_virus_admin_templ =
Amavis::Util::read_text("$dir/template-virus-admin.txt", $file_chset);
$Amavis::Conf::notify_virus_recips_templ =
Amavis::Util::read_text("$dir/template-virus-recipient.txt", $file_chset);
$Amavis::Conf::notify_spam_sender_templ =
Amavis::Util::read_text("$dir/template-spam-sender.txt", $file_chset);
$Amavis::Conf::notify_spam_admin_templ =
Amavis::Util::read_text("$dir/template-spam-admin.txt", $file_chset);
}
# # attempt to read a list of config files to use instead of the default one,
# # using an external helper script. Used by the Debian/Ubuntu distribution.
# sub find_config_files(@) {
# my(@dirs) = @_;
# local $ENV{PATH} = '/bin:/usr/bin';
# my(@config_files) = map { `run-parts --list "$_"` } @dirs;
# chomp(@config_files);
# # untaint - this data is secure as we check the files themselves later
# map { untaint($_) } @config_files;
# }
#use CDB_File;
#sub tie_hash($$) {
# my($hashref, $filename) = @_;
# CDB_File::create(%$hashref, $filename, "$filename.tmp$$")
# or die "Can't create cdb $filename: $!";
# my $cdb = tie(%$hashref,'CDB_File',$filename)
# or die "Tie to $filename failed: $!";
# $hashref;
#}
# read an associative array (=Perl hash) (as used in lookups) from a file;
# may be called from amavisd.conf
#
# Format: one key per line, anything from '#' to the end of line
# is considered a comment, but '#' within correctly quoted RFC 5321
# addresses is not treated as a comment introducer (e.g. a hash sign
# within "strange # \"foo\" address"@example.com is part of the string).
# Lines may contain a pair: key value, separated by whitespace,
# or key only, in which case a value 1 is implied. Trailing whitespace
# is discarded (iff $trim_trailing_space_in_lookup_result_fields),
# empty lines (containing only whitespace or comment) are ignored.
# Addresses (lefthand-side) are converted from RFC 5321 -quoted form
# into internal (raw) form and inserted as keys into a given hash.
# International domain names (IDN) in UTF-8 are encoded to ASCII.
# NOTE: the format is partly compatible with Postfix maps (not aliases):
# no continuation lines are honoured, Postfix maps do not allow
# RFC 5321 -quoted addresses containing whitespace, Postfix only allows
# comments starting at the beginning of a line.
#
# The $hashref argument is returned for convenience, so that one can do
# for example:
# $per_recip_whitelist_sender_lookup_tables = {
# '.my1.example.com' => read_hash({},'/var/amavis/my1-example-com.wl'),
# '.my2.example.com' => read_hash({},'/var/amavis/my2-example-com.wl') }
# or even simpler:
# $per_recip_whitelist_sender_lookup_tables = {
# '.my1.example.com' => read_hash('/var/amavis/my1-example-com.wl'),
# '.my2.example.com' => read_hash('/var/amavis/my2-example-com.wl') }
#
sub read_hash(@) {
unshift(@_,{}) if !ref $_[0]; # first argument is optional, defaults to {}
my($hashref, $filename, $keep_case) = @_;
my $lpcs = c('localpart_is_case_sensitive');
my $inp = IO::File->new;
$inp->open($filename,'<') or die "Can't open file $filename for reading: $!";
my $ln;
for ($! = 0; defined($ln=$inp->getline); $! = 0) {
chomp($ln);
# carefully handle comments, '#' within "" does not count as a comment
my $lhs = ''; my $rhs = ''; my $at_rhs = 0; my $trailing_comment = 0;
for my $t ( $ln =~ /\G ( " (?: \\. | [^"\\] )* " |
[^#" \t]+ | [ \t]+ | . )/xgs) {
if ($t eq '#') { $trailing_comment = 1; last }
if (!$at_rhs && $t =~ /^[ \t]+\z/) { $at_rhs = 1 }
else { ($at_rhs ? $rhs : $lhs) .= $t }
}
$rhs =~ s/[ \t]+\z// if $trailing_comment ||
$trim_trailing_space_in_lookup_result_fields;
next if $lhs eq '' && $rhs eq '';
my($source_route, $localpart, $domain) =
Amavis::rfc2821_2822_Tools::parse_quoted_rfc2821($lhs,1);
$localpart = lc($localpart) if !$lpcs;
my $addr = $localpart . idn_to_ascii($domain);
$hashref->{$addr} = $rhs eq '' ? 1 : $rhs;
# do_log(5, 'read_hash: address: <%s>: %s', $addr, $hashref->{$addr});
}
defined $ln || $! == 0 or # returning EBADF at EOF is a perl bug
$! == EBADF ? do_log(0,'Error reading from %s: %s', $filename,$!)
: die "Error reading from $filename: $!";
$inp->close or die "Error closing $filename: $!";
$hashref;
}
sub read_array(@) {
unshift(@_,[]) if !ref $_[0]; # first argument is optional, defaults to []
my($arrref, $filename, $keep_case) = @_;
my $inp = IO::File->new;
$inp->open($filename,'<') or die "Can't open file $filename for reading: $!";
my $ln;
for ($! = 0; defined($ln=$inp->getline); $! = 0) {
chomp($ln); my $lhs = '';
# carefully handle comments, '#' within "" does not count as a comment
for my $t ( $ln =~ /\G ( " (?: \\. | [^"\\] )* " |
[^#" \t]+ | [ \t]+ | . )/xgs) {
last if $t eq '#';
$lhs .= $t;
}
$lhs =~ s/[ \t]+\z//; # trim trailing whitespace
push(@$arrref, Amavis::rfc2821_2822_Tools::unquote_rfc2821_local($lhs))
if $lhs ne '';
}
defined $ln || $! == 0 or # returning EBADF at EOF is a perl bug
$! == EBADF ? do_log(0,'Error reading from %s: %s', $filename,$!)
: die "Error reading from $filename: $!";
$inp->close or die "Error closing $filename: $!";
$arrref;
}
# The read_cidr() reads a Postfix style CIDR file, (see cidr_table(5) man
# page), with postfix-style interpretation of comments and line continuations,
# returning a ref to an array or a ref to a hash (associative array ref).
#
# Empty or whitespace-only lines are ignored, as are lines whose first
# non-whitespace character is a '#'. A logical line starts with non-whitespace
# text. A line that starts with whitespace continues a logical line.
# The general form is: network_address/network_mask result
# where 'network_address' is an IPv4 address in a dot-quad form, or an IPv6
# address optionally enclosed in square brackets. The 'network_mask' along
# with a preceding slash is optional, as is the 'result' argument.
#
# If a network mask is omitted, a host address (not a network address)
# is assumed (i.e. a mask defaults to /32 for an IPv4 address, and
# to /128 for an IPv6 address).
#
# The read_cidr() returns a ref to an array or a ref to an hash (associative
# array) of network specifications, directly suitable for use as a lookup
# table in @client_ipaddr_policy and @mynetworks_maps, or for copying the
# array into @inet_acl or @mynetworks.
#
# When returned as an array the 'result' arguments are ignored, just the
# presence of a network specification matters. A '!' may precede the network
# specification, which will be interpreted as by lookup_ip_acl() as a negation,
# i.e. a match on such entry will return a false.
#
# When returned as a hash, the network specification is lowercased and used
# as a key, and the 'result' is stored as a value of a hash entry. A missing
# 'result' is replaced by 1.
#
# See also the lookup_ip_acl() for details on allowed IP address syntax
# and on the interpretation of array and hash type IP lookup tables.
#
sub read_cidr($;$) {
my($filename, $result) = @_;
# the $result arg may be a ref to an existing array or hash, in which case
# data will be added there - either as key/value pairs, or as array elements;
$result = [] if !defined $result; # missing $results arg yields an array
my $have_arry = ref $result eq 'ARRAY';
my $inp = IO::File->new;
$inp->open($filename,'<') or die "Can't open file $filename for reading: $!";
my($ln, $curr_line);
for ($! = 0; defined($ln=$inp->getline); $! = 0) {
next if $ln =~ /^ [ \t]* (?: \# | $ )/xs;
chomp($ln);
if ($ln =~ /^[ \t]/) { # a continuation line
$curr_line = '' if !defined $curr_line; # first line a continuation??
$curr_line .= $ln;
} else { # a new logical line starts
if (defined $curr_line) { # deal with the previous logical line
my($key,$val) = split(' ',$curr_line,2);
# $val is always defined, it is an empty string if missing
if ($have_arry) { push(@$result,$key) }
else { $result->{lc $key} = $val eq '' ? 1 : $val }
}
$curr_line = $ln;
}
}
if (defined $curr_line) { # deal with the last logical line
my($key,$val) = split(' ',$curr_line,2);
if ($have_arry) { push(@$result,$key) }
else { $result->{lc $key} = $val eq '' ? 1 : $val }
}
defined $ln || $! == 0 or # returning EBADF at EOF is a perl bug
$! == EBADF ? do_log(0,'Error reading from %s: %s', $filename,$!)
: die "Error reading from $filename: $!";
$inp->close or die "Error closing $filename: $!";
$result;
}
sub dump_hash($) {
my $hr = $_[0];
do_log(0, 'dump_hash: %s => %s', $_, $hr->{$_}) for (sort keys %$hr);
}
sub dump_array($) {
my $ar = $_[0];
do_log(0, 'dump_array: %s', $_) for @$ar;
}
# use Devel::Symdump;
# sub dump_subs() {
# my $obj = Devel::Symdump->rnew;
# # list of all subroutine names and their memory addresses
# my @a = map([$_, \&$_], $obj->functions, $obj->scalars,
# $obj->arrays, $obj->hashes);
# open(SUBLIST, ">/tmp/1.log") or die "Can't create a file: $!";
# for my $s (sort { $a->[1] <=> $b->[1] } @a) { # sorted by memory address
# printf SUBLIST ("%s %s\n", $s->[1], $s->[0]);
# }
# close(SUBLIST) or die "Can't close a file: $!";
# }
# (deprecated, only still used with Amavis::OS_Fingerprint)
sub dynamic_destination($$) {
my($method,$conn) = @_;
if ($method =~ /^(?:[a-z][a-z0-9.+-]*)?:/si) {
my(@list); $list[0] = ''; my $j = 0;
for ($method =~ /\G \[ (?: \\. | [^\]\\] )* \] | " (?: \\. | [^"\\] )* "
| : | [ \t]+ | [^:"\[ \t]+ | . /xgs) { # real parsing
if ($_ eq ':') { $list[++$j] = '' } else { $list[$j] .= $_ }
};
if ($list[1] =~ m{^/}) {
# presumably the second field is a Unix socket name, keep unchanged
} else {
my $new_method; my($proto,$relayhost,$relayport) = @list;
if ($relayhost eq '*') {
my $client_ip; $client_ip = $conn->client_ip if $conn;
$relayhost = "[$client_ip]" if defined $client_ip && $client_ip ne '';
}
if ($relayport eq '*') {
my $socket_port; $socket_port = $conn->socket_port if $conn;
$relayport = $socket_port + 1
if defined $socket_port && $socket_port ne '';
}
if ($relayhost eq '*' || $relayport eq '*') {
do_log(0,'dynamic destination expected, no client addr/port info: %s',
$method);
}
$list[1] = $relayhost; $list[2] = $relayport;
$new_method = join(':',@list);
if ($new_method ne $method) {
do_log(3, 'dynamic destination: %s -> %s', $method,$new_method);
$method = $new_method;
}
}
}
$method;
}
# collect unfinished recipients matching a $filter sub and a delivery
# method regexp; assumes all list elements of a delivery_method list
# use the same protocol name, hence only the first one is inspected
#
sub collect_equal_delivery_recips($$$) {
my($msginfo, $filter, $deliv_meth_regexp) = @_;
my(@per_recip_data_subset, $proto_sockname);
my(@per_recip_data) =
grep(!$_->recip_done && (!$filter || &$filter($_)) &&
grep(/$deliv_meth_regexp/,
(ref $_->delivery_method ? $_->delivery_method->[0]
: $_->delivery_method)),
@{$msginfo->per_recip_data});
if (@per_recip_data) {
# take the first remaining recipient as a model
$proto_sockname = $per_recip_data[0]->delivery_method;
defined $proto_sockname or die "undefined recipient's delivery_method";
my $proto_sockname_key = !ref $proto_sockname ? $proto_sockname
: join("\n", @$proto_sockname);
# collect recipients with the same delivery method as the first one
$per_recip_data_subset[0] = shift(@per_recip_data); # always equals self
push(@per_recip_data_subset,
grep((ref $_->delivery_method ? join("\n", @{$_->delivery_method})
: $_->delivery_method)
eq $proto_sockname_key, @per_recip_data) );
}
# return a ref to a filtered list of still-to-be-delivered recipient objects
# and a single string or a ref to a list of delivery methods common to
# these recipients
(\@per_recip_data_subset, $proto_sockname);
}
# get system supplementary groups by username.
# Borrowed from SpamAssassin.
sub get_user_groups {
my $user = shift;
return if not defined($user);
my $gid = (getpwnam($user))[3];
return if not defined($gid);
my @gids = $gid;
while (my($name,$gid,$members) = (getgrent())[0,2,3]) {
if (grep { $_ eq $user } split(/ /, $members)) {
push @gids, $gid;
}
}
endgrent;
return @gids;
}
1;