HEX

File: //usr/share/perl5/vendor_perl/Amavis/Util.pm
package Amavis::Util;
use strict;
use re 'taint';

BEGIN {
  require Exporter;
  use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
  $VERSION = '2.412';
  @ISA = qw(Exporter);
  @EXPORT_OK = qw(&untaint &untaint_inplace &min &max &minmax
                  &unique_list &unique_ref &format_time_interval
                  &is_valid_utf_8 &truncate_utf_8
                  &safe_encode &safe_encode_utf8 &safe_encode_utf8_inplace
                  &safe_decode &safe_decode_utf8 &safe_decode_latin1
                  &safe_decode_mime &q_encode &orcpt_encode &orcpt_decode
                  &xtext_encode &xtext_decode &proto_encode &proto_decode
                  &idn_to_ascii &idn_to_utf8 &clear_idn_cache
                  &mail_addr_decode &mail_addr_idn_to_ascii
                  &ll &do_log &do_log_safe &snmp_count &snmp_count64
                  &snmp_counters_init &snmp_counters_get &snmp_initial_oids
                  &debug_oneshot &update_current_log_level
                  &flush_captured_log &reposition_captured_log_to_end
                  &dump_captured_log &log_capture_enabled
                  &am_id &new_am_id &stir_random
                  &add_entropy &fetch_entropy_bytes
                  &generate_mail_id &make_password
                  &crunching_start_time &prolong_timer &get_deadline
                  &waiting_for_client &switch_to_my_time &switch_to_client_time
                  &sanitize_str &fmt_struct &freeze &thaw
                  &ccat_split &ccat_maj &cmp_ccat &cmp_ccat_maj
                  &setting_by_given_contents_category_all
                  &setting_by_given_contents_category &rmdir_recursively
                  &read_file &read_text &read_l10n_templates
                  &read_hash &read_array &dump_hash &dump_array
                  &dynamic_destination &collect_equal_delivery_recips);

}
use subs @EXPORT_OK;

use Errno qw(ENOENT EACCES EAGAIN ESRCH EBADF);
use IO::File qw(O_RDONLY O_WRONLY O_RDWR O_APPEND O_CREAT O_EXCL);
use Digest::MD5;  # 2.22 provides 'clone' method, no longer needed since 2.7.0
use MIME::Base64;
use Encode ();  # Perl 5.8  UTF-8 support
use Scalar::Util qw(tainted);

BEGIN {
    if (eval { require Net::LibIDN2 }) {
        *libidn_to_ascii = \&Net::LibIDN2::idn2_lookup_u8;
        *libidn_to_unicode = \&Net::LibIDN2::idn2_to_unicode_88;
    } elsif (eval { require Net::LibIDN }) {
        *libidn_to_ascii = sub { Net::LibIDN::idn_to_ascii($_[0], 'UTF-8') };
        *libidn_to_unicode = sub { Net::LibIDN::idn_to_unicode($_[0], 'UTF-8') };
    } else {
        die 'Neither Net::LibIDN2 nor Net::LibIDN module found';
    }
}

use Amavis::Conf qw(:platform $DEBUG c cr ca $mail_id_size_bits
                    $myversion $snmp_contact $snmp_location
                    $trim_trailing_space_in_lookup_result_fields);
use Amavis::DbgLog;
use Amavis::Log qw(amavis_log_id write_log);
use Amavis::rfc2821_2822_Tools;
use Amavis::Timing qw(section_time);

use vars qw($enc_ascii $enc_utf8 $enc_latin1 $enc_w1252 $enc_tainted
            $enc_taintsafe $enc_is_utf8_buggy);
BEGIN {
  $enc_ascii  = Encode::find_encoding('ascii');
  $enc_utf8   = Encode::find_encoding('UTF-8');  # same as utf-8-strict
  $enc_latin1 = Encode::find_encoding('ISO-8859-1');
  $enc_w1252  = Encode::find_encoding('Windows-1252');
  $enc_ascii  or die  "Amavis::Util: unknown encoding 'ascii'";
  $enc_utf8   or die  "Amavis::Util: unknown encoding 'UTF-8'";
  $enc_latin1 or die  "Amavis::Util: unknown encoding 'ISO-8859-1'";
  $enc_w1252  or warn "Amavis::Util: unknown encoding 'Windows-1252'";
  $enc_tainted = substr($ENV{PATH}.$ENV{HOME}.$ENV{AMAVIS_TEST_CONFIG}, 0,0);  # tainted empty string
  $enc_taintsafe = 1;  # guessing
  if (!tainted($enc_tainted)) {
    warn "Amavis::Util: can't obtain a tainted string";
  } else {
    # NOTE: [rt.cpan.org #85489] - Encode::encode turns on the UTF8 flag
    # on a passed argument. Give it a copy to avoid turning $enc_tainted
    # or $enc_ps into a UTF-8 string!

    # Encode::is_utf8 is always false on tainted in Perl 5.8, Perl bug #32687
    my $enc_ps = "\x{2029}";  # Paragraph Separator, utf8 flag on
    if (!Encode::is_utf8("$enc_ps $enc_tainted")) {
      $enc_is_utf8_buggy = 1;
      warn "Amavis::Util, Encode::is_utf8() fails to detect utf8 on tainted";
    }
    # test for Encode taint laundering bug [rt.cpan.org #84879], fixed in 2.50
    if (!tainted($enc_ascii->encode("$enc_ps $enc_tainted"))) {
      $enc_taintsafe = 0;
      warn "Amavis::Util, Encode::encode() taint laundering bug, ".
           "fixed in Encode 2.50";
    } elsif (!tainted($enc_ascii->decode("xx $enc_tainted"))) {
      $enc_taintsafe = 0;
      warn "Amavis::Util, Encode::decode() taint laundering bug, ".
           "fixed in Encode 2.50";
    }
    utf8::is_utf8("$enc_ps $enc_tainted")
      or die "Amavis::Util, utf8::is_utf8() fails to detect utf8 on tainted";
    !utf8::is_utf8("\xA0   $enc_tainted")
      or die "Amavis::Util, utf8::is_utf8() claims utf8 on tainted";
    my $t = "$enc_ps $enc_tainted";
    utf8::encode($t);
    tainted($t)
      or die "Amavis::Util, utf8::encode() taint laundering bug";
    !utf8::is_utf8($t)
      or die "Amavis::Util, utf8::encode() failed to clear utf8 flag";
  }
  1;
}

# Return untainted copy of a string (argument can be a string or a string ref)
#
sub untaint($) {
  return undef  if !defined $_[0];  # must return undef even in a list context!
  no re 'taint';
  local $1;  # avoids Perl taint bug: tainted global $1 propagates taintedness
  (ref($_[0]) ? ${$_[0]} : $_[0]) =~ /^(.*)\z/s;
  $1;
}

sub untaint_inplace($) {
  return undef  if !defined $_[0];  # must return undef even in a list context!
  no re 'taint';
  local $1;  # avoid Perl taint bug: tainted global $1 propagates taintedness
  $_[0] =~ /^(.*)\z/s;
  $_[0] = $1;
}

# Returns the smallest defined number from the list, or undef
#
sub min(@) {
  my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_;  # accept list, or a list ref
  my $m;  defined $_ && (!defined $m || $_ < $m) && ($m = $_)  for @$r;
  $m;
}

# Returns the largest defined number from the list, or undef
#
sub max(@) {
  my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_;  # accept list, or a list ref
  my $m;  defined $_ && (!defined $m || $_ > $m) && ($m = $_)  for @$r;
  $m;
}

# Returns a pair of the smallest and the largest defined number from the list
#
sub minmax(@) {
  my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_;  # accept list, or a list ref
  my $min; my $max;
  for (@$r) {
    if (defined $_) {
      $min = $_  if !defined $min || $_ < $min;
      $max = $_  if !defined $max || $_ > $max;
    }
  }
  ($min,$max);
}

# Returns a sublist of the supplied list of elements in an unchanged order,
# where only the first occurrence of each defined element is retained
# and duplicates removed
#
sub unique_list(@) {
  my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_;  # accepts list, or a list ref
  my %seen;  my(@result) = grep(defined($_) && !$seen{$_}++, @$r);
  @result;
}

# same as unique, except that it returns a ref to the resulting list
#
sub unique_ref(@) {
  my $r = @_ == 1 && ref($_[0]) ? $_[0] : \@_;  # accepts list, or a list ref
  my %seen;  my(@result) = grep(defined($_) && !$seen{$_}++, @$r);
  \@result;
}

sub format_time_interval($) {
  my $t = $_[0];
  return 'undefined'  if !defined $t;
  my $sign = '';  if ($t < 0) { $sign = '-'; $t = - $t };
  my $dd = int($t / (24*3600));  $t = $t - $dd*(24*3600);
  my $hh = int($t / 3600);       $t = $t - $hh*3600;
  my $mm = int($t / 60);         $t = $t - $mm*60;
  sprintf("%s%d %d:%02d:%02d", $sign, $dd, $hh, $mm, int($t+0.5));
}

# returns true if the provided string of octets represents a syntactically
# valid UTF-8 string, otherwise a false is returned
#
sub is_valid_utf_8($) {
# my $octets = $_[0];
  return undef if !defined $_[0];
  #
  # RFC 6532: UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4
  # RFC 3629 section 4: Syntax of UTF-8 Byte Sequences
  #   UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
  #   UTF8-1      = %x00-7F
  #   UTF8-2      = %xC2-DF UTF8-tail
  #   UTF8-3      = %xE0 %xA0-BF UTF8-tail /
  #                 %xE1-EC 2( UTF8-tail ) /
  #                 %xED %x80-9F UTF8-tail /
  #                   # U+D800..U+DFFF are utf16 surrogates, not legal utf8
  #                 %xEE-EF 2( UTF8-tail )
  #   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) /
  #                 %xF1-F3 3( UTF8-tail ) /
  #                 %xF4 %x80-8F 2( UTF8-tail )
  #   UTF8-tail   = %x80-BF
  #
  # loose variant:
  #   [\x00-\x7F] |
  #   [\xC0-\xDF][\x80-\xBF] |
  #   [\xE0-\xEF][\x80-\xBF]{2} |
  #   [\xF0-\xF4][\x80-\xBF]{3}
  #
  $_[0] =~ /^ (?: [\x00-\x7F] |
                  [\xC2-\xDF] [\x80-\xBF] |
                  \xE0 [\xA0-\xBF] [\x80-\xBF] |
                  [\xE1-\xEC] [\x80-\xBF]{2} |
                  \xED [\x80-\x9F] [\x80-\xBF] |
                  [\xEE-\xEF] [\x80-\xBF]{2} |
                  \xF0 [\x90-\xBF] [\x80-\xBF]{2} |
                  [\xF1-\xF3] [\x80-\xBF]{3} |
                  \xF4 [\x80-\x8F] [\x80-\xBF]{2} )* \z/xs ? 1 : 0;
}

# cleanly chop a UTF-8 byte sequence to $max_len or less, RFC 3629;
# if $max_len is undefined just chop off any partial last character
#
sub truncate_utf_8($;$) {
  my($octets, $max_len) = @_;
  return $octets if !defined $octets;
  return ''      if defined $max_len && $max_len <= 0;
  substr($octets,$max_len) = ''
                 if defined $max_len && length($octets) > $max_len;
  # missing one or more UTF8-tail octets? chop the entire last partial char
  if ($octets =~ tr/\x00-\x7F//c) {  # triage - is non-ASCII
    $octets =~      s/[\xC0-\xDF]\z//s
      or $octets =~ s/[\xE0-\xEF][\x80-\xBF]{0,1}\z//s
      or $octets =~ s/[\xF0-\xF7][\x80-\xBF]{0,2}\z//s
      or $octets =~ s/[\xF8-\xFB][\x80-\xBF]{0,3}\z//s   # not strictly valid
      or $octets =~ s/[\xFC-\xFD][\x80-\xBF]{0,4}\z//s   # not strictly valid
      or $octets =~ s/ \xFE      [\x80-\xBF]{0,5}\z//sx; # not strictly valid
  }
  $octets;
}

# A wrapper for Encode::encode, avoiding a bug in Perl 5.8.0 which causes
# Encode::encode to loop and fill memory when given a tainted string.
# Also works around a CPAN bug #64642 in module Encode:
#   Tainted values have the taint flag cleared when encoded or decoded.
#   https://rt.cpan.org/Public/Bug/Display.html?id=64642
# Fixed in Encode 2.50 [rt.cpan.org #84879].
#
sub safe_encode($$;$) {
# my($encoding,$str,$check) = @_;
  my $encoding = shift;
  return undef  if !defined $_[0];  # must return undef even in a list context!
  my $enc = Encode::find_encoding($encoding);
  $enc  or die "safe_encode: unknown encoding '$encoding'";
  # the resulting UTF8 flag is always off
  return $enc->encode(@_)  if $enc_taintsafe || !tainted($_[0]);
  # Work around a taint laundering bug in Encode [rt.cpan.org #84879].
  # Propagate taintedness across taint-related bugs in module Encode
  # ( Encode::encode in Perl 5.8.0 fills up all available memory
  #   when given a tainted string with a non-encodeable character. )
  $enc_tainted . $enc->encode(untaint($_[0]), $_[1]);
}

# Encodes logical characters to UTF-8 octets, or returns a string of octets
# (with utf8 flag off) unchanged. Ensures the result is always a string of
# octets (utf8 flag off). Unlike safe_encode(), a non-ASCII string with
# utf8 flag off will be returned unchanged, so the result may not be a
# valid UTF-8 string!
#
sub safe_encode_utf8($) {
  my $str = $_[0];
  return undef  if !defined $str;  # must return undef even in a list context!
  utf8::encode($str)  if utf8::is_utf8($str);
  $str;
}

sub safe_encode_utf8_inplace($) {
  return undef  if !defined $_[0];  # must return undef even in a list context!
  utf8::encode($_[0])  if utf8::is_utf8($_[0]);
}

sub safe_decode_latin1($) {
  my $str = $_[0];
  return undef  if !defined $str;  # must return undef even in a list context!
  #
  # ->  http://en.wikipedia.org/wiki/Windows-1252
  # Windows-1252 character encoding is a superset of ISO 8859-1, but differs
  # from the IANA's ISO-8859-1 by using displayable characters rather than
  # control characters in the 80 to 9F (hex) range. [...]
  # It is very common to mislabel Windows-1252 text with the charset label
  # ISO-8859-1. A common result was that all the quotes and apostrophes
  # (produced by "smart quotes" in word-processing software) were replaced
  # with question marks or boxes on non-Windows operating systems, making
  # text difficult to read. Most modern web browsers and e-mail clients
  # treat the MIME charset ISO-8859-1 as Windows-1252 to accommodate
  # such mislabeling. This is now standard behavior in the draft HTML 5
  # specification, which requires that documents advertised as ISO-8859-1
  # actually be parsed with the Windows-1252 encoding.
  #
  if ($enc_taintsafe || !tainted($str)) {
    return ($enc_w1252||$enc_latin1)->decode($str);
  } else {  # work around bugs in Encode
    untaint_inplace($str);
    return $enc_tainted . ($enc_w1252||$enc_latin1)->decode($str);
  }
}

sub safe_decode_utf8($;$) {
  my($str,$check) = @_;
  return undef  if !defined $str;  # must return undef even in a list context!
  if ($enc_taintsafe || !tainted($str)) {
    return utf8::is_utf8($str) ? $str : $enc_utf8->decode($str, $check||0);
  } else {
    # Work around a taint laundering bug in Encode [rt.cpan.org #84879].
    # Propagate taintedness across taint-related bugs in module Encode.
    untaint_inplace($str);
    return $enc_tainted .
           (utf8::is_utf8($str) ? $str : $enc_utf8->decode($str, $check||0));
  }
}

sub safe_decode($$;$) {
  my($encoding,$str,$check) = @_;
  return undef  if !defined $str;  # must return undef even in a list context!
  my $enc = Encode::find_encoding($encoding);
  return $str  if !$enc;
  # if the $check argument in a call to Encode::decode() is present it must be
  # defined to avoid warning "Use of uninitialized value in subroutine entry" 
  return $enc->decode($str, $check||0)  if $enc_taintsafe || !tainted($str);
  # Work around a taint laundering bug in Encode [rt.cpan.org #84879].
  # Propagate taintedness across taint-related bugs in module Encode.
  untaint_inplace($str);
  $enc_tainted . $enc->decode($str, $check||0);
}

# Handle Internationalized Domain Names according to IDNA: RFC 5890, RFC 5891.
# Similar to ToASCII (RFC 3490), but does not fail on garbage.
# Takes a domain name (possibly with utf8 flag on) consisting of U-labels
# or A-labels or NR-LDH labels, converting each label to A-label, lowercased.
# Non- IDNA-valid strings are only encoded to UTF-8 octets but are otherwise
# unchanged. Result is in octets regardless of input, taintedness of the
# argument is propagated to the result.
#
my %idn_encode_cache;
sub clear_idn_cache() { %idn_encode_cache = () }
sub idn_to_ascii($) {
  # propagate taintedness of the argument, but not its utf8 flag
  return tainted($_[0]) ? $idn_encode_cache{$_[0]} . $enc_tainted
                        : $idn_encode_cache{$_[0]}
    if exists $idn_encode_cache{$_[0]};
  my $s = $_[0];
  my $t = tainted($s);  # taintedness of the argument
  return undef  if !defined $s;
  untaint_inplace($s)  if $t;
  # to octets if needed, not necessarily valid UTF-8
  utf8::encode($s)  if utf8::is_utf8($s);
  if ($s !~ tr/\x00-\x7F//c) {  # is all-ASCII (including IP address literal)
    $s = lc $s;
  } else {
    # Net::LibIDN(2) does not like a leading dot (or '@') in a valid domain name,
    # but we need it (e.g. in lookups, meaning subdomains are included), so
    # we have to carry a prefix across the call to Net::LibIDN::idn_to_ascii() or
    # Net::LibIDN2::idn2_lookup_u8() (wrapped in libidn_to_ascii() here).
    my $prefix; local($1);
    $prefix = $1  if $s =~ s/^([.\@])//s;  # strip a leading dot or '@'
    # to ASCII-compatible encoding (ACE)
    my $sa = libidn_to_ascii($s);
    $s = lc $sa  if defined $sa;
    $s = $prefix.$s  if $prefix;
  }
  $idn_encode_cache{$_[0]} = $s;
  $t ? $s.$enc_tainted : $s;  # propagate taintedness of the argument
}

# Handle Internationalized Domain Names according to IDNA: RFC 5890, RFC 5891.
# Implements ToUnicode (RFC 3490). ToUnicode always succeeds, because it just
# returns the original string if decoding fails. In particular, this means that
# ToUnicode has no effect on a label that does not begin with the ACE prefix.
# Takes a domain name (as a string of octets or logical characters)
# of "Internationalized labels" (A-labels, U-labels, or NR-LDH labels),
# converting each label to U-label. Result is a string of octets encoded
# as UTF-8 if input was valid.
#
sub idn_to_utf8($) {
  my $s = $_[0];
  return undef  if !defined $s;
  safe_encode_utf8_inplace($s);  # to octets (if not already)
  if ($s =~ /(?: ^  | \. ) xn-- [\x00-\x2D\x2F-\xFF]{0,58} [\x00-\x2C\x2F-\xFF]
             (?: \z | \. )/xsi) {  # contains XN-label
    my $su = libidn_to_unicode(lc $s);
    return $su  if defined $su;
  }
  $s;
}

# decode octets found in a mail header field body to a logical chars string
#
sub safe_decode_mime($) {
  my $str = $_[0];  # octets
  return undef  if !defined $str;
  my $chars;  # logical characters

  if ($str !~ tr/\x00-\x7F//c) {  # is all-ASCII
    # test for any RFC 2047 encoded-words
    # encoded-text: Any printable ASCII character other than "?" or SPACE
    # permissive: SPACE and other characters can be observed in Q encoded-word
    if ($str !~ m{ =\? [^?]* \? (?: [Bb] \? [A-Za-z0-9+/=]*? |
                                    [Qq] \? .*? ) \?= }xs) {
      return $str;  # good, keep as-is, all-ASCII with no encoded-words
    }
    # normal, all-ASCII with some encoded-words, try to decode encoded-words
    # using Encode::MIME::Header
    eval { $chars = safe_decode('MIME-Header',$str); 1 }  # RFC 2047
      and return $chars;
    # give up, is all-ASCII but not MIME, just return as-is
    return $str;
  }

  # contains at least some non-ASCII

  if ($str =~ m{ =\? [^?]* \? (?: [Bb] \? [A-Za-z0-9+/=]* |
                                  [Qq] \? [\x20-\x3E\x40-\x7F]* ) \?= }xs) {
    # strange/rare, non-ASCII, but also contains RFC 2047 encoded-words !?
    # decode any RFC 2047 encoded-words, attempt to decode the rest
    # as UTF-8 if valid, or as Windows-1252 (or ISO-8859-1) otherwise
    local($1);
    $str =~ s{ ( =\? [^?]* \? (?: [Bb] \? [A-Za-z0-9+/=]* |
                                  [Qq] \? [\x20-\x3E\x40-\x7F]* ) \?= ) |
               ( [^=]* | . )
             }{ my $s;
                if (defined $1) {
                  $s = $1;  # using Encode::MIME::Header
                  eval { $s = safe_decode('MIME-Header',$s) };
                } else {
                  $s = $2;
                  eval { $s = safe_decode_utf8($s, 1|8); 1 }
                  or do { $s = safe_decode_latin1($s) };
                }
                $s;
             }xgse;
    return $str;
  }

  # contains at least some non-ASCII and no RFC 2047 encoded-words

  # non-MIME-encoded KOI8 seems to be pretty common, attempt some guesswork
  if (length($str) >= 4 &&
      $str !~ tr/\x80-\xA2\xA5\xA8-\xAC\xAE-\xB2\xB5\xB8-\xBC\xBE-\xBF//) {
    # does *not* contain UTF8-tail octets (sans KOI8-U letters in that range)
    my $koi8_cyr_lett_cnt =  # count cyrillic letters
      $str =~ tr/\xA3\xA4\xA6\xA7\xAD\xB3\xB4\xB6\xB7\xBD\xC0-\xFF//;
    if ($koi8_cyr_lett_cnt >= length($str)*2/3 &&  # mostly cyrillic letters
        ($str =~ tr/A-Za-z//) <= 5 &&  # not many ASCII letters
        !is_valid_utf_8($str) ) {
      # try decoding as KOI8-U (like KOI8-R but with 8 extra letters)
      eval { $chars = safe_decode('KOI8-U',$str,1|8); 1; }
        and return $chars;  # hopefully the result makes sense
    }
  }

  # contains at least some non-ASCII, no RFC 2047 encoded-words, not KOI8

  if ($enc_taintsafe || !tainted($str)) {
    # FB_CROAK | LEAVE_SRC
    eval { $chars = $enc_utf8->decode($str,1|8); 1; }  # try strict UTF-8
      and return $chars;
    # fallback, assume Windows-1252 or ISO-8859-1
    # note that Windows-1252 is a proper superset of ISO-8859-1
    return ($enc_w1252||$enc_latin1)->decode($str);
  } else {  # work around bugs in Encode
    untaint_inplace($str);
    eval { $chars = $enc_utf8->decode($str,1|8); 1; }  # try strict UTF-8
      and return $enc_tainted . $chars;
    return $enc_tainted . ($enc_w1252||$enc_latin1)->decode($str);
  }
}

# Do the Q-encoding manually, the MIME::Words::encode_mimeword does not
# encode spaces and does not limit to 75 ch, which violates the RFC 2047
#
sub q_encode($$$) {
  my($octets,$encoding,$charset) = @_;
  my $prefix = '=?' . $charset . '?' . $encoding . '?';
  my $suffix = '?='; local($1,$2,$3);
  # FWS | utext (= NO-WS-CTL|rest of US-ASCII)
  $octets =~ /^ ( [\001-\011\013\014\016-\177]* [ \t] )?  (.*?)
                ( [ \t] [\001-\011\013\014\016-\177]* )? \z/xs;
  my($head,$rest,$tail) = ($1,$2,$3);
  # Q-encode $rest according to RFC 2047 (not for use in comments or phrase)
  $rest =~ s{([\000-\037\177\200-\377=?_])}{sprintf('=%02X',ord($1))}gse;
  $rest =~ tr/ /_/;   # turn spaces into _ (RFC 2047 allows it)
  my $s = $head; my $len = 75 - (length($prefix)+length($suffix)) - 2;
  while ($rest ne '') {
    $s .= ' '  if $s !~ /[ \t]\z/;  # encoded words must be separated by FWS
    $rest =~ /^ ( .{0,$len} [^=] (?: [^=] | \z ) ) (.*) \z/xs;
    $s .= $prefix.$1.$suffix; $rest = $2;
  }
  $s.$tail;
}

# encode "+", "=" and any character outside the range "!" (33) .. "~" (126)
#
sub xtext_encode($) {  # RFC 3461
  my $str = $_[0]; local($1);
  safe_encode_utf8_inplace($str);  # to octets (if not already)
  $str =~ s/([^\041-\052\054-\074\076-\176])/sprintf('+%02X',ord($1))/gse;
  $str;
}

# decode xtext-encoded string as per RFC 3461
#
sub xtext_decode($) {
  my $str = $_[0]; local($1);
  $str =~ s/\+([0-9a-fA-F]{2})/pack('C',hex($1))/gse;
  $str;
}

sub proto_encode($@) {
  my($attribute_name,@strings) = @_; local($1);
  for ($attribute_name,@strings) {
    # just in case, handle non-octet characters:
    s/([^\000-\377])/sprintf('\\x{%04x}',ord($1))/gse and
      do_log(-1,'proto_encode: non-octet character encountered: %s', $_);
  }
  $attribute_name =~    # encode all but alfanumerics, . _ + -
    s/([^0-9a-zA-Z._+-])/sprintf('%%%02x',ord($1))/gse;
  for (@strings) {      # encode % and nonprintables
    s/([^\041-\044\046-\176])/sprintf('%%%02x',ord($1))/gse;
  }
  $attribute_name . '=' . join(' ',@strings);
}

sub proto_decode($) {
  my $str = $_[0]; local($1);
  $str =~ s/%([0-9a-fA-F]{2})/pack('C',hex($1))/gse;
  $str;
}

# Expects an e-mail address as a string of octets, where a local part
# may be encoded as UTF-8, and the domain part may be an international
# domain name (IDN) consisting either of U-labels or A-labels or NR-LDH
# labels. Decodes A-labels to U-labels in domain name. If $result_as_octets
# is false decodes the resulting UTF-8 octets from previous step and returns
# a string of characters. If $result_as_octets is true the subroutine skips
# decoding of UTF-8 octets, the result will be a string of octets, only valid
# as UTF-8 if the provided $addr was a valid UTF-8 (garbage-in/garbage-out).
#
sub mail_addr_decode($;$) {
  my($addr, $result_as_octets) = @_;
  return undef  if !defined $addr;
  safe_encode_utf8_inplace($addr);  # to octets (if not already)
  local($1); my $domain;
  my $bracketed = $addr =~ s/^<(.*)>\z/$1/s;
  if ($addr =~ s{ \@ ( [^\@]* ) \z}{}xs) {
    $domain = $1;
    $domain = idn_to_utf8($domain)  if $domain =~ /(?:^|\.)xn--/si;
    if ($domain !~ tr/\x00-\x7F//c) {  # all-ASCII
      $domain = lc $domain;
    } elsif (!$result_as_octets) {  # non-ASCII, attempt decoding UTF-8
      # attempt decoding as strict UTF-8, otherwise fall back to Latin1
      # Not lowercased.
      eval { $domain = safe_decode_utf8($domain, 1|8); 1 }
      or do { $domain = safe_decode_latin1($domain) };
    }
  }
  # deal with localpart
  if (!$result_as_octets && $addr =~ tr/\x00-\x7F//c) {  # non-ASCII
    # attempt decoding as strict UTF-8, otherwise fall back to Latin1
    eval { $addr = safe_decode_utf8($addr, 1|8); 1 }
    or do { $addr = safe_decode_latin1($addr) };
  }
  $addr .= '@'.$domain  if defined $domain;  # put back the domain part
  $bracketed ? '<'.$addr.'>' : $addr;
}

# Expects an e-mail address as a string of octets or as logical characters
# (with utf8 flag on), where a local part may be encoded as UTF-8, and the
# domain part may be an international domain name (IDN) consisting either
# of U-labels or A-labels or NR-LDH. Leaves the localpart unchanged, encodes
# the domain name to ASCII-compatible encoding (ACE) if it is non-ASCII.
# The result is always in octets (UTF-8), domain part is lowercased.
#
sub mail_addr_idn_to_ascii($) {
  my $addr = $_[0];
  return undef  if !defined $addr;
  safe_encode_utf8_inplace($addr);  # to octets (if not already)
  local($1);
  my $bracketed = $addr =~ s/^<(.*)>\z/$1/s;
  $addr =~ s{ (\@ [^\@]*) \z }{ idn_to_ascii($1) }xse;
  $bracketed ? '<'.$addr.'>' : $addr;
}

# RFC 6533: encode an ORCPT mail address (as obtained from orcpt_decode,
# logical characters (utf8 flag may be on)) into one of the forms:
# utf-8-address, utf-8-addr-unitext, utf-8-addr-xtext, or as a legacy
# xtext (RFC 3461), returning a string of octets
#
sub orcpt_encode($;$$) {
  my($str, $smtputf8, $encode_for_smtp) = @_;
  return (undef,undef)  if !defined $str;

  # "Original-Recipient" ":" address-type ";" generic-address
  # address-type = atom
  # atom = [CFWS] 1*atext [CFWS]

  # RFC 3461: Due to limitations in the Delivery Status Notification format,
  # the value of the original recipient address prior to encoding as "xtext"
  # MUST consist entirely of printable (graphic and white space) characters
  # from the US-ASCII [4] repertoire.

  my $addr_type = '';  # expected 'rfc822' or 'utf-8', possibly empty
  local($1);  # get address-type (atom, up to a semicolon) and remove it
  if ($str =~ s{^[ \t]*([0-9A-Za-z!\#\$%&'*/=?^_`{|}~+-]*)[ \t]*;[ \t]*}{}s) {
    $addr_type = lc $1;
  }
  ll(5) && do_log(5, 'orcpt_encode %s, %s%s%s%s',
                  $addr_type, $str,
                  $smtputf8 ? ', smtputf8' : '',
                  $encode_for_smtp ? ', encode_for_smtp' : '',
                  utf8::is_utf8($str) ? ', is_utf8' : '');
  $str = $1  if $str =~ /^<(.*)>\z/s;

  if ($smtputf8 && utf8::is_utf8($str) &&
      ($addr_type eq 'utf-8' || $str =~ tr/\x00-\x7F//c)) {
    # for use in SMTPUTF8 (RCPT TO) or in message/global-delivery-status
    if ($encode_for_smtp && $str =~ tr{\x00-\x20+=\\}{}) {
      # contains +,=,\,SP,ctrl -> encode as utf-8-addr-unitext
      # HEXPOINT in EmbeddedUnicodeChar is 2 to 6 hexadecimal digits.
      $str =~ s{ ( [^\x21-\x2A\x2C-\x3C\x3E-\x5B\x5D-\x7E\x80-\xF4] ) }
               { sprintf('\\x{%02X}', ord($1)) }xgse;  # 2..6 uppercase hex!
    } else {
      # no restricted characters or not for SMTP -> keep as utf-8-address
      #
      # The utf-8-address form MAY be used in the ORCPT parameter when the
      # SMTP server also advertises support for SMTPUTF8 and the address
      # doesn't contain any ASCII characters not permitted in the ORCPT
      # parameter.  It SHOULD be used in a message/global-delivery-status
      # "Original-Recipient:" or "Final-Recipient:" DSN field, or in an
      # "Original-Recipient:" header field [RFC3798] if the message is a
      # SMTPUTF8 message.
    }
    safe_encode_utf8_inplace($str);  # to octets (if not already)
    $addr_type = 'utf-8';

  } else {
    # RFC 6533: utf-8-addr-xtext MUST be used in the ORCPT parameter
    # when the SMTP server doesn't advertise support for SMTPUTF8
    if ($str =~ tr/\x00-\x7F//c && utf8::is_utf8($str)) {
      # non-ASCII UTF-8, encode as utf-8-addr-xtext
      # RFC 6533: QCHAR = %x21-2a / %x2c-3c / %x3e-5b / %x5d-7e
      # HEXPOINT in EmbeddedUnicodeChar is 2 to 6 hexadecimal digits.
      $str =~ s{ ( [^\x21-\x2A\x2C-\x3C\x3E-\x5B\x5D-\x7E] ) }
               { sprintf('\\x{%02X}', ord($1)) }xgse;  # 2..6 uppercase hex!
      safe_encode_utf8_inplace($str);  # to octets (if not already)
      $addr_type = 'utf-8';
    } else {  # encode as legacy RFC 3461 xtext
      # encode +, =, \, SP, controls
      safe_encode_utf8_inplace($str);  # encode to octets first!
      $str =~ s{ ( [^\x21-\x2A\x2C-\x3C\x3E-\x5B\x5D-\x7E] ) }
               { sprintf('+%02X', ord($1)) }xgse;  # exactly two uppercase hex
      $addr_type = 'rfc822';
    }
  }
  ($addr_type, $str);
}

# Decode an encoded ORCPT e-mail address (a string of octets, encoded as
# xtext, utf-8-addr-xtext, utf-8-addr-unitext, or utf-8-address) as per
# RFC 3461 and RFC 6533. Result is presumably an RFC 5322 -encoded mail
# address, possibly as utf8-flagged characters string (if valid UTF-8),
# no angle brackets.
#
sub orcpt_decode($;$) {
  my($str, $smtputf8) = @_;
  return (undef,undef)  if !defined $str;

  my $addr_type = ''; local($1);
  # get address-type (atom, up to a semicolon) and remove it
  if ($str =~ s{^[ \t]*([0-9A-Za-z!\#\$%&'*/=?^_`{|}~+-]*)[ \t]*;[ \t]*}{}s) {
    $addr_type = lc $1;
  }

  if ($addr_type eq '') {
    # assumed not encoded (e.g. internally generated)
    if ($str =~ tr/\x00-\x7F//c && is_valid_utf_8($str) &&
        eval { $str = safe_decode_utf8($str, 1|8); 1 }) {
      $addr_type = 'utf-8';
    } else {
      $addr_type = 'rfc822';
    }

  } elsif ($addr_type ne 'utf-8') {  # presumably 'rfc822'
    # decode xtext-encoded string as per RFC 3461,
    # hexchar = ASCII "+" immediately followed by two UPPER CASE hex digits
    $str =~ s{ \+ ( [0-9A-F]{2} ) }{ pack('C',hex($1)) }xgse;
    # now have a string of octets, possibly with (invalid) 8bit characters

    # we may have a legacy encoding which should really be a utf-8 addr_type
    if ($smtputf8 && lc $addr_type eq 'rfc822' &&
        $str =~ tr/\x00-\x7F//c && is_valid_utf_8($str) &&
        eval { $str = safe_decode_utf8($str, 1|8); 1 }) {
      $addr_type = 'utf-8';
    }

  } elsif ($str !~ tr/\x00-\x7F//c) {  # address-type is 'utf-8', is all-ASCII
    # Looks like utf-8-addr-xtext or utf-8-addr-unitext.
    # Permissive decoding of EmbeddedUnicodeChar, as well as a legacy xtext:
    # RFC 6533: UTF-8 address type has 3 forms:
    #   utf-8-addr-xtext, utf-8-addr-unitext, and utf-8-address.
    $str =~ s{ \\ x \{ ( [0-9A-Fa-f]{2,6} ) \} |
               \+      ( [0-9A-F]{2} ) }
               { pack('U', hex(defined $1 ? $1 : $2)) }xgse;
    # RFC 6533 prohibits <NUL> and surrogates in EmbeddedUnicodeChar,
    # as well as encoded printable ASCII chars except xtext-specials +, =, \

  } elsif (is_valid_utf_8($str) &&
           eval { $str = safe_decode_utf8($str, 1|8); 1 }) {
    # Looks like a utf-8-address. Successfully decoded UTF-8 octets to chars.
    # permissive decoding of EmbeddedUnicodeChar, as well as a legacy xtext
    $str =~ s{ \\ x \{ ( [0-9A-Fa-f]{2,6} ) \} |
               \+      ( [0-9A-F]{2} ) }
               { pack('U', hex(defined $1 ? $1 : $2)) }xgse;

  } else {  # address-type is 'utf-8', non-ASCII, invalid UTF-8 string
    # RFC 6533: if an address is labeled with the UTF-8 address type
    # but does not conform to utf-8 syntax, then it MUST be copied into
    # the message/global-delivery-status field without alteration.
    # --> just leave $str unchanged as octets
  }

  # result in $str is presumably an RFC 5322 -encoded addr,
  # possibly as utf8-flagged characters, no angle brackets
  ($addr_type, $str);
}

# Mostly for debugging and reporting purposes:
# Convert nonprintable characters in the argument
# to \[rnftbe], or hex code, ( and '\' to '\\' ???),
# and Unicode characters to UTF-8, returning a sanitized string.
#
use vars qw(%quote_controls_map);
BEGIN {
  %quote_controls_map =
    ("\r" => '\\r', "\n" => '\\n', "\t" => '\\t', "\\" => '\\\\');

# leave out the <FF>, <BS> and <ESC>, these are too confusing in the log,
# better to just hand them over to hex quoting ( \xHH )
#   ("\r" => '\\r', "\n" => '\\n', "\f" => '\\f', "\t" => '\\t',
#    "\b" => '\\b', "\e" => '\\e', "\\" => '\\\\');

}
sub sanitize_str {
  my($str, $keep_eol) = @_;
  return ''  if !defined $str;
  safe_encode_utf8_inplace($str);  # to octets (if not already)
  # $str is now in octets, UTF8 flag is off
  local($1);
  if ($keep_eol) {
    # controls except LF, DEL, backslash
    $str =~ s/([\x00-\x09\x0B-\x1F\x7F\\])/
              $quote_controls_map{$1} || sprintf('\\x%02X', ord($1))/gse;
  } else {
    # controls, DEL, backslash
    $str =~ s/([\x00-\x1F\x7F\\])/
              $quote_controls_map{$1} || sprintf('\\x%02X', ord($1))/gse;
  }
  $str;
}

# Set or get Amavis internal task id (also called: log id).
# This task id performs a similar function as queue-id in MTA responses.
# It may only be used in generating text part of SMTP responses,
# or in generating log entries. It is only unique within a limited timespan.
use vars qw($amavis_task_id);  # internal task id
#                   (accessible via am_id() and later also as $msginfo->log_id)

sub am_id(;$) {
  if (@_) {         # set, if argument is present
    $amavis_task_id = $_[0];
    amavis_log_id($amavis_task_id);
    $0 = c('myprogram_name') .
         (!defined $amavis_task_id ? '' : " ($amavis_task_id)");
  }
  $amavis_task_id;  # return current value
}

sub new_am_id($;$$) {
  my($str, $cnt, $seq) = @_;
  my $id = defined $str ? $str : sprintf('%05d', $$);
  $id .= sprintf('-%02d', $cnt)  if defined $cnt;
  $id .= '-'.$seq  if defined $seq && $seq > 1;
  am_id($id);
}

use vars qw($entropy);  # MD5 ctx (128 bits, 32 hex digits or 22 base64 chars)
sub add_entropy(@) {  # arguments may be strings or array references
  $entropy = Digest::MD5->new  if !defined $entropy;
  my $s = join(',', map((!defined $_ ? 'U' : ref eq 'ARRAY' ? @$_ : $_), @_));
  utf8::encode($s)  if utf8::is_utf8($s);
# do_log(5,'add_entropy: %s',$s);
  $entropy->add($s);
}

sub fetch_entropy_bytes($) {
  my $n = $_[0];  # number of bytes to collect
  my $result = '';
  for (; $n > 0; $n--) {
    # collect as few bits per MD5 iteration as possible (RFC 4086 sect 6.2.1)
    # let's settle for 8 bits for practical reasons; fewer would be better
    my $digest = $entropy->digest;  # 16 bytes; also destroys accumulator
    $result .= substr($digest,0,1);  # take 1 byte
    $entropy->reset; $entropy->add($digest);  # cycle it back
  }
# ll(5) && do_log(5,'fetch_entropy_bytes %s',
#                  join(' ', map(sprintf('%02x',$_), unpack('C*',$result))));
  $result;
}

# read number of bytes from a /dev/urandom device
#
sub read_random_bytes($$) {
  # my($buff,$required_bytes) = @_;
  $_[0] = '';
  my $required_bytes = $_[1];
  my $fname = '/dev/urandom';  # nonblocking device!
  if ($required_bytes > 0) {
    my $fh = IO::File->new;
    $fh->open($fname,O_RDONLY)  # does a sysopen()
      or die "Can't open $fname: $!";
    $fh->binmode or die "Can't set $fname to binmode: $!";
    my $nbytes = $fh->sysread($_[0], $required_bytes);
    defined $nbytes  or die "Error reading from $fname: $!";
    $nbytes >= $required_bytes  or die "Less data read than requested: $!";
    $fh->close or die "Error closing $fname: $!";
  }
  undef;
}

# stir/initialize perl's random generator and our entropy pool;
# to be called at startup of the main process and each child processes
#
sub stir_random() {
  my $random_bytes;
  eval {
    read_random_bytes($random_bytes,16);  1;
  } or do {
    my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;
    do_log(0, 'read_random_bytes error: %s', $eval_stat);
    undef $random_bytes;
  };
  srand();  # let perl give it a try first, then stir-in some additional bits
  add_entropy($random_bytes, Time::HiRes::gettimeofday, $$, rand());
  #
  # must prevent all child processes working with the same inherited random
  # seed, otherwise modules like File::Temp will step on each other's toes
  my $r = unpack('L', fetch_entropy_bytes(4)) ^ int(rand(0xffffffff));
  srand($r & 0x7fffffff);
}

# generate a reasonably unique (long-term) id based on collected entropy.
# The result is a pair of a (mostly public) mail_id, and a secret id,
# where mail_id == b64(md5(secret_bin)). The secret id could be used to
# authorize releasing quarantined mail. Both the mail_id and secret id are
# strings of characters [A-Za-z0-9-_], with an additional restriction
# for mail_id which must begin and end with an alphanumeric character.
# The number of bits in a mail_id is configurable through $mail_id_size_bits
# and defaults to 72, yielding a 12-character base64url-encoded string.
# The number of bits must be an integral multiple of 24, so that no base64
# trailing padding characters '=' are needed (RFC 4648).
# Note the difference in base64-like encodings:
#   amavisd almost-base64: 62 +, 63 -   (old, no longer used since 2.7.0)
#   RFC 4648 base64:       62 +, 63 /   (not used here)
#   RFC 4648 base64url:    62 -, 63 _
# Generally, RFC 5322 controls, SP and specials must be avoided: ()<>[]:;@\,."
# With version 2.7.0 of amavisd we switched from almost-base64 to base64url
# to avoid having to quote a '+' in regular expressions and in URL.
#
sub generate_mail_id() {
  my($id_b64, $secret_bin);
  # 72 bits =  9 bytes = 12 b64 chars
  # 96 bits = 12 bytes = 16 b64 chars
  $mail_id_size_bits > 0 &&
  $mail_id_size_bits == int $mail_id_size_bits &&
  $mail_id_size_bits % 24 == 0
    or die "\$mail_id_size_bits ($mail_id_size_bits) must be a multiple of 24";
  for (my $j=0; $j<100; $j++) {  # provide some sanity loop limit just in case
    $secret_bin = fetch_entropy_bytes($mail_id_size_bits/8);
    # mail_id is computed as md5(secret), rely on unidirectionality of md5
    $id_b64 = Digest::MD5->new->add($secret_bin)->b64digest;  # b64(md5(sec))
    add_entropy($id_b64,$j);  # fold it back into accumulator
    substr($id_b64, $mail_id_size_bits/6) = '';  # b64, crop to size
    # done if it starts and ends with an alfanumeric character
    last  if $id_b64 =~ /^[A-Za-z0-9].*[A-Za-z0-9]\z/s;
    # retry on less than 7% of cases
    do_log(5,'generate_mail_id retry: %s', $id_b64);
  }
  $id_b64 =~ tr{+/}{-_};  # base64 -> RFC 4648 base64url [A-Za-z0-9-_]
  if (!wantarray) {  # not interested in secret
    $secret_bin = 'X' x length($secret_bin);  # can't hurt to wipe out
    return $id_b64;
  }
  my $secret_b64 = encode_base64($secret_bin,''); # $mail_id_size_bits/6 chars
  $secret_bin = 'X' x length($secret_bin);  # can't hurt to wipe out
  $secret_b64 =~ tr{+/}{-_};  # base64 -> RFC 4648 base64url [A-Za-z0-9-_]
# do_log(5,'generate_mail_id: %s %s', $id_b64, $secret_b64);
  ($id_b64, $secret_b64);
}

# Returns a password that may be used for scrambling of a message being
# released from a quarantine or mangled, with intention of preventing an
# automatic or undesired implicit opening of a potentially dangerous message.
# The first argument may be: a plain string, which is simply passed on
# to the result, or: a code reference (to be evaluated in a scalar context),
# allowing for lazy evaluation of a supplied password generating code,
# or: undef, which causes a generation of a simple 4-digit PIN-like random
# password. The second argument is just passed on unchanged to the supplied
# subroutine and is expected to be a $msginfo object.
#
sub make_password($$) {
  my($password,$msginfo) = @_;
  if (ref $password eq 'CODE') {
    eval {
      $password = &$password($msginfo);
      chomp $password; $password =~ s/^[ \t]+//; $password =~ s/[ \t]+\z//;
      untaint_inplace($password)  if $password =~ /^[A-Za-z0-9:._=+-]*\z/;
      1;
    } or do {
      my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;
      do_log(-1, 'password generating subroutine failed, '.
                 'supplying a default: %s', $@);
      $password = undef;
    };
  }
  if (!defined $password) {  # create a 4-digit random string
    my $r;
    do {
      $r = unpack('S',fetch_entropy_bytes(2));  # 0 .. 65535
      # ditch useless samples beyond 60000
    } until $r < 65536 - (65536 % 10000);
    $password = sprintf('%04d', $r % 10000);
    $r = 0;  # clear the IV field of a scalar (the undef() doesn't do so)
  }
  $password;
}

use vars qw(@counter_names);
# elements may be counter names (increment is 1), or pairs: [name,increment],
# or triples: [name,value,type], where type can be: C32, C64, INT, TIM or OID
sub snmp_counters_init() { @counter_names = () }
sub snmp_count(@) { push(@counter_names, @_) }
sub snmp_count64(@) { push(@counter_names, map(ref $_ ?$_ :[$_,1,'C64'], @_)) }
sub snmp_counters_get() { \@counter_names }

sub snmp_initial_oids() {
  return [
    ['sysDescr',    'STR', $myversion],                       # 0..255 octets
    ['sysObjectID', 'OID', '1.3.6.1.4.1.15312.2'],
  # iso.org.dod.internet.private.enterprise.ijs.amavis
    ['sysUpTime',   'INT', int(time)],  # to be converted to TIM
  # later it must be converted to timeticks (10ms units since start)
    ['sysContact',  'STR', safe_encode_utf8($snmp_contact)],  # 0..255 octets
  # Network Unicode format (Net-Unicode) RFC 5198, instead of NVT ASCII
    ['sysName',     'STR', idn_to_utf8(c('myhostname'))],     # 0..255 octets
    ['sysLocation', 'STR', safe_encode_utf8($snmp_location)], # 0..255 octets
    ['sysServices', 'INT', 64],  # application
  ];
}

use vars qw($debug_oneshot);
sub debug_oneshot(;$$) {
  if (@_) {
    my $new_debug_oneshot = shift;
    if (($new_debug_oneshot ? 1 : 0) != ($debug_oneshot ? 1 : 0)) {
      do_log(0, 'DEBUG_ONESHOT: TURNED '.($new_debug_oneshot ? 'ON' : 'OFF'));
      do_log(0, shift)  if @_;  # caller-provided extra log entry, usually
                                # the one that caused debug_oneshot call
    }
    $debug_oneshot = $new_debug_oneshot;
  }
  $debug_oneshot;
}

use vars qw($dbg_log);
sub log_capture_enabled(;$) {
  if (@_) {
    my $new_state = $_[0];
    if (!$dbg_log && $new_state) {
      $dbg_log = Amavis::DbgLog->new;
    } elsif ($dbg_log && !$new_state) {
      undef $dbg_log;  # calls its destructor
    }
  }
  $dbg_log ? 1 : 0;
}

use vars qw($current_config_log_level
            $current_config_syslog_ident
            $current_config_syslog_facility);
# keeping current settings avoids the most frequent calls to c()
sub update_current_log_level() {
  $current_config_log_level       = c('log_level') || 0;
  $current_config_syslog_ident    = c('syslog_ident');
  $current_config_syslog_facility = c('syslog_facility');
}

# is message log level below the current log level (i.e. eligible for logging)?
#
sub ll($) {
  (($DEBUG || $debug_oneshot) && $_[0] > 0 ? 0 : $_[0])
     <= $current_config_log_level
  || $dbg_log;
}

# write a log entry (optimized, called often)
#
sub do_log($$;@) {
# my($level,$errmsg,@args) = @_;
  my $level = $_[0];
# if (ll($level)) {  # inlined and reorderd the ll() call for speed
  if ( $level <= $current_config_log_level ||
       ( ($DEBUG || $debug_oneshot) && $level > 0
         && 0 <= $current_config_log_level ) ||
       $dbg_log ) {
    my $errmsg;  # the $_[1] is expected to be ASCII or UTF-8 octets (not char)
    if (@_ <= 2) {  # no arguments to sprintf
      $errmsg = $_[1];
    } elsif (@_ == 3) {  # a single argument to sprintf, optimized common case
      if (utf8::is_utf8($_[2])) {
        my $arg1 = $_[2]; utf8::encode($arg1);
        $errmsg = sprintf($_[1], $arg1);
      } else {
        $errmsg = sprintf($_[1], $_[2]);
      }
    } else {
      # treat $errmsg as sprintf format string if additional args are provided;
      # encode arguments individually to avoid mojibake when UTF8-flagged and
      # non- UTF8-flagged strings are concatenated;
      my @args = @_[2..$#_];
      for (@args) { utf8::encode($_) if utf8::is_utf8($_) }
      $errmsg = sprintf($_[1], @args);
    }
    local($1);
    # protect controls, DEL, and backslash; make sure to leave UTF-8 untouched
    $errmsg =~ s/([\x00-\x1F\x7F\\])/
                 $quote_controls_map{$1} || sprintf('\\x%02X', ord($1))/gse;
    $dbg_log->write_dbg_log($level,$errmsg)  if $dbg_log;
    $level = 0  if ($DEBUG || $debug_oneshot) && $level > 0;
    if ($level <= $current_config_log_level) {
      write_log($level,$errmsg);
###   $Amavis::zmq_obj->write_log($level,$errmsg)  if $Amavis::zmq_obj;
    }
  }
  1;
}

# equivalent to do_log, but protected by eval so that it can't bail out
#
sub do_log_safe($$;@) {
  # ignore failures while keeping perlcritic happy
  eval { do_log(shift,shift,@_) } or 1;
  1;
}

sub flush_captured_log() {
  $dbg_log->flush
    or die "Can't flush debug log file: $!"  if $dbg_log;
}

sub reposition_captured_log_to_end() {
  $dbg_log->reposition_to_end
    or die "Can't reposition debug log file to its end: $!"  if $dbg_log;
}

sub dump_captured_log($$) {
  my($dump_log_level, $enable_log_capture_dump) = @_;
  $dbg_log->dump_captured_log($dump_log_level,
                 $enable_log_capture_dump && ll($dump_log_level))  if $dbg_log;
}

# $timestamp_of_last_reception: a Unix time stamp when an MTA client send the
# last command to us, the most important of which is the reception of a final
# dot in SMTP session, which is a time when a client started to wait for our
# response;  this timestamp, along with a c('child_timeout'), make a deadline
# time for our processing
#
# $waiting_for_client: which timeout is running:
#   false: processing is in our courtyard,  true: waiting for a client
#
use vars qw($timestamp_of_last_reception $waiting_for_client);

sub waiting_for_client(;$) {
  $waiting_for_client = $_[0]  if @_;
  $waiting_for_client;
}

sub get_deadline(@) {
  my($which_section, $allowed_share, $reserve, $max_time) = @_;
  # $allowed_share ... factor between 0 and 1 of the remaining time till a
  #                    deadline, to be allocated to the task that follows
  # $reserve  ... try finishing up $reserve seconds before the deadline;
  # $max_time ... upper limit in seconds for the timer interval
  my($timer_interval, $timer_deadline, $time_to_deadline);
  my $child_t_o = c('child_timeout');
  if (!$child_t_o) {
    do_log(2, 'get_deadline %s - ignored, child_timeout not set',
              $which_section);
  } elsif (!defined $timestamp_of_last_reception) {
    do_log(2, 'get_deadline %s - ignored, master deadline not known',
              $which_section);
  } else {
    my $now = Time::HiRes::time;
    $time_to_deadline = $timestamp_of_last_reception + $child_t_o - $now;
    $timer_interval = $time_to_deadline;
    if (!defined $allowed_share) {
      $allowed_share = 0.6;
      $timer_interval *= $allowed_share;
    } elsif ($allowed_share <= 0) {
      $timer_interval = 0;
    } elsif ($allowed_share >= 1) {
      # leave it unchanged
    } else {
      $timer_interval *= $allowed_share;
    }
    $reserve = 4  if !defined $reserve;
    if ($reserve > 0 && $timer_interval > $time_to_deadline - $reserve) {
      $timer_interval = $time_to_deadline - $reserve;
    }
    if ($timer_interval < 8) {  # be generous, allow at least 6 seconds
      $timer_interval = max(6, min(8,$time_to_deadline));
    }
    my $j = int($timer_interval);
    $timer_interval = $timer_interval > $j ? $j+1 : $j;  # ceiling
    if (defined $max_time && $max_time > 0 && $timer_interval > $max_time) {
      $timer_interval = $max_time;
    }
    ll(5) && do_log(5,'get_deadline %s - deadline in %.1f s, set to %.3f s',
                      $which_section, $time_to_deadline, $timer_interval);
    $timer_deadline = $now + $timer_interval;
  }
  !wantarray ? $timer_interval
             : ($timer_interval, $timer_deadline, $time_to_deadline);
}

sub prolong_timer($;$$$) {
  my($which_section, $allowed_share, $reserve, $max_time) = @_;
  my($timer_interval, $timer_deadline, $time_to_deadline) = get_deadline(@_);
  if (defined $timer_interval) {
    my $prev_timer = alarm($timer_interval);  # restart/prolong the timer
    ll(5) && do_log(5,'prolong_timer %s: timer %d, was %d, deadline in %.1f s',
              $which_section, $timer_interval, $prev_timer, $time_to_deadline);
  }
  !wantarray ? $timer_interval
             : ($timer_interval, $timer_deadline, $time_to_deadline);
}

sub switch_to_my_time($) {      # processing is in our courtyard
  my $msg = $_[0];
  $waiting_for_client = 0;
  $timestamp_of_last_reception = Time::HiRes::time;
  my $child_t_o = c('child_timeout');
  if (!$child_t_o) {
    alarm(0);
  } else {
    prolong_timer( 'switch_to_my_time(' . $msg . ')' );
  }
}

sub switch_to_client_time($) {  # processing is now in client's hands
  my $msg = $_[0];
  my $interval = c('smtpd_timeout');
  $interval = 5  if $interval < 5;
  ll(5) && do_log(5, 'switch_to_client_time %d s, %s', $interval,$msg);
  undef $timestamp_of_last_reception;
  alarm($interval); $waiting_for_client = 1;
}

# pretty-print a structure for logging purposes: returns a string
#
sub fmt_struct($);  # prototype
sub fmt_struct($) {
  my $arg = $_[0];
  my $r = ref $arg;
  !$r ?
    (defined($arg) ? '"'.$arg.'"' : 'undef')
  : $r eq 'ARRAY' ?
      '[' . join(',', map(fmt_struct($_), @$arg)) . ']'
  : $r eq 'HASH' ?
      '{' . join(',', map($_.'=>'.fmt_struct($arg->{$_}), keys %$arg)) . '}'
  : $arg;
};

# used by freeze: protect % and ~, as well as NUL and \200 for good measure
#
sub st_encode($) {
  my $str = $_[0]; local($1);
  { # concession on a perl 5.20.0 bug [perl #122148] (fixed in 5.20.1)
    # - just warn, do not abort
    use warnings NONFATAL => qw(utf8);
    $str =~ s/([%~\000\200])/sprintf('%%%02X',ord($1))/gse;
  };
  $str;
}

# simple Storable::freeze lookalike
#
sub freeze($);  # prototype
sub freeze($) {
  my $obj = $_[0]; my $ty = ref($obj);
  if (!defined($obj))     { 'U' }
  elsif (!$ty)            { join('~', '',  st_encode($obj))  }  # string
  elsif ($ty eq 'SCALAR') { join('~', 'S', st_encode(freeze($$obj))) }
  elsif ($ty eq 'REF')    { join('~', 'R', st_encode(freeze($$obj))) }
  elsif ($ty eq 'ARRAY')  { join('~', 'A', map(st_encode(freeze($_)),@$obj)) }
  elsif ($ty eq 'HASH') {
    join('~', 'H',
         map {(st_encode($_),st_encode(freeze($obj->{$_})))} sort keys %$obj)
  } else { die "Can't freeze object type $ty" }
}

# simple Storable::thaw lookalike
#
sub thaw($);  # prototype
sub thaw($) {
  my $str = $_[0];
  return undef  if !defined $str;  # must return undef even in a list context!
  my($ty,@val) = split(/~/,$str,-1);
  s/%([0-9a-fA-F]{2})/pack('C',hex($1))/gse  for @val;
  if    ($ty eq 'U') { undef }
  elsif ($ty eq '')  { $val[0] }
  elsif ($ty eq 'S') { my $obj = thaw($val[0]); \$obj }
  elsif ($ty eq 'R') { my $obj = thaw($val[0]); \$obj }
  elsif ($ty eq 'A') { [map(thaw($_),@val)] }
  elsif ($ty eq 'H') {
    my $hr = {};
    while (@val) { my $k = shift @val; $hr->{$k} = thaw(shift @val) }
    $hr;
  } else { die "Can't thaw object type $ty" }
}

# accepts either a single contents category (a string: "maj,min" or "maj"),
# or a list of contents categories, in which case only the first element
# is considered; returns a passed pair: (major_ccat, minor_ccat)
#
sub ccat_split($) {
  my $ccat = $_[0]; my $major; my $minor;
  $ccat = $ccat->[0]  if ref $ccat;  # pick the first element if given a list
  ($major,$minor) = split(/,/,$ccat,-1)  if defined $ccat;
  !wantarray ? $major : ($major,$minor);
}

# accepts either a single contents category (a string: "maj,min" or "maj"),
# or a list of contents categories, in which case only the first element
# is considered; returns major_ccat
#
sub ccat_maj($) {
  my $ccat = $_[0]; my $major; my $minor;
  $ccat = $ccat->[0]  if ref $ccat;  # pick the first element if given a list
  ($major,$minor) = split(/,/,$ccat,-1)  if defined $ccat;
  $major;
}

# compare numerically two strings of the form "maj,min" or just "maj", where
# maj and min are numbers, representing major and minor contents category
#
sub cmp_ccat($$) {
  my($a_maj,$a_min) = split(/,/, $_[0], -1);
  my($b_maj,$b_min) = split(/,/, $_[1], -1);
  $a_maj == $b_maj ? $a_min <=> $b_min : $a_maj <=> $b_maj;
}

# similar to cmp_ccat, but consider only the major category of both arguments
#
sub cmp_ccat_maj($$) {
  my($a_maj,$a_min) = split(/,/, $_[0], -1);
  my($b_maj,$b_min) = split(/,/, $_[1], -1);
  $a_maj <=> $b_maj;
}

# get a list of settings corresponding to all listed contents categories,
# ordered from the most important category to the least;  @ccat is a list of
# relevant contents categories for which a query is made, it MUST already be
# sorted in descending order;  this is a classical subroutine, not a method!
#
sub setting_by_given_contents_category_all($@) {
  my($ccat,@settings_href_list) = @_; my(@r);
  if (@settings_href_list) {
    for my $e ((!defined $ccat ? () : ref $ccat ?@$ccat :$ccat), CC_CATCHALL) {
      if (grep(defined($_) && exists($_->{$e}), @settings_href_list)) {
        # supports lazy evaluation (a setting may be a subroutine)
        my(@slist) = map { !defined($_) || !exists($_->{$e}) ? undef :
                           do { my $s = $_->{$e}; ref($s) eq 'CODE' ? &$s : $s}
                         } @settings_href_list;
        push(@r, [$e,@slist]);  # a tuple: [corresponding ccat, settings list]
      }
    }
  }
  @r;  # a list of tuples
}

# similar to setting_by_given_contents_category_all(), but only the first
# (the most relevant) setting is returned, without a corresponding ccat
#
sub setting_by_given_contents_category($@) {
  my($ccat,@settings_href_list) = @_; my(@slist);
  if (@settings_href_list) {
    for my $e ((!defined $ccat ? () : ref $ccat ?@$ccat :$ccat), CC_CATCHALL) {
      if (grep(defined($_) && exists($_->{$e}), @settings_href_list)) {
        # supports lazy evaluation (setting may be a subroutine)
        @slist = map { !defined($_) || !exists($_->{$e}) ? undef :
                       do { my $s = $_->{$e}; ref($s) eq 'CODE' ? &$s : $s }
                     } @settings_href_list;
        last;
      }
    }
  }
  !wantarray ? $slist[0] : @slist;  # only the first entry
}

# Removes a directory, along with its contents
#
# The readdir() is entitled to fail if the directory changes underneath,
# so do the deletions by chunks: read a limited set of filenames into
# memory, close directory, delete these files, and repeat.
# The current working directory must not be within directories which are
# to be deleted, otherwise rmdir can fail with 'Invalid argument' (e.g.
# on Solaris 10).
#
sub rmdir_recursively($;$);  # prototype
sub rmdir_recursively($;$) {
  my($dir, $exclude_itself) = @_;
  ll(4) && do_log(4, 'rmdir_recursively: %s, excl=%s', $dir,$exclude_itself);
  my($f, @rmfiles, @rmdirs);  my $more = 1;  my $dir_chmoded = 0;
  while ($more) {
    local(*DIR);  $more = 0;
    my $errn = opendir(DIR,$dir) ? 0 : 0+$!;
    if ($errn == EACCES && !$dir_chmoded) {
      # relax protection on directory, then try again
      do_log(3,'rmdir_recursively: enabling read access to directory %s',$dir);
      chmod(0750,$dir)
        or do_log(-1, "Can't change protection-1 on dir %s: %s", $dir, $!);
      $dir_chmoded = 1;
      $errn = opendir(DIR,$dir) ? 0 : 0+$!;  # try again
    }
    if ($errn) { die "Can't open directory $dir: $!" }
    my $cnt = 0;
    # avoid slurping the whole directory contents into memory
    while (defined($f = readdir(DIR))) {
      next  if $f eq '.' || $f eq '..';
      my $fname = $dir . '/' . $f;
      $errn = lstat($fname) ? 0 : 0+$!;
      if ($errn == EACCES && !$dir_chmoded) {
        # relax protection on the directory and retry
        do_log(3,'rmdir_recursively: enabling access to files in dir %s',$dir);
        chmod(0750,$dir)
          or do_log(-1, "Can't change protection-2 on dir %s: %s", $dir, $!);
        $dir_chmoded = 1;
        $errn = lstat($fname) ? 0 : 0+$!;  # try again
      }
      if ($errn) { do_log(-1, "Can't access file \"%s\": $!", $fname,$!) }
      if (-d _) { push(@rmdirs,$f) } else { push(@rmfiles,$f) }
      $cnt++;
      if ($cnt >= 1000) {
        do_log(3,'rmdir_recursively: doing %d files and %d dirs for now in %s',
                 scalar(@rmfiles), scalar(@rmdirs), $dir);
        $more = 1;
        last;
      }
    }
    # fixed by perl5.20: readdir() now only sets $! on error.  $! is no longer
    # set to EBADF when then terminating undef is read from the directory
    # unless the system call sets $!. [perl #118651]
    closedir(DIR) or die "Error closing directory $dir: $!";
    my $cntf = scalar(@rmfiles);
    for my $f (@rmfiles) {
      my $fname = $dir . '/' . untaint($f);
      if (unlink($fname)) {
        # ok
      } elsif ($! == EACCES && !$dir_chmoded) {
        # relax protection on the directory, then try again
        do_log(3,'rmdir_recursively: enabling write access to dir %s',$dir);
        my $what = -l _ ? 'symlink' :-d _ ? 'directory' :'non-regular file';
        chmod(0750,$dir)
          or do_log(-1, "Can't change protection-3 on dir %s: %s", $dir, $!);
        $dir_chmoded = 1;
        unlink($fname) or die "Can't remove $what $fname: $!";
      }
    }
    undef @rmfiles;
    section_time("unlink-$cntf-files")  if $cntf > 0;
    for my $d (@rmdirs) {
      rmdir_recursively($dir . '/' . untaint($d));
    }
    undef @rmdirs;
  }
  if (!$exclude_itself) {
    rmdir($dir) or die "rmdir_recursively: Can't remove directory $dir: $!";
    section_time('rmdir');
  }
  1;
}

# efficiently read a file (in binmode) into a provided string;
# either an open file handle may be given, or a filename
#
sub read_file($$) {
  my($fname,$strref) = @_;
  my($fh, $file_size, $nbytes);
  if (ref $fname) {
    $fh = $fname;  # assume a file handle was given
  } else {  # a filename
    $fh = IO::File->new;
    $fh->open($fname,O_RDONLY)  # does a sysopen
      or die "Can't open file $fname for reading: $!";
    $fh->binmode or die "Can't set file $fname to binmode: $!";
  }
  my(@stat_list) = stat($fh);
  @stat_list or die "Failed to access file: $!";
  $file_size = -s _  if -f _;
  if ($file_size) {
    # preallocate exact storage size, avoids realloc/copying while growing
    $$strref = ''; vec($$strref, $file_size + 32768, 8) = 0;
  }
  $$strref = '';
#*** handle EINTR
  while ( $nbytes=sysread($fh, $$strref, 32768, length $$strref) ) { }
  defined $nbytes or die "Error reading from $fname: $!";
  if (!ref $fname) { $fh->close or die "Error closing $fname: $!" }
  $strref;
}

# read a text file, returning its contents as a string - suitable for
# calling from amavisd.conf
#
sub read_text($;$) {
  my($fname, $encoding) = @_;
  my $fh = IO::File->new;
  $fh->open($fname,'<') or die "Can't open file $fname for reading: $!";
  if (defined($encoding) && $encoding ne '') {
    binmode($fh, ":encoding($encoding)")
      or die "Can't set :encoding($encoding) on file $fname: $!";
  }
  my $nbytes; my $str = '';
  while (($nbytes = $fh->read($str, 16384, length($str))) > 0) { }
  defined $nbytes or die "Error reading from $fname: $!";
  $fh->close or die "Error closing $fname: $!";
  my $result = $str; undef $str;  # shrink allocated storage to actual size
  $result;
}

# attempt to read all user-visible replies from a l10n dir
# This function auto-fills $notify_sender_templ, $notify_virus_sender_templ,
# $notify_virus_admin_templ, $notify_virus_recips_templ,
# $notify_spam_sender_templ and $notify_spam_admin_templ from files named
# template-dsn.txt, template-virus-sender.txt, template-virus-admin.txt,
# template-virus-recipient.txt, template-spam-sender.txt,
# template-spam-admin.txt.  If this is available, it uses the charset
# file to do automatic charset conversion. Used by the Debian distribution.
#
sub read_l10n_templates($;$) {
  my $dir = $_[0];
  if (@_ > 1)  # compatibility with Debian
    { my($l10nlang, $l10nbase) = @_; $dir = "$l10nbase/$l10nlang" }
  my $file_chset = Amavis::Util::read_text("$dir/charset");
  local($1,$2);
  if ($file_chset =~ m{^(?:\#[^\n]*\n)*([^./\n\s]+)(\s*[\#\n].*)?$}s) {
    $file_chset = untaint("$1");
  } else {
    die "Invalid charset $file_chset\n";
  }
  $Amavis::Conf::notify_sender_templ =
    Amavis::Util::read_text("$dir/template-dsn.txt", $file_chset);
  $Amavis::Conf::notify_virus_sender_templ =
    Amavis::Util::read_text("$dir/template-virus-sender.txt", $file_chset);
  $Amavis::Conf::notify_virus_admin_templ =
    Amavis::Util::read_text("$dir/template-virus-admin.txt", $file_chset);
  $Amavis::Conf::notify_virus_recips_templ =
    Amavis::Util::read_text("$dir/template-virus-recipient.txt", $file_chset);
  $Amavis::Conf::notify_spam_sender_templ =
    Amavis::Util::read_text("$dir/template-spam-sender.txt", $file_chset);
  $Amavis::Conf::notify_spam_admin_templ =
    Amavis::Util::read_text("$dir/template-spam-admin.txt", $file_chset);
}

# # attempt to read a list of config files to use instead of the default one,
# # using an external helper script. Used by the Debian/Ubuntu distribution.
# sub find_config_files(@) {
#   my(@dirs) = @_;
#   local $ENV{PATH} = '/bin:/usr/bin';
#   my(@config_files) = map { `run-parts --list "$_"` } @dirs;
#   chomp(@config_files);
#   # untaint - this data is secure as we check the files themselves later
#   map { untaint($_) } @config_files;
# }

#use CDB_File;
#sub tie_hash($$) {
# my($hashref, $filename) = @_;
# CDB_File::create(%$hashref, $filename, "$filename.tmp$$")
#   or die "Can't create cdb $filename: $!";
# my $cdb = tie(%$hashref,'CDB_File',$filename)
#   or die "Tie to $filename failed: $!";
# $hashref;
#}

# read an associative array (=Perl hash) (as used in lookups) from a file;
# may be called from amavisd.conf
#
# Format: one key per line, anything from '#' to the end of line
# is considered a comment, but '#' within correctly quoted RFC 5321
# addresses is not treated as a comment introducer (e.g. a hash sign
# within "strange # \"foo\" address"@example.com is part of the string).
# Lines may contain a pair: key value, separated by whitespace,
# or key only, in which case a value 1 is implied. Trailing whitespace
# is discarded (iff $trim_trailing_space_in_lookup_result_fields),
# empty lines (containing only whitespace or comment) are ignored.
# Addresses (lefthand-side) are converted from RFC 5321 -quoted form
# into internal (raw) form and inserted as keys into a given hash.
# International domain names (IDN) in UTF-8 are encoded to ASCII.
# NOTE: the format is partly compatible with Postfix maps (not aliases):
#   no continuation lines are honoured, Postfix maps do not allow
#   RFC 5321 -quoted addresses containing whitespace, Postfix only allows
#   comments starting at the beginning of a line.
#
# The $hashref argument is returned for convenience, so that one can do
# for example:
#   $per_recip_whitelist_sender_lookup_tables = {
#     '.my1.example.com' => read_hash({},'/var/amavis/my1-example-com.wl'),
#     '.my2.example.com' => read_hash({},'/var/amavis/my2-example-com.wl') }
# or even simpler:
#   $per_recip_whitelist_sender_lookup_tables = {
#     '.my1.example.com' => read_hash('/var/amavis/my1-example-com.wl'),
#     '.my2.example.com' => read_hash('/var/amavis/my2-example-com.wl') }
#
sub read_hash(@) {
  unshift(@_,{})  if !ref $_[0];  # first argument is optional, defaults to {}
  my($hashref, $filename, $keep_case) = @_;
  my $lpcs = c('localpart_is_case_sensitive');
  my $inp = IO::File->new;
  $inp->open($filename,'<') or die "Can't open file $filename for reading: $!";
  my $ln;
  for ($! = 0; defined($ln=$inp->getline); $! = 0) {
    chomp($ln);
    # carefully handle comments, '#' within "" does not count as a comment
    my $lhs = ''; my $rhs = ''; my $at_rhs = 0; my $trailing_comment = 0;
    for my $t ( $ln =~ /\G ( " (?: \\. | [^"\\] )* " |
                             [^#" \t]+ | [ \t]+ | . )/xgs) {
      if ($t eq '#') { $trailing_comment = 1; last }
      if (!$at_rhs && $t =~ /^[ \t]+\z/) { $at_rhs = 1 }
      else { ($at_rhs ? $rhs : $lhs) .= $t }
    }
    $rhs =~ s/[ \t]+\z//  if $trailing_comment ||
                             $trim_trailing_space_in_lookup_result_fields;
    next  if $lhs eq '' && $rhs eq '';
    my($source_route, $localpart, $domain) =
      Amavis::rfc2821_2822_Tools::parse_quoted_rfc2821($lhs,1);
    $localpart = lc($localpart)  if !$lpcs;
    my $addr = $localpart . idn_to_ascii($domain);
    $hashref->{$addr} = $rhs eq '' ? 1 : $rhs;
    # do_log(5, 'read_hash: address: <%s>: %s', $addr, $hashref->{$addr});
  }
  defined $ln || $! == 0  or   # returning EBADF at EOF is a perl bug
    $! == EBADF ? do_log(0,'Error reading from %s: %s', $filename,$!)
                : die "Error reading from $filename: $!";
  $inp->close or die "Error closing $filename: $!";
  $hashref;
}

sub read_array(@) {
  unshift(@_,[])  if !ref $_[0];  # first argument is optional, defaults to []
  my($arrref, $filename, $keep_case) = @_;
  my $inp = IO::File->new;
  $inp->open($filename,'<') or die "Can't open file $filename for reading: $!";
  my $ln;
  for ($! = 0; defined($ln=$inp->getline); $! = 0) {
    chomp($ln); my $lhs = '';
    # carefully handle comments, '#' within "" does not count as a comment
    for my $t ( $ln =~ /\G ( " (?: \\. | [^"\\] )* " |
                             [^#" \t]+ | [ \t]+ | . )/xgs) {
      last  if $t eq '#';
      $lhs .= $t;
    }
    $lhs =~ s/[ \t]+\z//;  # trim trailing whitespace
    push(@$arrref, Amavis::rfc2821_2822_Tools::unquote_rfc2821_local($lhs))
      if $lhs ne '';
  }
  defined $ln || $! == 0  or   # returning EBADF at EOF is a perl bug
    $! == EBADF ? do_log(0,'Error reading from %s: %s', $filename,$!)
                : die "Error reading from $filename: $!";
  $inp->close or die "Error closing $filename: $!";
  $arrref;
}

# The read_cidr() reads a Postfix style CIDR file, (see cidr_table(5) man
# page), with postfix-style interpretation of comments and line continuations,
# returning a ref to an array or a ref to a hash (associative array ref).
#
# Empty or whitespace-only lines are ignored, as are lines whose first
# non-whitespace character is a '#'. A logical line starts with non-whitespace
# text. A line that starts with whitespace continues a logical line.
# The general form is:  network_address/network_mask  result
# where 'network_address' is an IPv4 address in a dot-quad form, or an IPv6
# address optionally enclosed in square brackets. The 'network_mask' along
# with a preceding slash is optional, as is the 'result' argument.
#
# If a network mask is omitted, a host address (not a network address)
# is assumed (i.e. a mask defaults to /32 for an IPv4 address, and
# to /128 for an IPv6 address).
#
# The read_cidr() returns a ref to an array or a ref to an hash (associative
# array) of network specifications, directly suitable for use as a lookup
# table in @client_ipaddr_policy and @mynetworks_maps, or for copying the
# array into @inet_acl or @mynetworks.
#
# When returned as an array the 'result' arguments are ignored, just the
# presence of a network specification matters. A '!' may precede the network
# specification, which will be interpreted as by lookup_ip_acl() as a negation,
# i.e. a match on such entry will return a false.
#
# When returned as a hash, the network specification is lowercased and used
# as a key, and the 'result' is stored as a value of a hash entry. A missing
# 'result' is replaced by 1.
#
# See also the lookup_ip_acl() for details on allowed IP address syntax
# and on the interpretation of array and hash type IP lookup tables.
#
sub read_cidr($;$) {
  my($filename, $result) = @_;
  # the $result arg may be a ref to an existing array or hash, in which case
  # data will be added there - either as key/value pairs, or as array elements;
  $result = [] if !defined $result;  # missing $results arg yields an array
  my $have_arry = ref $result eq 'ARRAY';
  my $inp = IO::File->new;
  $inp->open($filename,'<') or die "Can't open file $filename for reading: $!";
  my($ln, $curr_line);
  for ($! = 0; defined($ln=$inp->getline); $! = 0) {
    next  if $ln =~ /^ [ \t]* (?: \# | $ )/xs;
    chomp($ln);
    if ($ln =~ /^[ \t]/) {  # a continuation line
      $curr_line = ''  if !defined $curr_line;  # first line a continuation??
      $curr_line .= $ln;
    } else {  # a new logical line starts
      if (defined $curr_line) {  # deal with the previous logical line
        my($key,$val) = split(' ',$curr_line,2);
        # $val is always defined, it is an empty string if missing
        if ($have_arry) { push(@$result,$key) }
        else { $result->{lc $key} = $val eq '' ? 1 : $val }
      }
      $curr_line = $ln;
    }
  }
  if (defined $curr_line) {  # deal with the last logical line
    my($key,$val) = split(' ',$curr_line,2);
    if ($have_arry) { push(@$result,$key) }
    else { $result->{lc $key} = $val eq '' ? 1 : $val }
  }
  defined $ln || $! == 0  or   # returning EBADF at EOF is a perl bug
    $! == EBADF ? do_log(0,'Error reading from %s: %s', $filename,$!)
                : die "Error reading from $filename: $!";
  $inp->close or die "Error closing $filename: $!";
  $result;
}

sub dump_hash($) {
  my $hr = $_[0];
  do_log(0, 'dump_hash: %s => %s', $_, $hr->{$_})  for (sort keys %$hr);
}

sub dump_array($) {
  my $ar = $_[0];
  do_log(0, 'dump_array: %s', $_)  for @$ar;
}

# use Devel::Symdump;
# sub dump_subs() {
#   my $obj = Devel::Symdump->rnew;
#   # list of all subroutine names and their memory addresses
#   my @a = map([$_, \&$_], $obj->functions, $obj->scalars,
#                           $obj->arrays, $obj->hashes);
#   open(SUBLIST, ">/tmp/1.log") or die "Can't create a file: $!";
#   for my $s (sort { $a->[1] <=> $b->[1] } @a) {  # sorted by memory address
#     printf SUBLIST ("%s %s\n", $s->[1], $s->[0]);
#   }
#   close(SUBLIST) or die "Can't close a file: $!";
# }

# (deprecated, only still used with Amavis::OS_Fingerprint)
sub dynamic_destination($$) {
  my($method,$conn) = @_;
  if ($method =~ /^(?:[a-z][a-z0-9.+-]*)?:/si) {
    my(@list); $list[0] = ''; my $j = 0;
    for ($method =~ /\G \[ (?: \\. | [^\]\\] )* \] | " (?: \\. | [^"\\] )* "
                        | : | [ \t]+ | [^:"\[ \t]+ | . /xgs) {  # real parsing
      if ($_ eq ':') { $list[++$j] = '' } else { $list[$j] .= $_ }
    };
    if ($list[1] =~ m{^/}) {
      # presumably the second field is a Unix socket name, keep unchanged
    } else {
      my $new_method; my($proto,$relayhost,$relayport) = @list;
      if ($relayhost eq '*') {
        my $client_ip;  $client_ip = $conn->client_ip if $conn;
        $relayhost = "[$client_ip]"  if defined $client_ip && $client_ip ne '';
      }
      if ($relayport eq '*') {
        my $socket_port;  $socket_port = $conn->socket_port if $conn;
        $relayport = $socket_port + 1
          if defined $socket_port && $socket_port ne '';
      }
      if ($relayhost eq '*' || $relayport eq '*') {
        do_log(0,'dynamic destination expected, no client addr/port info: %s',
                  $method);
      }
      $list[1] = $relayhost;  $list[2] = $relayport;
      $new_method = join(':',@list);
      if ($new_method ne $method) {
        do_log(3, 'dynamic destination: %s -> %s', $method,$new_method);
        $method = $new_method;
      }
    }
  }
  $method;
}

# collect unfinished recipients matching a $filter sub and a delivery
# method regexp;  assumes all list elements of a delivery_method list
# use the same protocol name, hence only the first one is inspected
#
sub collect_equal_delivery_recips($$$) {
  my($msginfo, $filter, $deliv_meth_regexp) = @_;
  my(@per_recip_data_subset, $proto_sockname);

  my(@per_recip_data) =
    grep(!$_->recip_done && (!$filter || &$filter($_)) &&
         grep(/$deliv_meth_regexp/,
              (ref $_->delivery_method ? $_->delivery_method->[0]
                                       : $_->delivery_method)),
         @{$msginfo->per_recip_data});
  if (@per_recip_data) {
    # take the first remaining recipient as a model
    $proto_sockname = $per_recip_data[0]->delivery_method;
    defined $proto_sockname  or die "undefined recipient's delivery_method";
    my $proto_sockname_key = !ref $proto_sockname ? $proto_sockname
                                                : join("\n", @$proto_sockname);
    # collect recipients with the same delivery method as the first one
    $per_recip_data_subset[0] = shift(@per_recip_data);  # always equals self
    push(@per_recip_data_subset,
         grep((ref $_->delivery_method ? join("\n", @{$_->delivery_method})
                                       : $_->delivery_method)
               eq $proto_sockname_key,  @per_recip_data) );
  }
  # return a ref to a filtered list of still-to-be-delivered recipient objects
  # and a single string or a ref to a list of delivery methods common to
  # these recipients
  (\@per_recip_data_subset, $proto_sockname);
}

# get system supplementary groups by username.
# Borrowed from SpamAssassin.
sub get_user_groups {
  my $user = shift;
  return if not defined($user);
  my $gid = (getpwnam($user))[3];
  return if not defined($gid);
  my @gids = $gid;
  while (my($name,$gid,$members) = (getgrent())[0,2,3]) {
    if (grep { $_ eq $user } split(/ /, $members)) {
      push @gids, $gid;
    }
  }
  endgrent;
  return @gids;
}

1;