HEX

File: //usr/share/perl5/vendor_perl/Amavis/Out/EditHeader.pm
# SPDX-License-Identifier: GPL-2.0-or-later

package Amavis::Out::EditHeader;

# Accumulates instructions on what header fields need to be added
# to a header section, which deleted, or how to change existing ones.
# A call to write_header() then performs these edits on the fly.

use strict;
use re 'taint';

BEGIN {
  require Exporter;
  use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
  $VERSION = '2.412';
  @ISA = qw(Exporter);
  @EXPORT_OK = qw(&hdr);
}
use Errno qw(EBADF);
use Encode ();
use MIME::Words;

use Amavis::Conf qw(:platform c cr ca);
use Amavis::rfc2821_2822_Tools qw(wrap_string);
use Amavis::Timing qw(section_time);
use Amavis::Util qw(ll do_log min max q_encode
                    safe_encode safe_encode_utf8_inplace);

sub new {
  my $class = $_[0];
  bless { prepend=>[], append=>[], addrcvd=>[], edit=>{} }, $class;
}

sub prepend_header {
  my $self = shift;
  unshift(@{$self->{prepend}}, hdr(@_));
}

sub append_header {
  my $self = shift;
  push(@{$self->{append}}, hdr(@_));
}

sub append_header_above_received {
  my $self = shift;
  push(@{$self->{addrcvd}}, hdr(@_));
}

# now a synonym for append_header_above_received()  (old semantics: prepend
# or append, depending on setting of $append_header_fields_to_bottom)
#
sub add_header {
  my $self = shift;
  push(@{$self->{addrcvd}}, hdr(@_));
}

# delete all header fields with a $field_name
#
sub delete_header {
  my($self, $field_name) = @_;
  $self->{edit}{lc $field_name} = [undef];
}

# all header fields with $field_name will be edited by a supplied subroutine
#
sub edit_header {
  my($self, $field_name, $field_edit_sub) = @_;
  # $field_edit_sub will be called with 2 args: a field name and a field body;
  # It should return a pair consisting of a replacement field body (no field
  # name and no colon, with or without a trailing NL), and a boolean 'verbatim'
  # (false in its absence). An undefined replacement field body indicates a
  # deletion of the entire header field. A value true in the second returned
  # element indicates that a verbatim replacement is desired (i.e. no other
  # changes are allowed on a replacement body such as folding or encoding).
  !defined($field_edit_sub) || ref($field_edit_sub) eq 'CODE'
    or die "edit_header: arg#3 must be undef or a subroutine ref";
  $field_name = lc $field_name;
  if (!exists($self->{edit}{$field_name})) {
    $self->{edit}{$field_name} = [$field_edit_sub];
  } else {
    do_log(5, "INFO: multiple header edits: %s", $field_name);
    push(@{$self->{edit}{$field_name}}, $field_edit_sub);
  }
}

# copy all header edits from another header-edits object into this one
#
sub inherit_header_edits($$) {
  my($self, $other_edits) = @_;
  if (defined $other_edits) {
    for (qw(prepend addrcvd append)) {
      unshift(@{$self->{$_}}, @{$other_edits->{$_}})  if $other_edits->{$_};
    }
    my $o_edit = $other_edits->{edit};
    if ($o_edit) {
      for my $fn (keys %$o_edit) {
        if (!exists($self->{edit}{$fn})) {
          $self->{edit}{$fn} = [ @{$o_edit->{$fn}} ];  # copy list
        } else {
          unshift(@{$self->{edit}{$fn}}, @{$o_edit->{$fn}});
        }
      }
    }
  }
}

# Conditioning of a header field to be added.
# Insert space after colon if not present, RFC 2047 -encode if field body
# contains non-ASCII characters, fold long lines if needed, prepend space
# before each NL if missing, append NL if missing. Header lines with only
# spaces are not allowed. (RFC 5322: Each line of characters MUST be no more
# than 998 octets(!) (RFC 6532), and SHOULD be no more than 78 characters(!)
# (RFC 6532), excluding the CRLF). $structured==0 indicates an unstructured
# header field, folding may be inserted at any existing whitespace character
# position; $structured==1 indicates that folding is only allowed at positions
# indicated by \n in the provided header body, original \n will be removed.
# With $structured==2 folding is preserved, wrapping step is skipped.
#
sub hdr {
  my($field_name, $field_body, $structured, $wrap_char, $smtputf8) = @_;
  safe_encode_utf8_inplace($field_name);  # to octets (if not already)
  $field_name =~ tr/\x21-\x39\x3B-\x7E/?/c;  # printable ASCII except ':'
  my $field_body_is_utf8 = utf8::is_utf8($field_body);
  local($1);
  if ($field_body !~ tr/\x00-\x7F//c) {  # is all-ASCII
    # no encoding necessary, just clear the utf8 flag if set
    if ($field_body_is_utf8) {
      do_log(5,'header encoded (utf8:Y) (all-ASCII): %s: %s',
               $field_name, $field_body);
      safe_encode_utf8_inplace($field_body);  # to octets (if not already)
    } else {
      do_log(5,'header encoded (all-ASCII): %s: %s', $field_name, $field_body);
    }
  } elsif ($smtputf8) {  # UTF-8 in header field bodies is allowed
    safe_encode_utf8_inplace($field_body)  if $field_body_is_utf8;
    ll(5) && do_log(5,'header encoded (utf8:%s) to UTF-8 (SMTPUTF8): %s: %s',
                      $field_body_is_utf8?'Y':'N', $field_name, $field_body);
  } elsif ($field_name =~ /^(?: Subject | Comments |
                            (?:Resent-)? (?: From|Sender|To|Cc ) )\z/six &&
           $field_body !~ /^[\t\n\x20-\x7F]*\z/  # but printable or HT or LF
       # consider also:  | X- (?! Envelope- (?:From|To)\z )
  ) {  # encode according to RFC 2047
    # actually RFC 2047 also allows encoded-words in rfc822 extension
    # message header fields (now: optional header fields), within comments
    # in structured header fields, or within 'phrase' (e.g. in From, To, Cc);
    # we are being sloppy here!
    $field_body =~ s/\n(?=[ \t])//gs;  # unfold
    chomp($field_body);
    my $chset = c('hdr_encoding');
    my $field_body_octets = safe_encode($chset, $field_body);
    ll(5) && do_log(5,'header encoded (utf8:%s) to %s, %s: %s -> %s',
                      $field_body_is_utf8?'Y':'N', $chset,
                      $field_name, $field_body, $field_body_octets);
    my $qb = c('hdr_encoding_qb');
    my $encoder_func = uc $qb eq 'Q' ? \&q_encode
                                     : \&MIME::Words::encode_mimeword;
    $field_body = join("\n", map { /^[\001-\011\013\014\016-\177]*\z/ ? $_
                                     : &$encoder_func($_,$qb,$chset) }
                                 split(/\n/, $field_body_octets, -1));
  } else {  # should have been all-ASCII, or UTF-8 with SMTPUTF8 - but anyway:
    safe_encode_utf8_inplace($field_body)  if $field_body_is_utf8;
    ll(5) && do_log(5,'header encoded (utf8:%s) to UTF-8: %s: %s',
                      $field_body_is_utf8?'Y':'N', $field_name, $field_body);
  }
  my $str = $field_name . ':';
  $str .= ' '  if $field_body =~ /^[^ \t]/;  # insert space, looks nicer
  $str .= $field_body;
  if ($structured == 2) {  # already folded, keep it that way, sanitize
    1 while $str =~ s/^([ \t]*)\n/$1/;  # prefixed by whitespace lines?
    $str =~ s/\n(?=[ \t]*(\n|\z))//g;   # whitespace lines within or at end
    $str =~ s/\n(?![ \t])/\n /g;  # insert a space at line folds if missing
  } else {
    $str = wrap_string($str, 78, '', $wrap_char, $structured
                      )  if $structured==1 || length($str) > 78;
  }
  if (length($str) > 998) {
    my(@lines) = split(/\n/,$str);  my $trunc = 0;
    for (@lines) {
      if (length($_) > 998) { substr($_,998-3) = '...'; $trunc = 1 }
    }
    if ($trunc) {
      do_log(0, "INFO: truncating long header field (len=%d): %s[...]",
             length($str), substr($str,0,100) );
      $str = join("\n",@lines);
    }
  }
  $str =~ s{\n*\z}{\n}s;  # ensure a single final NL
  ll(5) && do_log(5, 'header: %s', $str);
  $str;
}

# Copy mail header section to the supplied method while adding, removing,
# or changing certain header fields as required, and append an empty line
# (header/body separator). Returns a number of original 'Received:'
# header fields to make a simple loop detection possible (as required
# by RFC 5321 (ex RFC 2821) section 6.3).
# Leaves input file positioned at the beginning of a body.
#
sub write_header($$$$) {
  my($self, $msginfo, $out_fh, $noninitial_submission) = @_;
  my $received_cnt = 0;
  my($fix_whitespace_lines, $fix_long_header_lines, $fix_bare_cr) = (0,0,0);
  if ($noninitial_submission && c('allow_fixing_improper_header')) {
    $fix_bare_cr = 1;
    $fix_long_header_lines = 1  if c('allow_fixing_long_header_lines');
    $fix_whitespace_lines  = 1  if c('allow_fixing_improper_header_folding');
  }
  my(@header); my $pos = 0; my $header_in_array = 0;
  my $msg = $msginfo->mail_text;
  my $msg_str_ref = $msginfo->mail_text_str;  # have an in-memory copy?
  $msg = $msg_str_ref  if ref $msg_str_ref;
  if (!defined $msg) {
    # empty mail
    $header_in_array = 1;
  } elsif (ref $msg eq 'SCALAR') {
    $header_in_array = 1;
    $pos = min($msginfo->skip_bytes, length($$msg));
    if ($pos >= length($$msg)) {  # empty message
      $pos = length($$msg);
    } elsif (substr($$msg,$pos,1) eq "\n") {  # empty header section
      $pos++;
    } else {
      my $ind = index($$msg, "\n\n", $pos);  # find header/body separator
      if ($ind < 0) {  # no body
        @header = split(/^/m, substr($$msg, $pos));
        $pos = length($$msg);
      } else {  # normal, nonempty header section and nonempty body
        @header = split(/^/m, substr($$msg, $pos, $ind+1-$pos));
        $pos = $ind+2;
      }
    }
    # $pos now points to the first byte of a body
  } elsif ($msg->isa('MIME::Entity')) {
    $header_in_array = 1;
    $fix_whitespace_lines = 1;  # fix MIME::Entity artifacts
    @header = @{$msg->header};
  } else {  # a file handle assumed
    $pos = $msginfo->skip_bytes;
    $msg->seek($pos,0)  or die "Can't rewind mail file: $!";
  }
  ll(5) && do_log(5, 'write_header: %s, %s', $header_in_array, $out_fh);
  # preallocate some storage
  my $str = ''; vec($str,8192,8) = 0; $str = '';
  $str .= $_  for @{$self->{prepend}};
  $str .= $_  for @{$self->{addrcvd}};
  my($ill_white_cnt, $ill_long_cnt, $ill_bare_cr) = (0,0,0);
  local($1,$2); my $curr_head; my $next_head; my $eof = 0;
  for (;;) {
    if ($eof) {
      $next_head = "\n";  # fake a missing header/body separator line
    } elsif ($header_in_array) {
      for (;;) {  # get next nonempty line or eof
        if (!@header) { $eof = 1; $next_head = "\n"; last }
        $next_head = shift @header;
        # ensure NL at end, faster than m/\n\z/
        $next_head .= "\n"  if substr($next_head,-1,1) ne "\n";
        last  if !$fix_whitespace_lines || $next_head !~ /^[ \t]*\n\z/s;
        $ill_white_cnt++;
      }
    } else {
      $! = 0; $next_head = $msg->getline;
      if (defined $next_head) {
        $pos += length($next_head);
      } else {
        $eof = 1; $next_head = "\n";
        $! == 0  or                # returning EBADF at EOF is a perl bug
          $! == EBADF ? do_log(0,"Error reading mail header section: $!")
                      : die "Error reading mail header section: $!";
      }
    }
    if ($next_head =~ /^[ \t]/) {
      $curr_head .= $next_head;  # folded
    } else {  # new header field
      if (!defined($curr_head)) {
        # no previous complete header field (we are at the first hdr field)
      } elsif ($curr_head !~ /^([!-9;-\176]+)[ \t]*:(.*)\z/s) {  # parse
        # invalid header field, but we'll write it anyway
      } else {  # count, edit, or delete
        # obsolete RFC 822 syntax allowed whitespace before colon
        my($field_name, $field_body) = ($1, $2);
        my $field_name_lc = lc $field_name;
        $received_cnt++  if $field_name_lc eq 'received';
        if (exists($self->{edit}{$field_name_lc})) {
          chomp($field_body);
          ### $field_body =~ s/\n(?=[ \t])//gs;  # unfold
          my $edit = $self->{edit}{$field_name_lc};  # listref of edits
          for my $e (@$edit) {  # possibly multiple (iterative) edits
            my($new_fbody,$verbatim);
            ($new_fbody,$verbatim) =
              &$e($field_name,$field_body)  if defined $e;
            if (!defined($new_fbody)) {
              ll(5) && do_log(5, 'deleted: %s:%s', $field_name, $field_body);
              $curr_head = undef; last;
            }
            $curr_head = $verbatim ? ($field_name . ':' . $new_fbody)
                                   : hdr($field_name, $new_fbody, 0, undef,
                                         $msginfo->smtputf8);
            chomp($curr_head); $curr_head .= "\n";
            $curr_head =~ /^([!-9;-\176]+)[ \t]*:(.*)\z/s;
            $field_body = $2; chomp($field_body);  # carry to next iteration
          }
        }
      }
      if (defined $curr_head) {
        if ($fix_bare_cr) { # sanitize header sect. by removing CR characters
          $curr_head =~ tr/\r//d  and $ill_bare_cr++;
        }
        if ($fix_whitespace_lines) {  # unfold illegal all-whitespace lines
          $curr_head =~ s/\n(?=[ \t]*\n)//g  and $ill_white_cnt++;
        }
        if ($fix_long_header_lines) {  # truncate long header lines to 998 ch
          $curr_head =~ s{^(.{995}).{4,}$}{$1...}gm  and $ill_long_cnt++;
        }
        # use buffering to reduce number of calls to datasend()
        if (length($str) > 16384) {
          $out_fh->print($str) or die "sending mail header: $!";
          $str = '';
        }
        $str .= $curr_head;
      }
      last  if $next_head eq "\n";   # header/body separator
      last  if substr($next_head,0,2) eq '--';  # mime sep. (missing h/b sep.)
      $curr_head = $next_head;
    }
  }
  do_log(0, "INFO: unfolded %d illegal all-whitespace ".
            "continuation lines", $ill_white_cnt)  if $ill_white_cnt;
  do_log(0, "INFO: truncated %d header line(s) longer than 998 characters",
            $ill_long_cnt)  if $ill_long_cnt;
  do_log(0, "INFO: removed bare CR from %d header line(s)",
            $ill_bare_cr)  if $ill_bare_cr;
  $str .= $_  for @{$self->{append}};
  $str .= "\n";  # end of header section - a separator line
  $out_fh->print($str) or die "sending mail header final: $!";
  section_time('write-header');
  ($received_cnt, $pos);
}
1;