File: //usr/share/perl5/vendor_perl/Amavis/Unpackers/MIME.pm
# SPDX-License-Identifier: GPL-2.0-or-later
package Amavis::Unpackers::MIME;
use strict;
use re 'taint';
BEGIN {
require Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
$VERSION = '2.412';
@ISA = qw(Exporter);
@EXPORT_OK = qw(&mime_decode);
}
use subs @EXPORT_OK;
use Errno qw(ENOENT EACCES);
use IO::File qw(O_RDONLY O_WRONLY O_CREAT O_EXCL);
use MIME::Parser;
use MIME::Words;
use Digest::MD5;
use Digest::SHA;
# use Scalar::Util qw(tainted);
use Amavis::Conf qw(:platform c cr ca $TEMPBASE $MAXFILES);
use Amavis::Timing qw(section_time);
use Amavis::Util qw(snmp_count untaint ll do_log
safe_decode safe_decode_latin1
safe_encode safe_encode_utf8_inplace);
use Amavis::Unpackers::NewFilename qw(consumed_bytes);
use Amavis::Unpackers::OurFiler;
use Amavis::Unpackers::Part;
# save MIME preamble and epilogue (if nontrivial) as extra (pseudo)parts
#
sub mime_decode_pre_epi($$$$$) {
my($pe_name, $pe_lines, $tempdir, $parent_obj, $placement) = @_;
if (defined $pe_lines && @$pe_lines) {
do_log(5, "mime_decode_%s: %d lines", $pe_name, scalar(@$pe_lines));
if (@$pe_lines > 5 || "@$pe_lines" !~ m{^[A-Za-z0-9/\@:;,. \t\n_-]*\z}s) {
my $newpart_obj =
Amavis::Unpackers::Part->new("$tempdir/parts",$parent_obj,1);
$newpart_obj->mime_placement($placement);
$newpart_obj->name_declared($pe_name);
my $newpart = $newpart_obj->full_name;
my $outpart = IO::File->new;
# O_WRONLY etc. can become tainted in Perl5.8.9 [perlbug #62502]
$outpart->open($newpart, untaint(O_CREAT|O_EXCL|O_WRONLY), 0640)
or die "Can't create $pe_name file $newpart: $!";
binmode($outpart,':bytes') or die "Can't cancel :utf8 mode: $!";
my $len;
for (@$pe_lines) {
$outpart->print($_) or die "Can't write $pe_name to $newpart: $!";
$len += length($_);
}
$outpart->close or die "Error closing $pe_name $newpart: $!";
$newpart_obj->size($len);
consumed_bytes($len, "mime_decode_$pe_name", 0, 1);
}
}
}
sub ambiguous_content {
my $entity = shift;
if ($entity->can('ambiguous_content')) {
return $entity->ambiguous_content;
} else {
return unless $entity->is_multipart;
my $content_type = $entity->head->get('Content-Type');
if ($content_type && $content_type =~ m{^multipart/\w+(.+)}x) {
my ($params, $num) = ($1, 0);
while ($params =~ m{\G ; \s+ (?<param>\w+) = (?: \w+ | "(?:\\.|[^"\\])*" )}gx) {
$num++ if lc($+{param}) eq 'boundary';
}
return $num > 1;
}
return;
}
}
# traverse MIME::Entity object depth-first,
# extracting preambles and epilogues as extra (pseudo)parts, and
# filling-in additional information into Amavis::Unpackers::Part objects
#
sub mime_traverse($$$$$); # prototype
sub mime_traverse($$$$$) {
my($entity, $tempdir, $parent_obj, $depth, $placement) = @_;
mime_decode_pre_epi('preamble', $entity->preamble,
$tempdir, $parent_obj, $placement);
my($mt, $et) = ($entity->mime_type, $entity->effective_type);
my $part; my $head = $entity->head; my $body = $entity->bodyhandle;
if (!defined($body)) { # a MIME container only contains parts, no bodypart
# create pseudo-part objects for MIME containers (e.g. multipart/* )
$part = Amavis::Unpackers::Part->new(undef,$parent_obj,1);
$part->attributes_add('B') if ambiguous_content($entity);
# $part->type_short('no-file');
do_log(2, "%s %s Content-Type: %s", $part->base_name, $placement, $mt);
} else { # does have a body part (i.e. not a MIME container)
# base64 encoding represents line-endings in a canonical CRLF form, so it
# must be converted to a local representation for text parts when decoding;
# RFC 2045 explicitly prohibits encoding CR and LF of a canonical CRLF pair
# in quoted-printable encoding of textual parts, but some mail generating
# software ignores this requirement, so we have to normalize line endings
# (turn CRLF to \n) for both the base64 and the quoted-printable encodings
my $encoding = $head->mime_encoding;
my $normalize_line_endings =
$mt =~ m{^(?:text|message)(?:/|\z)}i &&
($encoding eq 'base64' || $encoding eq 'quoted-printable');
my $digest_ctx; # body-part digester context object, or undef
# choose a message digest: MD5: 128 bits, SHA family: 160..512 bits
# Use SHA1 for SpamAssassin bayes compatibility!
my $digest_algorithm = c('mail_part_digest_algorithm');
if (defined $digest_algorithm) {
$digest_ctx = uc $digest_algorithm eq 'MD5' ? Digest::MD5->new
: Digest::SHA->new($digest_algorithm);
}
my $size;
my $fn = $body->path;
if (!defined $fn) {
# body part resides in memory only
if (!$digest_ctx) {
$size = length($body->as_string);
} else {
my $buff = $body->as_string;
$size = length $buff;
$buff =~ s{\015(?=\012|\z)}{}gs if $normalize_line_endings;
$digest_ctx->add($buff);
}
} else {
# body part resides on a file
my $msg; my $errn = lstat($fn) ? 0 : 0+$!;
if ($errn == ENOENT) { $msg = "does not exist" }
elsif ($errn) { $msg = "is inaccessible: $!" }
elsif (!-r _) { $msg = "is not readable" }
elsif (!-f _) { $msg = "is not a regular file" }
else {
$size = -s _;
if ($size == 0) {
do_log(4,"mime_traverse: file %s is empty", $fn);
} elsif ($digest_ctx) {
my $fh = IO::File->new;
$fh->open($fn,O_RDONLY) # does a sysopen
or die "Can't open file $fn for reading: $!";
$fh->binmode or die "Can't set file $fn to binmode: $!";
my($nbytes,$buff);
while ($nbytes=sysread($fh,$buff,32768)) {
$buff =~ s{\015(?=\012|\z)}{}gs if $normalize_line_endings;
$digest_ctx->add($buff);
}
defined $nbytes or die "Error reading file $fn: $!";
}
}
do_log(-1,"WARN: mime_traverse: file %s %s", $fn,$msg) if defined $msg;
}
consumed_bytes($size, 'mime_decode', 0, 1);
# retrieve Amavis::Unpackers::Part object (if any), stashed into head obj
$part = Amavis::Unpackers::OurFiler::get_amavisd_part($head);
if (defined $part) {
$part->size($size);
if (defined($size) && $size==0) {
$part->type_short('empty'); $part->type_long('empty');
}
my $digest;
if ($digest_ctx) {
$digest = $digest_ctx->hexdigest;
# store as a hex digest, followed by Content-Type
$part->digest($digest . ':' . lc($mt||''));
}
if (ll(2)) { # pretty logging
my $filename = $head->recommended_filename;
$encoding = 'QP' if $encoding eq 'quoted-printable';
do_log(2, "%s %s Content-Type: %s, %s, size: %d%s%s",
$part->base_name, $placement, $mt, $encoding, $size,
defined $digest ? ", $digest_algorithm digest: $digest" : '',
defined $filename ? ", name: $filename" : '');
}
my $old_parent_obj = $part->parent;
if ($parent_obj ne $old_parent_obj) { # reparent if necessary
ll(5) && do_log(5,"reparenting %s from %s to %s", $part->base_name,
$old_parent_obj->base_name, $parent_obj->base_name);
my $ch_ref = $old_parent_obj->children;
$old_parent_obj->children([grep($_ ne $part, @$ch_ref)]);
$ch_ref = $parent_obj->children;
push(@$ch_ref,$part); $parent_obj->children($ch_ref);
$part->parent($parent_obj);
}
}
}
if (defined $part) {
$part->mime_placement($placement);
$part->type_declared($mt eq $et ? $mt : [$mt, $et]);
$part->attributes_add('U','C') if $mt =~ m{/.*encrypted}si ||
$et =~ m{/.*encrypted}si;
my %rn_seen;
my @rn; # recommended file names, both raw and RFC 2047 / RFC 2231 decoded
for my $attr_name ('content-disposition.filename', 'content-type.name') {
my $val_raw = $head->mime_attr($attr_name);
next if !defined $val_raw || $val_raw eq '';
my $val_dec = ''; # decoded, represented as native Perl characters
eval {
my(@chunks) = MIME::Words::decode_mimewords($val_raw);
for my $pair (@chunks) {
my($data,$encoding) = @$pair;
if (!defined $encoding || $encoding eq '') {
$val_dec .= safe_decode_latin1($data); # assumes ISO-8859-1
} else {
$encoding =~ s/\*[^*]*\z//s; # strip RFC 2231 language suffix
$val_dec .= safe_decode($encoding,$data);
}
}
1;
} or do {
do_log(3, "mime_traverse: decoding MIME words failed: %s", $@);
};
if ($val_dec ne '' && !$rn_seen{$val_dec}) {
push(@rn,$val_dec); $rn_seen{$val_dec} = 1;
}
if (!$rn_seen{$val_raw}) {
push(@rn,$val_raw); $rn_seen{$val_raw} = 1;
}
}
$part->name_declared(@rn==1 ? $rn[0] : \@rn) if @rn;
my $val = $head->mime_attr('content-type.report-type');
safe_encode_utf8_inplace($val);
$part->report_type($val) if defined $val && $val ne '';
}
mime_decode_pre_epi('epilogue', $entity->epilogue,
$tempdir, $parent_obj, $placement);
my $item_num = 0;
for my $e ($entity->parts) { # recursive descent
$item_num++;
mime_traverse($e, $tempdir, $part, $depth+1, "$placement/$item_num");
}
}
# Break up mime parts, return a MIME::Entity object
#
sub mime_decode($$$) {
my($msg, $tempdir, $parent_obj) = @_;
# $msg may be an open file handle, or a file name, or a string ref
my $parser = MIME::Parser->new;
# File::Temp->new defaults to /tmp or a current directory, ignoring TMPDIR
$parser->tmp_dir($TEMPBASE) if $parser->UNIVERSAL::can('tmp_dir');
$parser->filer(
Amavis::Unpackers::OurFiler->new("$tempdir/parts", $parent_obj) );
$parser->ignore_errors(1); # also is the default
# if bounce killer is enabled, extract_nested_messages must be off,
# otherwise we lose headers of attached message/rfc822 or message/global
$parser->extract_nested_messages(0);
# $parser->extract_nested_messages("NEST"); # parse embedded message/rfc822
# "NEST" complains with "part did not end with expected boundary" when
# the outer message is message/partial and the inner message is chopped
$parser->extract_uuencode(1); # to enable or not to enable ???
$parser->max_parts($MAXFILES) if defined $MAXFILES && $MAXFILES > 0 &&
$parser->UNIVERSAL::can('max_parts');
snmp_count('OpsDecByMimeParser');
my $entity;
{ local($1,$2,$3,$4,$5,$6); # avoid Perl 5.8.* bug, $1 can get tainted !
if (!defined $msg) {
$entity = $parser->parse_data('');
} elsif (!ref $msg) { # assume $msg is a file name
do_log(4, "Extracting mime components from file %s", $msg);
$entity = $parser->parse_open("$tempdir/parts/$msg");
} elsif (ref $msg eq 'SCALAR') {
do_log(4, "Extracting mime components from a string");
# parse_data() should be avoided with IO::File 1.09 or older:
# it uses a mode '>:' to force a three-argument open(), but a mode
# with a colon is only recognized starting with IO::File 1.10,
# which comes with perl 5.8.1
IO::File->VERSION(1.10); # required minimal version
$entity = $parser->parse_data($msg); # takes a ref to a string
} elsif (ref $msg) { # assume an open file handle
do_log(4, "Extracting mime components from a file");
$msg->seek(0,0) or die "Can't rewind mail file: $!";
$entity = $parser->parse($msg);
}
}
my $mime_err;
my(@mime_errors) = $parser->results->errors; # a list!
if (@mime_errors) {
# $mime_err = $mime_errors[0]; # only show the first error
$mime_err = join('; ',@mime_errors); # show all errors
}
if (defined $mime_err) {
$mime_err=~s/\s+\z//; $mime_err=~s/[ \t\r]*\n+/; /g; $mime_err=~s/\s+/ /g;
substr($mime_err,250) = '[...]' if length($mime_err) > 250;
do_log(1, "WARN: MIME::Parser %s", $mime_err) if $mime_err ne '';
} elsif (!defined($entity)) {
$mime_err = "Unable to parse, perhaps message contains too many parts";
do_log(1, "WARN: MIME::Parser %s", $mime_err);
$entity = '';
}
mime_traverse($entity, $tempdir, $parent_obj, 0, '1') if $entity;
section_time('mime_decode');
($entity, $mime_err);
}
1;