File: //usr/share/perl5/vendor_perl/Amavis/Expand.pm
# SPDX-License-Identifier: GPL-2.0-or-later
package Amavis::Expand;
use strict;
use re 'taint';
BEGIN {
require Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
$VERSION = '2.412';
@ISA = qw(Exporter);
@EXPORT_OK = qw(&expand &tokenize);
}
use subs @EXPORT_OK;
use Amavis::Util qw(ll do_log);
# Given a string reference and a hashref of predefined (builtin) macros,
# expand() performs a macro expansion and returns a ref to a resulting string.
#
# This is a simple, yet fully fledged macro processor with proper lexical
# analysis, call stack, quoting levels, user supplied and builtin macros,
# three builtin flow-control macros: selector, regexp selector and iterator,
# a macro-defining macro and a macro '#' that eats input to the next newline.
# Also recognized are the usual \c and \nnn forms for specifying special
# characters, where c can be any of: r, n, f, b, e, a, t.
# Details are described in file README.customize, practical examples of use
# are in the supplied notification messages;
# Author: Mark Martinec <Mark.Martinec@ijs.si>, 2002, 2006
use vars qw(%builtins_cached %lexmap %esc);
use vars qw($lx_lb $lx_lbS $lx_lbT $lx_lbA $lx_lbC $lx_lbE $lx_lbQQ
$lx_rbQQ $lx_rb $lx_sep $lx_h $lx_ph);
BEGIN {
no warnings 'qw'; # avoid "Possible attempt to put comments in qw()"
my(@lx_str) = qw( [ [? [~ [@ [: [= [" "] ] | # %#
%0 %1 %2 %3 %4 %5 %6 %7 %8 %9); # lexical elem.
# %lexmap maps string to reference in order to protect lexels
$lexmap{$_} = \$_ for @lx_str; # maps lexel strings to references
($lx_lb, $lx_lbS, $lx_lbT, $lx_lbA, $lx_lbC, $lx_lbE, $lx_lbQQ, $lx_rbQQ,
$lx_rb, $lx_sep, $lx_h, $lx_ph) = map($lexmap{$_}, @lx_str);
%esc = (n => \"\n", r => "\r", f => "\f", b => "\b",
e => "\e", a => "\a", t => "\t");
# NOTE that \n is specific, it is represented by a ref to a newline and not
# by a newline itself; this makes it possible for a macro '#' to skip input
# to a true newline from source, making it possible to comment-out entire
# lines even if they contain "\n" tokens
1;
}
# make an object out of the supplied list of tokens
sub newmacro { my $class = shift; bless [@_], $class }
# turn a ref to a list of tokens into a single plain string
sub tokens_list_to_str($) { join('', map(ref($_) ? $$_ : $_, @{$_[0]})) }
sub tokenize($;$) {
my($str_ref,$tokens_ref) = @_; local($1);
$tokens_ref = [] if !defined $tokens_ref;
# parse lexically, replacing lexical element strings with references,
# unquoting backslash-quoted characters and %%, and dropping \NL and \_
@$tokens_ref = map {
exists $lexmap{$_} ? $lexmap{$_} # replace with ref
: $_ eq "\\\n" || $_ eq "\\_" ? '' # drop \NEWLINE and \_
: $_ eq '%%' ? '%' # %% -> %
: /^(%\#?.)\z/s ? \"$1" # unknown builtins
: /^\\([0-7]{1,3})\z/ ? chr(oct($1)) # \nnn
: /^\\(.)\z/s ? (exists($esc{$1}) ? $esc{$1} : $1) # \r, \n, \f, ...
: /^(_ [A-Z]+ (?: \( [^)]* \) )? _)\z/xs ? \"$1" # SpamAssassin-compatible
: $_ }
$$str_ref =~ /\G \# | \[ [?~\@:="]? | "\] | \] | \| | % \#? . | \\ [^0-7] |
\\ [0-7]{1,3} | _ [A-Z]+ (?: \( [^)]* \) )? _ |
[^\[\]\\|%\n#"_]+ | [^\n]+? | \n /xgs;
$tokens_ref;
}
sub evalmacro($$;@) {
my($macro_type,$builtins_href,@args) = @_;
my @result; local($1,$2);
if ($macro_type == $lx_lbS) { # selector built-in macro
my $sel = tokens_list_to_str(shift(@args));
if ($sel eq '') { $sel = 0 } # quick
elsif ($sel =~ /^\s*\z/) { $sel = 0 }
elsif ($sel =~ /^\s*(\d+)\s*\z/) { $sel = 0+$1 } # decimal to numeric
else { $sel = 1 }
# provide an empty second alternative if we only have one specified
if (@args < 2) {} # keep $sel beyond $#args
elsif ($sel > $#args) { $sel = $#args } # use last alternative
@result = @{$args[$sel]} if $sel >= 0 && $sel <= $#args;
} elsif ($macro_type == $lx_lbT) { # regexp built-in macro
# args: string, regexp1, then1, regexp2, then2, ... regexpN, thenN[, else]
my $str = tokens_list_to_str(shift(@args)); # collect the first argument
my($match,@repl);
while (@args >= 2) { # at least a regexp and a 'then' argument still there
@repl = ();
my $regexp = tokens_list_to_str(shift(@args)); # collect a regexp arg
if ($regexp eq '') {
# braindamaged Perl: empty string implies the last successfully
# matched regular expression; we must avoid this
$match = 1;
} else {
eval { # guard against invalid regular expression
local($1,$2,$3,$4,$5,$6,$7,$8,$9);
$match = $str=~/$regexp/ ? 1 : 0;
@repl = ($1,$2,$3,$4,$5,$6,$7,$8,$9) if $match;
1;
} or do {
my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
die $eval_stat if $eval_stat =~ /^timed out\b/; # resignal timeout
do_log(2,"invalid macro regexp arg: %s", $eval_stat);
$match = 0; @repl = ();
};
}
if ($match) { last } else { shift(@args) } # skip 'then' arg if no match
}
if (@args > 0) {
unshift(@repl,$str); # prepend the whole string as a %0
# formal arg lexels %0, %1, ... %9 are replaced by captured substrings
@result = map(!ref || $$_!~/^%([0-9])\z/ ? $_ : $repl[$1], @{$args[0]});
}
} elsif ($macro_type == $lx_lb) { # iterator macro
my($cvar_r,$sep_r,$body_r); my $cvar; # give meaning to arguments
if (@args >= 3) { ($cvar_r,$body_r,$sep_r) = @args }
else { ($body_r,$sep_r) = @args; $cvar_r = $body_r }
# find the iterator name
for (@$cvar_r) { if (ref && $$_ =~ /^%(.)\z/s) { $cvar = $1; last } }
my $name = $cvar; # macro name is usually the same as the iterator name
if (@args >= 3 && !defined($name)) {
# instead of iterator like %x, the first arg may be a long macro name,
# in which case the iterator name becomes a hard-wired 'x'
$name = tokens_list_to_str($cvar_r);
$name =~ s/^[ \t\n]+//; $name =~ s/[ \t\n]+\z//; # trim whitespace
if ($name eq '') { $name = undef } else { $cvar = 'x' }
}
if (exists($builtins_href->{$name})) {
my $s = $builtins_href->{$name};
if (UNIVERSAL::isa($s,'Amavis::Expand')) { # dynamically defined macro
my(@margs) = ($name); # no arguments beyond %0
my(@res) = map(!ref || $$_ !~ /^%([0-9])\z/ ? $_
: ref($margs[$1]) ? @{$margs[$1]} : (), @$s);
$s = tokens_list_to_str(\@res);
} elsif (ref($s) eq 'CODE') {
if (exists($builtins_cached{$name})) {
$s = $builtins_cached{$name};
} else {
while (ref($s) eq 'CODE') { $s = &$s($name) }
$builtins_cached{$name} = $s;
}
}
my $ind = 0;
for my $val (ref($s) ? @$s : $s) { # do substitutions in the body
push(@result, @$sep_r) if ++$ind > 1 && ref($sep_r);
push(@result, map(ref && $$_ eq "%$cvar" ? $val : $_, @$body_r));
}
}
} elsif ($macro_type == $lx_lbE) { # define a new macro
my $name = tokens_list_to_str(shift(@args)); # first arg is a macro name
$name =~ s/^[ \t\n]+//; $name =~ s/[ \t\n]+\z//; # trim whitespace on name
delete $builtins_cached{$name};
$builtins_href->{$name} = Amavis::Expand->newmacro(@{$args[0]});
} elsif ($macro_type == $lx_lbA || $macro_type == $lx_lbC || # macro call
$$macro_type =~ /^%(\#)?(.)\z/s) {
my $name; my $cardinality_only = 0;
if ($macro_type == $lx_lbA || $macro_type == $lx_lbC) {
$name = tokens_list_to_str($args[0]); # arg %0 is a macro name
$name =~ s/^[ \t\n]+//; $name =~ s/[ \t\n]+\z//; # trim whitespace
} else { # simple macro call %x or %#x
$name = $2;
$cardinality_only = 1 if defined $1;
}
my $s = $builtins_href->{$name};
if (!ref($s)) { # macro expands to a plain string
if (!$cardinality_only) { @result = $s }
else { @result = $s !~ /^\s*\z/ ? 1 : 0 }; # %#x => nonwhite=1, other 0
} elsif (UNIVERSAL::isa($s,'Amavis::Expand')) { # dynamically defined macro
$args[0] = $name; # replace name with a stringified and trimmed form
# expanding a dynamically-defined macro produces a list of tokens;
# formal argument lexels %0, %1, ... %9 are replaced by actual arguments
@result = map(!ref || $$_ !~ /^%([0-9])\z/ ? $_
: ref($args[$1]) ? @{$args[$1]} : (), @$s);
if ($cardinality_only) { # macro call form %#x
@result = tokens_list_to_str(\@result) !~ /^\s*\z/ ? 1 : 0;
}
} else { # subroutine or array ref
if (ref($s) eq 'CODE') {
if (exists($builtins_cached{$name}) && @args <= 1) {
$s = $builtins_cached{$name};
} elsif (@args <= 1) {
while (ref($s) eq 'CODE') { $s = &$s($name) } # callback
$builtins_cached{$name} = $s;
} else {
shift(@args); # discard original form of a macro name
while (ref($s) eq 'CODE') # subroutine callback
{ $s = &$s($name, map(tokens_list_to_str($_), @args)) }
}
}
if ($cardinality_only) { # macro call form %#x
# for array: number of elements; for scalar: nonwhite=1, other 0
@result = ref($s) ? scalar(@$s) : $s !~ /^\s*\z/ ? 1 : 0;
} else { # macro call %x evaluates to the value of macro x
@result = ref($s) ? join(', ',@$s) : $s;
}
}
}
\@result;
}
sub expand($$) {
my($str_ref,$builtins_href) = @_;
# $str_ref ... a ref to a source string to be macro expanded;
# $builtins_href ... a hashref, mapping builtin macro names
# to macro values: strings or array refs
my(@tokens);
if (ref($str_ref) eq 'ARRAY') { @tokens = @$str_ref }
else { tokenize($str_ref,\@tokens) }
my $call_level = 0; my $quote_level = 0;
my(@arg); # stack of arguments lists to nested calls, [0] is top of stack
my(@macro_type); # call stack of macro types (leading lexels) of nested calls
my(@implied_q); # call stack: is implied quoting currently active?
# 0 (not active) or 1 (active); element [0] stack top
my(@open_quote); # quoting stack: opening quote lexel for each quoting level
%builtins_cached = (); my $whereto; local($1,$2);
# preallocate some storage
my $output_str = ''; vec($output_str,2048,8) = 0; $output_str = '';
while (@tokens) {
my $t = shift(@tokens);
# do_log(5, "TOKEN: %s", ref($t) ? "<$$t>" : "'$t'");
if (!ref($t)) { # a plain string, no need to check for quoting levels
if (defined $whereto) { push(@$whereto,$t) } else { $output_str .= $t }
} elsif ($quote_level > 0 && substr($$t,0,1) eq '[') {
# go even deeper into quoting
$quote_level += ($t == $lx_lbQQ) ? 2 : 1; unshift(@open_quote,$t);
if (defined $whereto) { push(@$whereto,$t) } else { $output_str .= $$t }
} elsif ($t == $lx_lbQQ) { # just entering a [" ... "] quoting context
$quote_level += 2; unshift(@open_quote,$t);
# drop a [" , thus stripping one level of quotes
} elsif (substr($$t,0,1) eq '[') {
# $lx_lb $lx_lbS lx_lbT $lx_lbA $lx_lbC $lx_lbE
$call_level++; # open a macro call, start collecting arguments
unshift(@arg, [[]]); unshift(@macro_type, $t); unshift(@implied_q, 0);
$whereto = $arg[0][0];
if ($t == $lx_lb) { # iterator macro implicitly quotes all arguments
$quote_level++; unshift(@open_quote,$t); $implied_q[0] = 1;
}
} elsif ($quote_level <= 1 && $call_level>0 && $t == $lx_sep) { # next arg
unshift(@{$arg[0]}, []); $whereto = $arg[0][0];
if ($macro_type[0]==$lx_lbS && @{$arg[0]} == 2) {
# selector macro implicitly quotes arguments beyond first argument
$quote_level++; unshift(@open_quote,$macro_type[0]); $implied_q[0] = 1;
}
} elsif ($quote_level > 1 && ($t == $lx_rb || $t == $lx_rbQQ)) {
$quote_level -= ($open_quote[0] == $lx_lbQQ) ? 2 : 1;
shift(@open_quote); # pop the quoting stack
if ($t == $lx_rb || $quote_level > 0) { # pass-on if still quoted
if (defined $whereto) { push(@$whereto,$t) } else { $output_str .= $$t}
}
} elsif ($call_level > 0 && ($t == $lx_rb || $t == $lx_rbQQ)) { # evaluate
$call_level--; my $m_type = $macro_type[0];
if ($t == $lx_rbQQ) { # fudge for compatibility: treat "] as two chars
if (defined $whereto) { push(@$whereto,'"') } else { $output_str.='"' }
}
if ($implied_q[0] && $quote_level > 0) {
$quote_level -= ($open_quote[0] == $lx_lbQQ) ? 2 : 1;
shift(@open_quote); # pop the quoting stack
}
my $result_ref = evalmacro($m_type, $builtins_href, reverse @{$arg[0]});
shift(@macro_type); shift(@arg); shift(@implied_q); # pop the call stack
$whereto = $call_level > 0 ? $arg[0][0] : undef;
if ($m_type == $lx_lbC) { # neutral macro call, result implicitly quoted
if (defined $whereto) { push(@$whereto, @$result_ref) }
else { $output_str .= tokens_list_to_str($result_ref) }
} else { # active macro call, push result back to input for reprocessing
unshift(@tokens, @$result_ref);
}
} elsif ($quote_level > 0 ) { # still protect %x and # macro calls
if (defined $whereto) { push(@$whereto,$t) } else { $output_str .= $$t }
} elsif ($t == $lx_h) { # discard tokens up to and including a newline
while (@tokens) { last if shift(@tokens) eq "\n" }
} elsif ($$t =~ /^%\#?.\z/s) { # neutral simple macro call %x or %#x
my $result_ref = evalmacro($t, $builtins_href);
if (defined $whereto) { push(@$whereto,@$result_ref) }
# else { $output_str .= tokens_list_to_str($result_ref) }
else { $output_str .= join('', map(ref($_) ? $$_ : $_, @$result_ref)) }
} elsif ($$t =~ /^_ ([A-Z]+) (?: \( ( [^)]* ) \) )? _\z/xs) {
# neutral simple SA-like macro call, $1 is name, $2 is a single! argument
my $result_ref = evalmacro($lx_lbC, $builtins_href, [$1],
!defined($2) ? () : [$2] );
if (defined $whereto) { push(@$whereto, @$result_ref) }
else { $output_str .= tokens_list_to_str($result_ref) }
} else { # misplaced top-level lexical element
if (defined $whereto) { push(@$whereto,$t) } else { $output_str .= $$t }
}
}
%builtins_cached = (); # clear memory
\$output_str;
}
1;