File: //usr/share/perl5/vendor_perl/Amavis/Lookup/RE.pm
# SPDX-License-Identifier: GPL-2.0-or-later
package Amavis::Lookup::RE;
use strict;
use re 'taint';
BEGIN {
require Exporter;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
$VERSION = '2.412';
@ISA = qw(Exporter);
}
use Amavis::Util qw(ll do_log fmt_struct);
# Make an object out of the supplied lookup list
# to make it distinguishable from simple ACL array
sub new($$) { my $class = shift; bless [@_], $class }
# lookup_re() performs a lookup for an e-mail address or other key string
# against a list of regular expressions.
#
# A full unmodified e-mail address is always used, so splitting to localpart
# and domain or lowercasing is NOT performed. The regexp is powerful enough
# that this can be accomplished by its own mechanisms. The routine is useful
# for other RE tests besides the usual e-mail addresses, such as looking for
# banned file names.
#
# Each element of the list can be a ref to a pair, or directly a regexp
# ('Regexp' object created by a qr operator, or just a (less efficient)
# string containing a regular expression). If it is a pair, the first
# element is treated as a regexp, and the second provides a value in case
# the regexp matches. If not a pair, the implied result of a match is 1.
#
# The regular expression is taken as-is, no implicit anchoring or setting
# case insensitivity is done, so do use a qr'(?i)^user\@example\.com$',
# and not a sloppy qr'user@example.com', which can easily backfire.
# Also, if qr is used with a delimiter other than ' (apostrophe), make sure
# to quote the @ and $ when they are not introducing a variable name.
#
# The pattern allows for capturing of parenthesized substrings, which can
# then be referenced from the result string using the $1, $2, ... notation,
# as with a Perl m// operator. The number after a $ may be a multi-digit
# decimal number. To avoid possible ambiguity a ${n} or $(n) form may be used
# Substring numbering starts with 1. Nonexistent references evaluate to empty
# strings. If any substitution is done, the result inherits the taintedness
# of $addr. Keep in mind that $ and @ characters needs to be backslash-quoted
# in qq() strings. Example:
# $virus_quarantine_to = new_RE(
# [ qr'^(.*)\@example\.com$'i => 'virus-${1}@example.com' ],
# [ qr'^(.*)(\@[^\@]*)?$'i => 'virus-${1}${2}' ] );
#
# Example (equivalent to the example in lookup_acl):
# $acl_re = Amavis::Lookup::RE->new(
# qr'\@me\.ac\.uk$'i, [qr'[\@.]ac\.uk$'i=>0], qr'\.uk$'i );
# ($r,$k) = $acl_re->lookup_re('user@me.ac.uk');
# or $r = lookup(0, 'user@me.ac.uk', $acl_re);
#
# 'user@me.ac.uk' matches me.ac.uk, returns true and search stops
# 'user@you.ac.uk' matches .ac.uk, returns false (because of =>0)
# and search stops
# 'user@them.co.uk' matches .uk, returns true and search stops
# 'user@some.com' does not match anything, falls through and
# returns false (undef)
#
# As a special allowance, the $addr argument may be a ref to a list of search
# keys. At each step in traversing the supplied regexp list, all elements of
# @$addr are tried. If any of them matches, the search stops. This is currently
# used in banned names lookups, where all attributes of a part are given as a
# list @$addr, as a loop on attributes must be an inner loop.
#
sub lookup_re($$;$%) {
my($self, $addr,$get_all,%options) = @_;
local($1,$2,$3,$4); my(@matchingkey,@result);
$addr .= $options{AppendStr} if defined $options{AppendStr};
for my $e (@$self) { # try each regexp in the list
my($key,$r);
if (ref($e) eq 'ARRAY') { # a pair: (regexp,result)
($key,$r) = ($e->[0], @$e < 2 ? 1 : $e->[1]);
} else { # a single regexp (not a pair), implies result 1
($key,$r) = ($e, 1);
}
# braindamaged Perl: empty string implies the last successfully
# matched regular expression; we must avoid this:
$key = qr{(?:)} if !defined $key || $key eq '';
my(@rhs); # match, capturing parenthesized subpatterns into @rhs
if (!ref($addr)) { @rhs = $addr =~ /$key/ }
else { for (@$addr) { @rhs = /$key/; last if @rhs } } # inner loop
if (@rhs) { # regexp matches
# do the righthand side replacements if any $n, ${n} or $(n) is specified
if (defined($r) && !ref($r) && index($r,'$') >= 0) { # triage
my $any = $r =~ s{ \$ ( (\d+) | \{ (\d+) \} | \( (\d+) \) ) }
{ my $j=$2+$3+$4; $j<1 ? '' : $rhs[$j-1] }xgse;
# bring taintedness of input to the result
$r .= substr($addr,0,0) if $any;
}
push(@result,$r); push(@matchingkey,$key);
last if !$get_all;
}
}
if (!ll(5)) {
# don't bother preparing log report which will not be printed
} elsif (!@result) {
do_log(5, "lookup_re(%s), no matches", fmt_struct($addr));
} else { # pretty logging
if (!$get_all) { # first match wins
do_log(5, 'lookup_re(%s) matches key "%s", result=%s',
fmt_struct($addr), $matchingkey[0], fmt_struct($result[0]));
} else { # want all matches
do_log(5, "lookup_re(%s) matches keys: %s", fmt_struct($addr),
join(', ', map { sprintf('"%s"=>%s',
$matchingkey[$_], fmt_struct($result[$_]))
} (0..$#result)));
}
}
if (!$get_all) { !wantarray ? $result[0] : ($result[0], $matchingkey[0]) }
else { !wantarray ? \@result : (\@result, \@matchingkey) }
}
1;