/*************************************************
*     Exim - an Internet mail transport agent    *
*************************************************/

/* Copyright (c) University of Cambridge 1995 - 1997 */
/* See the file NOTICE for conditions of use and distribution. */

/* Functions for matching strings */


#include "exim.h"



/*************************************************
*           Generalized string match             *
*************************************************/

/* We are passed the subject and pattern as strings, and a pointer to a pointer
to a regular expression block. The pattern is handled as follows:

  . If chain_ad is not NULL and the pattern starts with ^, it is a regular
    expression. If *chain_ad is NULL a new re_block is set up and the r.e. is
    compiled into it; otherwise the block at *chain_ad is assumed to hold the
    compiled form of this pattern.
  . Otherwise, if the first character of the pattern is *, the match is on the
    tail of the subject.
  . Otherwise, if the pattern contains no semicolon, it must match the subject
    exactly.
  . Otherwise the pattern is <searchtype>;<filename> and a file lookup is
    done via search_open() and search_find(), with the subject as the key.
    The search type is "lsearch" or "dbm", optionally preceded by
    "partial-" or "partial<digits>-" and optionally followed by "*".

Arguments:
  s            the subject string to be checked
  pattern      the pattern to check it against, either literal, starting with *,
               starting with ^ for a r.e., or a lookup specification
  chain_ad     address of anchor of a chain of compiled r.e. blocks; can be
               NULL if no regular expression processing is required, in which
               case a leading ^ gets no special treatment
  expand_setup if < 0, don't set up any numeric expansion variables;
               if = 0, set $0 to whole subject, and either
                 $1 to what matches * or
                 $1, $2, ... to r.e. bracketed items
               if > 0, don't set $0, but do set either
                 $n to what matches *, or
                 $n, $n+1, ... to r.e. bracketed items
               (n = expand_setup)
  use_partial  if FALSE, override any partial- search types
  value        if not NULL, and a file lookup was done, return the result
                 here instead of discarding it; else set it to point to NULL

Returns:       TRUE if matched; FALSE if not
*/

BOOL
match_check_string(char *s, char *pattern, re_block **chain_ad,
  int expand_setup, BOOL use_partial, char **value)
{
BOOL yield;
re_block *p;

if (value != NULL) *value = NULL;

/* If required to set up $0, initialize the data but don't turn on by setting
expand_nmax until the match is assured. For a positive expand_setup, step back
by one so that the pre-increment used below lands on $n. */

expand_nmax = -1;
if (expand_setup == 0)
  {
  expand_nstring[0] = s;
  expand_nlength[0] = (int)strlen(s);
  }
else if (expand_setup > 0) expand_setup--;

/* No regex pointer given, or pattern is not a regular expression. */

if (chain_ad == NULL || pattern[0] != '^')
  {
  int search_type = 0;      /* keep picky compiler happy */
  int partial = -1;
  int len;
  char *error, *result, *semicolon;
  void *handle;

  /* Tail match: what precedes the matched tail becomes the next numeric
  variable. */

  if (pattern[0] == '*')
    {
    int patlen = (int)strlen(++pattern);
    int slen = (int)strlen(s);
    if (patlen > slen) return FALSE;
    yield = strncmp(s + slen - patlen, pattern, patlen) == 0;
    if (yield && expand_setup >= 0)
      {
      expand_nstring[++expand_setup] = s;
      expand_nlength[expand_setup] = slen - patlen;
      expand_nmax = expand_setup;
      }
    return yield;
    }


  /* Exact string match */

  if ((semicolon = strchr(pattern, ';')) == NULL)
    {
    yield = strcmp(s, pattern) == 0;
    if (yield && expand_setup >= 0) expand_nmax = expand_setup;
    return yield;
    }


  /* The remaining possibilities are matches by file lookup, with the subject
  string used unmodified as the key.

  If the pattern starts with "partial-" (with an optional number before
  the -) then set up for doing partial matches; the number defaults to 2.
  The characters are cast to unsigned char because passing a negative plain
  char to the <ctype.h> functions is undefined. The panic log_write()
  presumably does not return. */

  if (strncmp(pattern, "partial", 7) == 0)
    {
    char *orig_pattern = pattern;
    pattern += 7;
    if (isdigit((unsigned char)*pattern))
      {
      partial = 0;
      while (isdigit((unsigned char)*pattern))
        partial = partial * 10 + *pattern++ - '0';
      }
    else partial = 2;
    if (*pattern++ != '-')
      log_write(0, LOG_PANIC_DIE, "malformed partial search type in string "
        "match: %s", orig_pattern);
    }

  /* Set up the length of the search type name */

  len = (int)(semicolon - pattern);

  /* If the lookup type ends in "*" it requests ultimate default lookup.
  This is independent of "partial", but is encoded as part of the same value.
  The length test stops us from looking at the character before the start of
  the pattern when the search type is empty (pattern begins with ';'). */

  if (len > 0 && semicolon[-1] == '*')
    {
    partial += 1024;
    len--;
    }

  /* Now determine the kind of lookup */

  if (len == 7 && strncmp(pattern, "lsearch", 7) == 0)
    search_type = stype_lsearch;
  else if (len == 3 && strncmp(pattern, "dbm", 3) == 0)
    search_type = stype_dbm;
  else log_write(0, LOG_PANIC_DIE, "unknown search type in string match: %s",
    pattern);

  /* Partial matching is all handled inside search_find(). However, partial
  matching is not appropriate for certain lookups (e.g. when looking up
  user@domain for sender rejection), so the caller can turn it off.
  NOTE(review): this also discards the 1024 added above for a trailing "*";
  confirm that is intended. */

  if (!use_partial) partial = -1;

  /* The file name follows the semicolon, after optional white space. */

  pattern = semicolon + 1;
  while (isspace((unsigned char)*pattern)) pattern++;

  /* Now do the actual lookup. */

  handle = search_open(pattern, search_type, &error);
  if (handle == NULL) log_write(0, LOG_PANIC_DIE, "%s", error);
  result = search_find(handle, pattern, s, search_type,
    partial, &expand_setup, &error);

  /* Free the result, unless it was asked for, and return appropriately. */

  if (result == NULL) return FALSE;
  if (value == NULL) store_free(result); else *value = result;
  expand_nmax = expand_setup;
  return TRUE;
  }


/* Regular expression match: compile if necessary, leaving the new block on
the caller's chain so that it can be re-used. */

p = *chain_ad;
if (p == NULL)
  {
  p = store_malloc(sizeof(re_block));
  *chain_ad = p;
  p->next = NULL;
  p->re = regex_must_compile(pattern);
  }

/* Perform a regular expression match and set up $ variables if required. */

return (expand_setup < 0)?
  pcre_exec(p->re, NULL, s, (int)strlen(s), pcre_eopt, NULL, 0) >= 0
  :
  regex_match_and_setup(p->re, s, 0, expand_setup);
}



/*************************************************
*            Match in colon-separated list       *
*************************************************/

/* Each item of the list is matched against the subject in turn by calling
match_check_string() with no setting of numeric variables and no return of
lookup data; the scan stops at the first item that matches.

Arguments:
  s              string to search for
  list           colon separated list of patterns, or NULL
  chain_ad       address of anchor of chain of r.e. blocks for holding compiled
                 regular expressions for this colon-separated list; NULL if no
                 r.e. processing required
  at_is_primary  if TRUE, a list entry of "@" is interpreted as the primary
                 name for the host

Returns:         TRUE if matched; FALSE if not
*/

BOOL
match_isinlist(char *s, char *list, re_block **chain_ad, BOOL at_is_primary)
{
char *ss;
char buffer[1024];

for (ss = string_nextinlist(&list, ':', buffer, sizeof(buffer));
     ss != NULL;
     ss = string_nextinlist(&list, ':', buffer, sizeof(buffer)))
  {
  if (*ss == '@' && ss[1] == 0 && at_is_primary) ss = primary_hostname_lc;
  if (match_check_string(s, ss, chain_ad, -1, TRUE, NULL)) return TRUE;

  /* A regular expression item uses up one block on the chain, so advance the
  anchor for the next such item. The chain may legitimately be absent
  (chain_ad == NULL), in which case there is nothing to advance. */

  if (ss[0] == '^' && chain_ad != NULL && *chain_ad != NULL)
    chain_ad = &((*chain_ad)->next);
  }

return FALSE;
}




/*************************************************
*  Match in colon-separated list and return data *
*************************************************/

/* This operates exactly like match_isinlist(), but valueptr is handed on to
match_check_string(), so that if the matching item is a file lookup that
returns data, the data gets passed back.

Arguments:
  s              string to search for
  list           colon separated list of patterns, or NULL
  chain_ad       address of anchor of chain of r.e. blocks for holding compiled
                 regular expressions for this colon-separated list; NULL if no
                 r.e. processing required
  at_is_primary  if TRUE, a list entry of "@" is interpreted as the primary
                 name for the host
  valueptr       pointer to where any lookup data is to be passed back

Returns:         TRUE if matched; FALSE if not
*/

BOOL
match_isinlist_get(char *s, char *list, re_block **chain_ad, BOOL at_is_primary,
  char **valueptr)
{
char *ss;
char buffer[1024];

for (ss = string_nextinlist(&list, ':', buffer, sizeof(buffer));
     ss != NULL;
     ss = string_nextinlist(&list, ':', buffer, sizeof(buffer)))
  {
  if (*ss == '@' && ss[1] == 0 && at_is_primary) ss = primary_hostname_lc;
  if (match_check_string(s, ss, chain_ad, -1, TRUE, valueptr)) return TRUE;

  /* A regular expression item uses up one block on the chain, so advance the
  anchor for the next such item. The chain may legitimately be absent
  (chain_ad == NULL), in which case there is nothing to advance. */

  if (ss[0] == '^' && chain_ad != NULL && *chain_ad != NULL)
    chain_ad = &((*chain_ad)->next);
  }

return FALSE;
}




/*************************************************
*          Match host to net list                *
*************************************************/

/* Given a host address, in textual form, and a munged netlist, check for
whether the host address matches any of the entries. Each entry is either
already converted to binary, or names a file that has to be read and searched
line by line on every call (which is less efficient, of course).

The conversion of the subject address from text to binary is done by
host_aton(); lines read from a file are converted by host_amton(). An entry is
compared only if it has the same number of words as the subject address, and
matches if every word of the subject, after masking, equals the corresponding
word of the entry's address.

In a file, everything from a '#' to the end of the line is ignored, as are
leading and trailing white space and empty lines. A line that host_amton()
cannot convert causes a panic.

Arguments:
  address     points to the text form of the host address
  netlist     points to a chain of ip_net_items

Returns:      TRUE if the host matches any net item
*/

BOOL
match_net_isinlist(char *address, ip_net_item *netlist)
{
int i;
int x[4];
int n = host_aton(address, x);
BOOL matched = FALSE;

/* Convert four-word addresses of the form ::ffff:<v4address>, which represent
connections from IPv4 hosts to IPv6 hosts, to one-word IPv4 format. */

if (n == 4 && x[0] == 0 && x[1] == 0 && x[2] == 0xffff)
  {
  n = 1;
  x[0] = x[3];
  }

for (; netlist != NULL; netlist = netlist->next)
  {
  /* Handle the case when a file has to be searched serially */

  if (netlist->filename != NULL)
    {
    char buffer[128];
    FILE *f = os_fopen(netlist->filename, "r");
    if (f == NULL)
      log_write(0, LOG_MAIN|LOG_PANIC_DIE, "failed to open %s when attempting "
        "to check an IP network", netlist->filename);

    while (fgets(buffer, sizeof(buffer), f) != NULL)
      {
      int size, net_address[4], net_mask[4];
      char *ss;
      char *s = buffer;

      /* Strip any comment, then leading and trailing white space. The casts
      are needed because passing a negative plain char to isspace() is
      undefined. */

      if ((ss = strchr(buffer, '#')) != NULL) *ss = 0;
      while (isspace((unsigned char)*s)) s++;
      if (*s == 0) continue;

      size = (int)strlen(s);
      while (size > 0 && isspace((unsigned char)s[size-1])) size--;
      s[size] = 0;

      /* Convert the item; a zero size indicates failure, with ss set to
      text that is included in the panic message. */

      size = host_amton(s, net_address, net_mask, &ss);
      if (size == 0)
        log_write(0, LOG_MAIN|LOG_PANIC_DIE, "malformed IP network item in "
          "%s: %s: %s", netlist->filename, s, ss);

      if (n != size) continue;
      matched = TRUE;
      for (i = 0; i < n; i++)
        {
        if ((x[i] & net_mask[i]) != net_address[i])
          { matched = FALSE; break; }
        }
      if (matched) break;
      }
    fclose(f);
    }

  /* Handle the case when we have the binary data to hand */

  else
    {
    if (n != netlist->size) continue;
    matched = TRUE;
    for (i = 0; i < n; i++)
      {
      if ((x[i] & netlist->mask[i]) != netlist->address[i])
        { matched = FALSE; break; }
      }
    }

  /* In either case, we are done if we've matched. */

  if (matched) return TRUE;
  }

return FALSE;
}




/*************************************************
*           Do file existence tests              *
*************************************************/

/* This function is given a colon-separated list of files whose existence
is to be tested. The string is first expanded, and each resulting item is
then examined in turn. An item may start with "!" to indicate that
non-existence is what is wanted, and/or with "+" to indicate that an EACCES
error from stat() is to be treated as non-existence; the accepted prefixes
are "!", "+", "!+" and "+!". What follows the prefix is handled like this:

  . if it starts with "/", it is passed to stat();
  . if it does not start with "/" but has a "/" later on, it is a
    non-absolute file name, which causes a panic;
  . if it contains no "/" at all, the item is skipped.

The scan stops at the first item that gives a DEFER or FAIL result.

Argument:
  s        a colon-separated list of files whose existence is to be tested,
           or NULL

Returns:   OK if s == NULL or all files' existence is as required;
           DEFER if the existence of at least one of the files is
             unclear (stat() failed with something other than ENOENT or
             ENOTDIR or, when "+" was given, EACCES);
           FAIL otherwise.
           Unless s == NULL, errno is set on return to what the final call
           to stat() left in it, or to 0 if stat() was never called.
*/

int
match_exists(char *s)
{
char *expanded, *item, *cursor;
char itembuf[1024];
struct stat st;
int result = OK;
int last_errno = 0;

if (s == NULL) return OK;

expanded = expand_string(s);
if (expanded == NULL)
  log_write(0, LOG_MAIN|LOG_PANIC_DIE, "expansion of %s failed: %s", s,
    expand_string_message);

/* Walk the expanded list, keeping the original pointer for freeing. */

cursor = expanded;

while ((item = string_nextinlist(&cursor, ':', itembuf, sizeof(itembuf)))
       != NULL)
  {
  int rc;
  BOOL want_absent = FALSE;
  BOOL eacces_is_absent = FALSE;
  BOOL missing, definite;

  /* Pick off the option characters: an optional "!", then an optional "+"
  which may itself be followed by a "!". */

  if (*item == '!') { want_absent = TRUE; item++; }
  if (*item == '+')
    {
    eacces_is_absent = TRUE;
    if (*(++item) == '!') { want_absent = TRUE; item++; }
    }

  /* Only absolute names are tested. Something with a slash that isn't at
  the start is a relative path, which is a disaster; anything with no slash
  at all is passed over. */

  if (*item != '/')
    {
    if (strchr(item, '/') != NULL)
      log_write(0, LOG_MAIN|LOG_PANIC_DIE, "file name for existence test is "
        "not fully qualified: %s", item);
    continue;
    }

  /* Do the test, remembering errno before anything can disturb it. */

  errno = 0;
  rc = stat(item, &st);
  last_errno = errno;

  DEBUG(9) debug_printf("test existence of %s\n  "
    "required %s, EACCES => %s\n  %s\n",
    item,
    want_absent? "absent" : "present",
    eacces_is_absent? "absent" : "unknown",
    strerror(last_errno));

  /* A failure of stat() is a definite "not there" only for the errors
  listed; any other failure leaves the matter undecided. */

  missing = rc < 0;
  definite = last_errno == ENOENT || last_errno == ENOTDIR ||
    (eacces_is_absent && last_errno == EACCES);

  if (missing && !definite)
    {
    result = DEFER;
    break;
    }

  /* The state of the file is known; it is wrong if it differs from what was
  asked for. */

  if (missing != want_absent)
    {
    result = FAIL;
    break;
    }
  }

store_free(expanded);
errno = last_errno;
return result;
}



/*************************************************
*    Test whether address matches address list   *
*************************************************/

/* This function is given an address and a string list of things to
match it against. The list may contain individual addresses, regular
expressions, and lookup specifications. The address to check can consist of
just a domain, which will then match only domain items or items specified as
*@domain. The value of the second argument must then be given as 0.

The address is copied into big_buffer and all the work is done on the copy,
whose domain is lower-cased. Each list item is handled as follows:

  . An item starting with ^ is passed, with the whole address (less any
    source route), to match_check_string(), which treats it as a regular
    expression when chain_ad is not NULL.
  . An item with no @ is a pattern for the domain only. If it is a lookup
    (contains a semicolon) and the domain does not match, the lookup is
    tried again with the whole address as the key, without partial matching.
  . An item ending in @@ has its domain replaced by primary_hostname_lc.
  . An item of the form @@<lookup> looks up the domain, and uses the data
    returned as a colon-separated list of local part patterns. If the last
    item in that list is ">something" it gives another key to look up, thus
    providing a chaining facility.
  . Otherwise both the local part and the domain must match; the local part
    pattern may begin with an asterisk, and is compared using strncmpic().

Arguments:
  address       address to test
  domain        offset to the domain in the address
  list          string list to check against
  chain_ad      pointer to chain of compiled re's for caching; if it is NULL
                a leading ^ gets no regular expression treatment
  expand_setup  controls setting up of $n variables - passed through
                to match_check_string (q.v.)
  separator     separator character for the list; may be 0 for one item

Returns:      TRUE if the address matches something in the list
*/

BOOL
match_address_list(char *address, int domain, char *list, re_block **chain_ad,
  int expand_setup, int separator)
{
char *localpart, *test_address, *p;
char *listptr = list;
char buffer[1024];
int llen;

/* Ensure the domain is lower-cased before doing any comparisons. The cast is
needed because passing a negative plain char to tolower() is undefined.
NOTE(review): the copy is unbounded; confirm that no caller can supply an
address that is longer than big_buffer. */

strcpy(big_buffer, address);
address = big_buffer;
for (p = address + domain; *p != 0; p++) *p = tolower((unsigned char)*p);

/* The local part follows the colon in a source-routed address; otherwise
it starts at the beginning. */

localpart = (address[0] == '@')? strchr(address, ':') + 1 : address;

/* Compute the length of the local part; if domain == 0 (which won't happen for
a source route) the length is zero. */

llen = (domain == 0)? 0 : domain - (localpart - address) - 1;

/* If expand_setup is zero, we need to set up $0 to the whole thing, in
case there is a match. Can't use the built-in facilities of match_check_string,
as we may just be calling that for part of the address (the domain). */

if (expand_setup == 0)
  {
  expand_nstring[0] = localpart;
  expand_nlength[0] = (int)strlen(localpart);  /* Whole length */
  expand_setup++;
  }

/* Loop for each address in the list. */

for (test_address = string_nextinlist(&listptr, separator, buffer,
       sizeof(buffer));
     test_address != NULL;
     test_address = string_nextinlist(&listptr, separator, buffer,
       sizeof(buffer)))
  {
  int expand_inc = 0;
  char *sdomain;

  /* Handle a regular expression, which must match the entire
  incoming address. Note that localpart will be pointing to the
  end part of a source-routed address. If there was no match, move on to the
  next block on the chain, when there is a chain to move along. */

  if (test_address[0] == '^')
    {
    if (match_check_string(localpart, test_address, chain_ad, expand_setup,
      TRUE, NULL)) return TRUE;
    if (chain_ad != NULL && *chain_ad != NULL)
      chain_ad = &((*chain_ad)->next);
    continue;
    }

  /* If not a regular expression, either part may begin with an
  asterisk, and both parts must match. If there's no '@' in the
  pattern, then it is just a domain and treated as if it had
  *@ on the front. */

  sdomain = strrchr(test_address, '@');

  /* No @ => assume user matches; set domain = whole thing */

  if (sdomain == NULL) sdomain = test_address;

  /* Check the local part if one is given in the list. A null local part
  is treated as '*'. */

  else
    {
    int sllen = sdomain - test_address;
    sdomain += 1;

    /* If the domain in the pattern is apparently null, and the local part in
    the pattern ends in "@", then we have a pattern of the form <something>@@,
    which is to be interpreted as <something>@primary_hostname. The test on
    sllen stops us from looking at the character before the start of the
    pattern when the pattern is just "@". */

    if (*sdomain == 0 && sllen > 0 && test_address[sllen - 1] == '@')
      {
      sdomain = primary_hostname_lc;
      sllen--;
      }

    /* Else if the local part in the pattern is precisely "@", and a local part
    is given in the source, and the domain part of the pattern is a lookup,
    then lookup the domain, and use the data returned as a list of local
    parts. Sadly, we have to compile any regular expressions each time.
    If the last item in the list is ">something" it gives another key to
    look up, thus providing a chaining facility. */

    else if (llen > 0 && sllen == 1 && test_address[0] == '@' &&
        strchr(sdomain, ';') != NULL)
      {
      BOOL rc;
      int watchdog = 50;
      char *result, *sublist, *ss;
      char *key = address + domain;
      char subbuffer[1024];   /* separate from buffer, which holds sdomain */

      /* Loop for handling chains. The watchdog goes negative only when the
      loop is abandoned because the limit was reached. */

      while (key != NULL && watchdog-- > 0)
        {
        rc = match_check_string(key, sdomain, NULL, -1, TRUE, &result);
        if (key != address + domain) store_free(key);

        /* No match causes continue to next toplevel item, as does a match
        that supplies no data (a tail match on a pattern that happens to
        contain a semicolon), because there is then no list to scan. */

        if (!rc || result == NULL) break;

        /* Temporarily terminate after the local part */

        localpart[llen] = 0;
        sublist = result;

        /* Check for chaining from the last item; if it is present, cut it
        off the list and take a copy of the new key, because the list is
        freed below. */

        ss = strrchr(sublist, ':');
        if (ss == NULL) ss = sublist; else ss++;
        while (isspace((unsigned char)*ss)) ss++;
        if (*ss == '>')
          {
          *ss++ = 0;
          while (isspace((unsigned char)*ss)) ss++;
          key = string_copy(ss);
          }
        else key = NULL;

        /* Look up the local parts provided by the list. Empty items are
        ignored.
        NOTE(review): only the re_block is freed here; whether the compiled
        expression it points to needs freeing as well cannot be told from
        this file. */

        rc = FALSE;
        for (ss = string_nextinlist(&sublist, ':', subbuffer,
               sizeof(subbuffer));
             ss != NULL;
             ss = string_nextinlist(&sublist, ':', subbuffer,
               sizeof(subbuffer)))
          {
          re_block *reblock = NULL;
          if (*subbuffer == 0) continue;
          rc = match_check_string(localpart, subbuffer, &reblock, -1, TRUE,
            NULL);
          if (reblock != NULL) store_free(reblock);
          if (rc) break;
          }

        /* Rejoin the local part to the domain, free the looked-up data,
        return if we matched anything (first discarding any key that was
        copied for a further lookup), else continue with another lookup
        if there was an ">" item. */

        localpart[llen] = '@';
        store_free(result);
        if (rc)
          {
          if (key != NULL) store_free(key);
          return TRUE;
          }
        }

      /* End of chain loop; panic if too many times */

      if (watchdog < 0)
        log_write(0, LOG_MAIN|LOG_PANIC_DIE, "Loop detected in lookup of "
          "local part of %s in %s", address, sdomain);

      /* Proceed to the next item on the original address list. */

      continue;
      }

    /* Otherwise, if there is a local part in the pattern, check it against
    the subject local part; if there is no subject local part, the match will
    fail unless the pattern's local part is "*". */

    if (sllen > 0)
      {
      if (test_address[0] == '*')
        {
        int cllen = sllen - 1;
        if (llen < cllen ||
          strncmpic(localpart+llen-cllen, test_address + 1, cllen) != 0)
            continue;
        if (expand_setup > 0)
          {
          expand_nstring[expand_setup] = localpart;
          expand_nlength[expand_setup] = llen - cllen;
          expand_inc = 1;
          }
        }
      else if (llen != sllen || strncmpic(localpart, test_address, llen) != 0)
        continue;
      }
    }

  /* If the local part matched, check the domain using the generalized
  function, which supports file lookups. */

  if (match_check_string(address + domain, sdomain, NULL,
    expand_setup + expand_inc, TRUE, NULL)) return TRUE;

  /* If we have no match and the pattern is a single lookup pattern
  without a local part, then try the entire address, but do not do any
  partial matching, which won't be appropriate. */

  if (sdomain == test_address && strchr(test_address, ';') != NULL &&
    match_check_string(localpart, test_address, NULL, -1, FALSE, NULL))
      return TRUE;
  }

return FALSE;
}



/*************************************************
*              Match sender to list              *
*************************************************/

/* This function is given a colon-separated list to be matched against the
sender of the message. The list is first expanded, and the result is then
handed to match_address_list() along with sender_address, the domain of which
is taken to start after the last '@'.

Arguments:
  s          a colon-separated list (before expansion), or NULL
  chain_add  pointer to chain of compiled re's for caching - passed through
             to match_address_list (q.v.)

Returns:     TRUE if the sender matches;
             FALSE if it does not, or if s is NULL, or if there is no sender
               address or it is empty, or if it contains no '@'
*/

BOOL
match_sender(char *s, re_block **chain_add)
{
BOOL yield = FALSE;
char *ss, *at;

if (s == NULL || sender_address == NULL || sender_address[0] == 0)
  return FALSE;

ss = expand_string(s);
if (ss == NULL)
  log_write(0, LOG_MAIN|LOG_PANIC_DIE, "expansion of %s failed: %s", s,
    expand_string_message);

/* Find where the domain starts. The last '@' is wanted, because earlier ones
can occur in a source route or inside a quoted local part. An address without
any '@' has no domain offset to pass on, so it is treated as not matching.
The expand_setup value is 0, which asks for $0 and its successors to be set
up. */

at = strrchr(sender_address, '@');
if (at != NULL)
  yield = match_address_list(sender_address,
    (int)(at - sender_address) + 1,
    ss,
    chain_add,
    0,
    ':');

store_free(ss);
return yield;
}

/* End of match.c */
