/*  ----------------------------------------------------------------<Prolog>-
    Name:       sflfind.c
    Title:      Fast string searching functions
    Package:    Standard Function Library (SFL)

    Written:    96/04/24  iMatix SFL project team <sfl@imatix.com>
    Revised:    97/09/29

    Copyright:  Copyright (c) 1991-98 iMatix
    License:    This is free software; you can redistribute it and/or modify
                it under the terms of the SFL License Agreement as provided
                in the file LICENSE.TXT.  This software is distributed in
                the hope that it will be useful, but without any warranty.
 ------------------------------------------------------------------</Prolog>-*/

#include "prelude.h"                    /*  Universal header file            */
#include "sflfind.h"                    /*  Prototypes for functions         */


/*  ---------------------------------------------------------------------[<]-
    Function: strfind

    Synopsis: Looks for a null-terminated pattern inside a null-terminated
    string.  The lengths of both arguments are measured with strlen() and
    the search itself is delegated to memfind(), which implements the
    Boyer-Moore-Horspool-Sunday algorithm.  Returns a pointer to the place
    in the string where the pattern was found, or NULL if it was not found.
    Pass repeat_find as FALSE the first time you search for a pattern; on
    later calls with the same pattern you may pass TRUE so that memfind()
    reuses the shift table it built earlier instead of re-parsing the
    pattern.  That table is a single static table inside memfind(), so a
    search for a different pattern in between invalidates it.  Intended
    for single-byte character data; use memfind() directly for binary
    data.  Will not work on multibyte characters.

    Examples:
    char *result;

    result = strfind ("abracadabra", "cad", FALSE);
    if (result)
        puts (result);
    ---------------------------------------------------------------------[>]-*/

char *
strfind (const char *string,            /*  String containing data           */
         const char *pattern,           /*  Pattern to search for            */
         Bool repeat_find)              /*  Same pattern as last time        */
{
    size_t
        string_size,                    /*  Length of string to search       */
        pattern_size;                   /*  Length of pattern to look for    */

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    /*  memfind() works on raw bytes, so hand it both strings as blocks      */
    return ((char *) memfind ((const byte *) string,  string_size,
                              (const byte *) pattern, pattern_size,
                              repeat_find));
}


/*  ---------------------------------------------------------------------[<]-
    Function: memfind

    Synopsis: Searches for a pattern in a block of memory using the Boyer-
    Moore-Horspool-Sunday algorithm.  The block and pattern may contain any
    values; you must explicitly provide their lengths.  Returns a pointer to
    the pattern if found within the block, or NULL if the pattern was not
    found.  An empty pattern (pattern_size of zero) matches at the start of
    the block, so the block pointer itself is returned.  Only bytes inside
    block [0 .. block_size - 1] and pattern [0 .. pattern_size - 1] are
    ever read.  If you repeatedly scan for the same pattern, use the
    repeat_find argument.  If this is TRUE, the function does not re-parse
    the pattern.  The shift table is one static table shared by all calls,
    so repeat_find may only be TRUE when the most recent call that passed
    FALSE used this same pattern; for the same reason the function is not
    reentrant.  This function is meant to handle binary data.  If you need
    to search strings, use the strfind() function. Original algorithm
    published by BOYER, R., and S. MOORE. 1977. "A Fast String Searching
    Algorithm." CACM, 20, 762-72.  Simplifications by HORSPOOL, R. N. 1980.
    "Practical Fast Searching in Strings." Software - Practice and
    Experience, 10, 501-06.  Further improvements by HUME, A., and D. M.
    SUNDAY. 1991.  "Fast String Searching." AT&T Bell Labs Computing Science
    Technical Report No. 156.  Finally, implemented in C by P. Hintjens.
    ---------------------------------------------------------------------[>]-*/

byte *
memfind (const byte *block,             /*  Block containing data            */
         size_t block_size,             /*  Size of block in bytes           */
         const byte *pattern,           /*  Pattern to search for            */
         size_t pattern_size,           /*  Size of pattern block            */
         Bool   repeat_find)            /*  Same pattern as last time        */
{
    static size_t
        shift [256];                    /*  Shift for each byte              */
    size_t
        byte_nbr,                       /*  Count through block              */
        match_size,                     /*  Size of matched part             */
        limit;                          /*  We stop when we hit limit        */

    /*  Build the shift table unless we're continuing a previous search.     */
    /*  A byte that does not occur in the pattern lets us skip the whole     */
    /*  window plus one; otherwise we shift so that the last occurrence      */
    /*  of the byte in the pattern lines up with it.                         */
    if (!repeat_find)
      {
        for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
            shift [byte_nbr] = pattern_size + 1;
        for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
            shift [(byte) pattern [byte_nbr]] = pattern_size - byte_nbr;
      }

    /*  Pattern must be smaller or equal in size to block                    */
    if (block_size < pattern_size)
        return (NULL);                  /*  Otherwise it's not found         */

    /*  An empty pattern matches at the start of the block.  Handling it     */
    /*  here also keeps the search loop from ever reading pattern [0]        */
    /*  when the pattern has no bytes at all.                                */
    if (pattern_size == 0)
        return ((byte *) block);

    /*  Search for the pattern, each time jumping up by the amount           */
    /*  computed in the shift table                                          */
    limit = block_size - pattern_size + 1;
    byte_nbr = 0;
    while (byte_nbr < limit)
      {
        /*  If the first byte matches, compare the rest of the window        */
        if (block [byte_nbr] == *pattern)
          {
            match_size = 1;
            while (match_size < pattern_size
               &&  block [byte_nbr + match_size] == pattern [match_size])
                match_size++;

            /*  If we found a match, return the start address                */
            if (match_size == pattern_size)
                return ((byte *) block + byte_nbr);
          }
        /*  The shift is chosen from the byte just after the current         */
        /*  window.  For the last window that byte lies outside the block,   */
        /*  and there is nothing left to search anyway, so stop here.        */
        if (byte_nbr + 1 == limit)
            break;
        byte_nbr += shift [block [byte_nbr + pattern_size]];
      }
    return (NULL);                      /*  Found nothing                    */
}


/*  ---------------------------------------------------------------------[<]-
    Function: txtfind

    Synopsis: Searches for a case-insensitive text pattern in a string
    using the Boyer-Moore-Horspool-Sunday algorithm.  The string and
    pattern are null-terminated strings.  Returns a pointer to the pattern
    if found within the string, or NULL if the pattern was not found.
    An empty pattern matches at the start of the string, so the string
    pointer itself is returned.  Characters are compared after folding
    both sides with tolower(), so the result follows the current locale.
    To match exact strings, use strfind().  Will not work on multibyte
    characters.

    Examples:
    char *result;

    result = txtfind ("AbracaDabra", "cad");
    if (result)
        puts (result);
    ---------------------------------------------------------------------[>]-*/

char *
txtfind (const char *string,            /*  String containing data           */
         const char *pattern)           /*  Pattern to search for            */
{
    static size_t
        shift [256];                    /*  Shift for each byte              */
    size_t
        byte_nbr,                       /*  Count through string             */
        match_size,                     /*  Size of matched part             */
        string_size,                    /*  Length of string to search       */
        pattern_size,                   /*  Length of pattern to look for    */
        limit;                          /*  We stop when we hit limit        */

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    /*  Build the shift table, indexed by lower-cased byte value.  Plain     */
    /*  char may be signed, and tolower() is only defined for values that    */
    /*  fit in an unsigned char, so every character is converted first.      */
    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;
    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    /*  Pattern must be smaller or equal in size to string                   */
    if (string_size < pattern_size)
        return (NULL);                  /*  Otherwise it's not found         */

    /*  An empty pattern matches at the start of the string.  Without this   */
    /*  the comparison below would "match" the two terminators and then      */
    /*  carry on reading past the end of both strings.                       */
    if (pattern_size == 0)
        return ((char *) string);

    /*  Search for the pattern, each time jumping up by the amount           */
    /*  computed in the shift table.  The shift is chosen from the           */
    /*  character just after the current window; for the last window that    */
    /*  is the string's null terminator, which is still safe to read.        */
    limit = string_size - pattern_size + 1;
    for (byte_nbr = 0;
         byte_nbr < limit;
         byte_nbr += shift [(unsigned char) tolower (
                        (unsigned char) string [byte_nbr + pattern_size])])
      {
        /*  Count how many leading characters of the window agree            */
        match_size = 0;
        while (match_size < pattern_size
           &&  tolower ((unsigned char) string [byte_nbr + match_size])
           ==  tolower ((unsigned char) pattern [match_size]))
            match_size++;

        /*  If we found a match, return the start address                    */
        if (match_size == pattern_size)
            return ((char *) string + byte_nbr);
      }
    return (NULL);                      /*  Found nothing                    */
}
