/*
    FLUIdS - local search system
    Copyright (C) 1998, 2000  VVK (valera@sbnet.ru), CNII Center, Moscow

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/


#include "zdefs.h"
#include "_pstring.h" /* <string.h> */
#include <stdlib.h>

#include "zchars.h"

#include "html.h"

/*
 * One HTML character entity and the plain-text replacement used for it.
 *
 * The strings are only ever read (they point at string literals in the
 * entity table), so they are declared const.
 */
struct entitydef_t
{
  const char *name;   /* entity name without the leading '&' and trailing ';' */
  int length;         /* number of characters in name */
  int number;         /* character code of the entity; 0 in the end marker */
  const char *value;  /* replacement text written in place of the entity */
  int valueLength;    /* number of characters in value */
};

/*
 * Table of the known entities and their plain-text replacements.
 *
 * Row order matters: rows 4..99 are indexed directly by character code
 * (row = 4 + code - 160) in convertEntities(), and entitySorted[] stores
 * row numbers of this table, so rows must not be moved.  The table ends
 * with an all-zero marker row.
 *
 * NOTE(review): for some accented letters the code column does not look
 * like the usual ISO 8859-1 assignment (e.g. Aacute/Agrave); the affected
 * neighbours share the same replacement text -- confirm before relying on
 * the code column for anything else.
 */

/* Builds one row; both length columns are derived from the literals. */
#define ENTITYDEF_ROW( name, code, text ) \
  { name, (int) (sizeof(name) - 1), code, text, (int) (sizeof(text) - 1) }

static struct entitydef_t entityTable[] =
{
  ENTITYDEF_ROW( "quot",    34, "\""   ),
  ENTITYDEF_ROW( "amp",     38, "&"    ),
  ENTITYDEF_ROW( "lt",      60, "<"    ),
  ENTITYDEF_ROW( "gt",      62, ">"    ),
  ENTITYDEF_ROW( "nbsp",   160, " "    ),
  ENTITYDEF_ROW( "iexcl",  161, "!"    ),
  ENTITYDEF_ROW( "cent",   162, " c "  ),
  ENTITYDEF_ROW( "pound",  163, " p "  ),
  ENTITYDEF_ROW( "curren", 164, " "    ),
  ENTITYDEF_ROW( "yen",    165, " y "  ),
  ENTITYDEF_ROW( "brvbar", 166, "|"    ),
  ENTITYDEF_ROW( "sect",   167, "#"    ),
  ENTITYDEF_ROW( "uml",    168, " "    ),
  ENTITYDEF_ROW( "copy",   169, "(c)"  ),
  ENTITYDEF_ROW( "ordf",   170, " "    ),
  ENTITYDEF_ROW( "laquo",  171, "<<"   ),
  ENTITYDEF_ROW( "not",    172, "!"    ),
  ENTITYDEF_ROW( "shy",    173, " "    ),
  ENTITYDEF_ROW( "reg",    174, "(r)"  ),
  ENTITYDEF_ROW( "macr",   175, " "    ),
  ENTITYDEF_ROW( "deg",    176, " "    ),
  ENTITYDEF_ROW( "plusmn", 177, "+/-"  ),
  ENTITYDEF_ROW( "sup2",   178, "^2"   ),
  ENTITYDEF_ROW( "sup3",   179, "^3"   ),
  ENTITYDEF_ROW( "acute",  180, "`"    ),
  ENTITYDEF_ROW( "micro",  181, " "    ),
  ENTITYDEF_ROW( "para",   182, "#"    ),
  ENTITYDEF_ROW( "middot", 183, "."    ),
  ENTITYDEF_ROW( "cedil",  184, " "    ),
  ENTITYDEF_ROW( "sup1",   185, "^1"   ),
  ENTITYDEF_ROW( "ordm",   186, " "    ),
  ENTITYDEF_ROW( "raquo",  187, ">>"   ),
  ENTITYDEF_ROW( "frac14", 188, " 1/4" ),
  ENTITYDEF_ROW( "frac12", 189, " 1/2" ),
  ENTITYDEF_ROW( "frac34", 190, " 3/4" ),
  ENTITYDEF_ROW( "iquest", 191, "?"    ),
  ENTITYDEF_ROW( "Aacute", 192, "A"    ),
  ENTITYDEF_ROW( "Agrave", 193, "A"    ),
  ENTITYDEF_ROW( "Acirc",  194, "A"    ),
  ENTITYDEF_ROW( "Atilde", 195, "A"    ),
  ENTITYDEF_ROW( "Aring",  196, "A"    ),
  ENTITYDEF_ROW( "Auml",   197, "A"    ),
  ENTITYDEF_ROW( "AElig",  198, "A"    ),
  ENTITYDEF_ROW( "Ccedil", 199, "C"    ),
  ENTITYDEF_ROW( "Eacute", 200, "E"    ),
  ENTITYDEF_ROW( "Egrave", 201, "E"    ),
  ENTITYDEF_ROW( "Ecirc",  202, "E"    ),
  ENTITYDEF_ROW( "Euml",   203, "E"    ),
  ENTITYDEF_ROW( "Iacute", 204, "I"    ),
  ENTITYDEF_ROW( "Igrave", 205, "I"    ),
  ENTITYDEF_ROW( "Icirc",  206, "I"    ),
  ENTITYDEF_ROW( "Iuml",   207, "I"    ),
  ENTITYDEF_ROW( "ETH",    208, "E"    ),
  ENTITYDEF_ROW( "Ntilde", 209, "N"    ),
  ENTITYDEF_ROW( "Oacute", 210, "O"    ),
  ENTITYDEF_ROW( "Ograve", 211, "O"    ),
  ENTITYDEF_ROW( "Ocirc",  212, "O"    ),
  ENTITYDEF_ROW( "Otilde", 213, "O"    ),
  ENTITYDEF_ROW( "Ouml",   214, "O"    ),
  ENTITYDEF_ROW( "times",  215, "*"    ),
  ENTITYDEF_ROW( "Oslash", 216, "O"    ),
  ENTITYDEF_ROW( "Uacute", 217, "U"    ),
  ENTITYDEF_ROW( "Ugrave", 218, "U"    ),
  ENTITYDEF_ROW( "Ucirc",  219, "U"    ),
  ENTITYDEF_ROW( "Uuml",   220, "U"    ),
  ENTITYDEF_ROW( "Yacute", 221, "Y"    ),
  ENTITYDEF_ROW( "THORN",  222, "D"    ),
  ENTITYDEF_ROW( "szlig",  223, "s"    ),
  ENTITYDEF_ROW( "aacute", 224, "a"    ),
  ENTITYDEF_ROW( "agrave", 225, "a"    ),
  ENTITYDEF_ROW( "acirc",  226, "a"    ),
  ENTITYDEF_ROW( "atilde", 227, "a"    ),
  ENTITYDEF_ROW( "aring",  228, "a"    ),
  ENTITYDEF_ROW( "auml",   229, "a"    ),
  ENTITYDEF_ROW( "aelig",  230, "a"    ),
  ENTITYDEF_ROW( "ccedil", 231, "c"    ),
  ENTITYDEF_ROW( "eacute", 232, "e"    ),
  ENTITYDEF_ROW( "egrave", 233, "e"    ),
  ENTITYDEF_ROW( "ecirc",  234, "e"    ),
  ENTITYDEF_ROW( "euml",   235, "e"    ),
  ENTITYDEF_ROW( "iacute", 236, "i"    ),
  ENTITYDEF_ROW( "igrave", 237, "i"    ),
  ENTITYDEF_ROW( "icirc",  238, "i"    ),
  ENTITYDEF_ROW( "iuml",   239, "i"    ),
  ENTITYDEF_ROW( "eth",    240, "e"    ),
  ENTITYDEF_ROW( "ntilde", 241, "n"    ),
  ENTITYDEF_ROW( "oacute", 242, "o"    ),
  ENTITYDEF_ROW( "ograve", 243, "o"    ),
  ENTITYDEF_ROW( "ocirc",  244, "o"    ),
  ENTITYDEF_ROW( "otilde", 245, "o"    ),
  ENTITYDEF_ROW( "ouml",   246, "o"    ),
  ENTITYDEF_ROW( "div",    247, "/"    ),
  ENTITYDEF_ROW( "oslash", 248, "o"    ),
  ENTITYDEF_ROW( "uacute", 249, "u"    ),
  ENTITYDEF_ROW( "ugrave", 250, "u"    ),
  ENTITYDEF_ROW( "ucirc",  251, "u"    ),
  ENTITYDEF_ROW( "uuml",   252, "u"    ),
  ENTITYDEF_ROW( "yacute", 253, "y"    ),
  ENTITYDEF_ROW( "thorn",  254, "d"    ),
  ENTITYDEF_ROW( "yuml",   255, "y"    ),
  { NULL, 0, 0, NULL, 0 }   /* end marker */
};

#undef ENTITYDEF_ROW

/*
 * Row numbers of entityTable[] arranged so that the entity names they
 * refer to are in ascending strcmp() order (upper case before lower
 * case).  convertEntities() binary-searches entity names through this
 * array.  Each row below is annotated with the first and last name it
 * covers.
 */
static int entitySorted[] =
{
  42, 36, 38, 37, 40, 39, 41, 43, 52, 44,   /* AElig  .. Eacute */
  46, 45, 47, 48, 50, 49, 51, 53, 54, 56,   /* Ecirc  .. Ocirc  */
  55, 60, 57, 58, 66, 61, 63, 62, 64, 65,   /* Ograve .. Yacute */
  68, 70, 24, 74, 69,  1, 72, 71, 73, 10,   /* aacute .. brvbar */
  75, 28,  6, 13,  8, 20, 91, 76, 78, 77,   /* ccedil .. egrave */
  84, 79, 33, 32, 34,  3, 80, 82,  5, 81,   /* eth    .. igrave */
  35, 83, 15,  2, 19, 25, 27,  4, 16, 85,   /* iquest .. ntilde */
  86, 88, 87, 14, 30, 92, 89, 90, 26, 21,   /* oacute .. plusmn */
   7,  0, 31, 18, 11, 17, 29, 22, 23, 67,   /* pound  .. szlig  */
  98, 59, 93, 95, 94, 12, 96, 97,  9, 99    /* thorn  .. yuml   */
};

char *convertEntities( char *string )
{
  char *from, *to;

  for( from = to = string; *from != '\0'; )
    if( *from == '&' )
    {
      struct entitydef_t *cur, *tmp;

      if( from[1] == '#' )
      {
        if( isDigit( from[2] ) )
        {
          if( isDigit( from[3] ) )
          {
            char numline[4];
            int number;

            numline[0] = from[2];
            numline[1] = from[3];

            if( isDigit( from[4] ) && (from[2] < '2' || from[2] == '2' &&
                 (from[3] < '5' || (from[3] == '5' && from[4] <= '5'))) )
            {
              numline[2] = from[4];
              numline[3] = '\0';
              from += 5;
            }
            else
            {
              numline[2] = '\0';
              from += 4;
            }

            number = (int) strtol( numline, NULL, 10);
            if( number <= 127 )
              *to ++ = (number <= ' ') ? ' ' : number;
            else
            {
              if( number >= 160 /* && number <= 255 */ )
                cur = &entityTable[4+number-160];
              else
                for( cur = entityTable; cur->name != NULL; cur++)
                  if( number == cur->number ) break;

              if( cur->number == '\0' )
                *to ++ = ' ';
              else
              {
                strcpy( to, cur->value);
                to += cur->valueLength;
              }
            }
          }
          else
          {
            *to ++ = ' ';
            from += 3;
          }
          if( *from == ';' ) from++;
        }
        else
          *to ++ = *from ++;
      }
      else
      {
        int lower, upper, midpoint, cmp;

        for( lower=0, upper=99, cur = NULL; lower <= upper; )
        {
          midpoint = (lower + upper) >> 1;
          tmp = &entityTable[entitySorted[midpoint]];
          cmp = strncmp( from+1, tmp->name, tmp->length);

          if( cmp == 0 )
          {
            cur = tmp;
            break;
          }
          if( cmp > 0 )
            lower = midpoint + 1;
          else
            upper = midpoint - 1;
        }

        if( cur != NULL )
        {
          strcpy( to, cur->value);
          to += cur->valueLength;
          from += cur->length + 1;
          if( *from == ';' ) from++;
        }
        else
          *to ++ = *from ++;
      }
    }
    else
      *to ++ = *from ++;

  *to = '\0';
  return string;
}
