/* reg.c */
/*****************************************************************************
					s@o@wsq

							mK\n
*****************************************************************************/

/* Copyright (C) 1996  Katsuyuki Okabe <hgc02147@niftyserve.or.jp>
 */

#include "xtr.h"
#if WITH_REGEX
#include <regex.h>
#else
#include <rx.h>
#endif


/*
 * Regex syntax bit set used by this module; init_syntax_once() installs it
 * with re_set_syntax().  The individual RE_* bits come from the regex
 * header included above.
 */
#define RE_SYNTAX_XTR													\
    (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_CHAR_CLASSES						\
     | RE_CONTEXT_INDEP_OPS       | RE_DOT_NOT_NULL						\
     | RE_INTERVALS               | RE_NO_BK_BRACES						\
     | RE_NO_BK_PARENS            | RE_NO_BK_VBAR						\
     | RE_NO_EMPTY_RANGES         | RE_UNMATCHED_RIGHT_PAREN_ORD		\
	 | RE_NO_GNU_OPS)

/*
 * Fallback for regex headers that do not provide RE_NO_GNU_OPS.  Defining
 * it after RE_SYNTAX_XTR is fine: the macro body is only expanded where
 * RE_SYNTAX_XTR is used, which is further down in this file.
 */
#ifndef RE_NO_GNU_OPS
#define RE_NO_GNU_OPS 0
#endif

/* Fallback when no included header defines CHAR_BIT; reg_compile() uses it
 * to size the fastmap (1 << CHAR_BIT bytes). */
#ifndef CHAR_BIT
#define CHAR_BIT 8
#endif

/* Mode bits that are significant to this module; every entry point masks
 * its `mode' argument with this before using it as part of the cache key.
 * R_ICASE and R_IZENHAN are defined outside this file. */
#define R_MASK (R_ICASE | R_IZENHAN)

/*
 * One cached, compiled pattern.  Nodes are chained per hash bucket and are
 * identified by the original (untranslated) pattern bytes plus the masked
 * mode flags (see the EQ() macro).
 */
struct hash_node {
	struct re_pattern_buffer re;	/* compiled pattern, filled by re_compile_pattern() */
	struct hash_node *next;			/* next node in the same bucket */
	unsigned int mode;				/* mode flags, already masked with R_MASK */
	int l;							/* length in bytes of the pattern stored in s[] */
	char s[1];						/* original pattern bytes; the node is allocated
									   with `l' extra bytes so s[] really holds l bytes */
};

/* Number of buckets in the compiled-pattern cache; hashkey() reduces its
 * result modulo this value. */
#define HASH_SIZE 256

/* Bucket heads of the compiled-pattern cache (chains of struct hash_node). */
static struct hash_node *hash_table[HASH_SIZE];

/* Number of entries in the case-folding translate table. */
#define CASEMAP_SIZE 256

/* Translate table installed as re.translate for R_ICASE patterns; filled
 * with TOLOWER(i) for every index by init_syntax_once(). */
static char casemap_table[CASEMAP_SIZE];

/*
 * One-time module initialisation: installs the regex syntax, (for the rx
 * build) sets the rx cache bound, clears the pattern cache buckets and
 * builds the lower-casing translate table.  Subsequent calls do nothing.
 */
static void
init_syntax_once()
{
	static int initialized = 0;
	int slot;
	int ch;

	if (!initialized) {
		re_set_syntax(RE_SYNTAX_XTR);
#if !WITH_REGEX
		rx_cache_bound = 4096;
#endif

		/* Start with every bucket empty. */
		slot = 0;
		while (slot < HASH_SIZE) {
			hash_table[slot] = NULL;
			slot++;
		}

		/* casemap_table[ch] is whatever TOLOWER() yields for ch. */
		ch = 0;
		while (ch < CASEMAP_SIZE) {
			casemap_table[ch] = TOLOWER(ch);
			ch++;
		}

		initialized = 1;
	}
}

/*
 * Hash the first `rl' bytes of `rs' into a bucket index in
 * [0, HASH_SIZE).  Each step computes acc * 31 + byte in unsigned
 * arithmetic (the byte is taken as a value in 0..255).  A non-positive
 * `rl' hashes to 0.
 */
static unsigned int
hashkey(const char *rs, int rl)
{
	unsigned int acc = 0;
	int pos;

	for (pos = 0; pos < rl; pos++)
		acc = acc * 31u + (unsigned int)(rs[pos] & 0377);

	return acc % HASH_SIZE;
}

/*
 * Length-counted byte buffer returned by make_regexp().  It is allocated
 * with extra room after the struct, so s[] holds `len' bytes and is not
 * NUL-terminated by make_regexp().
 */
struct re_string {
	int len;		/* number of bytes currently stored in s[] */
	char s[1];		/* pattern bytes (storage extends past the struct) */
};

/*
 * make_regexp -- rewrite a user pattern into the byte string that is
 * handed to re_compile_pattern().
 *
 *   rs, rl  pattern bytes and their count (rs need not be NUL-terminated)
 *   mode    mode flags; only R_ICASE is consulted here
 *
 * Returns a newly allocated struct re_string (XMalloc/XRealloc); the
 * caller releases it with XFree().
 *
 * What the rewrite does:
 *   - `\x' is copied through as two bytes.
 *   - Bracket expressions are copied through, recognising a leading `^',
 *     a leading `]', `[:class:]' items, backslash escapes and ranges, so
 *     that the closing `]' is found correctly.
 *   - A byte for which IsKanji1() is true is combined with the following
 *     byte into one two-byte character.  With R_ICASE, a two-byte character
 *     for which jisalpha() is true is emitted in both jtoupper() and
 *     jtolower() form: as `[Xx]' outside brackets, as two members (or two
 *     ranges) inside brackets.
 *   - Everything else is copied unchanged.  (Case folding of single-byte
 *     characters is not done here; reg_compile() installs a translate
 *     table for that.)
 *
 * If the pattern ends in the middle of an escape or of a two-byte
 * character, the bytes seen so far are emitted and the function returns
 * immediately; it never reads past rs[rl - 1].
 *
 * NOTE(review): the second endpoint of a two-byte range is tested with
 * iskanji() while every other place uses IsKanji1().  Both are defined
 * outside this file; confirm whether the difference is intentional.
 */
static struct re_string *
make_regexp(const char *rs, int rl, unsigned int mode)
{
	struct re_string *r;
	int r_siz;
	int icase;
	unsigned int c;

/* Make sure at least (sz) more bytes can be stored into r. */
#define EXTEND_R(sz)													\
	do {																\
		if (r_siz <= r->len + (sz)) {									\
			r_siz += (sz);												\
			r = (struct re_string *)									\
				XRealloc((voidstar)r, sizeof(struct re_string) + r_siz);\
		}																\
	} while (0)

/* Append one byte / one two-byte character (high byte first). */
#define STORE_R(c) do { r->s[r->len++] = (c); } while (0)
#define MB_STORE_R(wc) do { STORE_R((wc) >> 8); STORE_R(wc); } while (0)

/* Consume one input byte, returning it as 0..255 / discarding it. */
#define PATFETCH() (rl--, (*rs++ & 0377))
#define PATDROPCH() do { rl--; rs++; } while (0)

/* Return what has been built so far when (exp) holds. */
#define FORCE_RETURN(exp) do { if ((exp)) return r; } while (0)

	icase = mode & R_ICASE;

	r_siz = rl * 2;
	r = (struct re_string *)XMalloc(sizeof(struct re_string) + r_siz);
	r->len = 0;

	while (rl > 0) {

		EXTEND_R(64);

		c = PATFETCH();
		switch (c) {
		case '[':
			STORE_R(c);

			/* A `^' and/or `]' right after `[' are literal set syntax. */
			if (rl > 0 && *rs == '^')
				STORE_R(PATFETCH());

			if (rl > 0 && *rs == ']')
				STORE_R(PATFETCH());

			while (rl > 0) {

				EXTEND_R(32);

				c = PATFETCH();
				if (c == ']') {
					STORE_R(c);
					break;
				}
				if (c == '[' && rl > 0 && *rs == ':') {
					int save_r_len;
					int save_rl;
					const char *save_rs;

					STORE_R(c);
					STORE_R(PATFETCH());

					/* Remember the state just after "[:" so the scan can
					   be undone if this is not a complete [:class:]. */
					save_r_len = r->len;
					save_rl = rl;
					save_rs = rs;

					while (rl > 0) {

						EXTEND_R(16);

						c = PATFETCH();
						STORE_R(c);
						if (c == ':' || c == ']')
							break;
					}

					if (c == ':' && rl > 0 && *rs == ']') {
						STORE_R(PATFETCH());
						/* A `-' directly after the class is copied as is. */
						if (rl > 0 && *rs == '-')
							STORE_R(PATFETCH());
					} else {
						/* Not a class: keep "[:" and rescan what follows
						   as ordinary set members. */
						r->len = save_r_len;
						rl = save_rl;
						rs = save_rs;
					}

				} else if (c == '\\') {
					STORE_R(c);
					FORCE_RETURN(rl <= 0);
					c = PATFETCH();
					goto ank;

				} else if (IsKanji1(c)) {

					/* Lead byte at the very end of the pattern: emit it
					   as is instead of fetching a trail byte that is not
					   there. */
					if (rl <= 0) {
						STORE_R(c);
						return r;
					}

					c = (c << 8) + PATFETCH();

					if (rl > 1 && rs[0] == '-' && rs[1] != ']') {
						unsigned int c2;

						PATDROPCH();

						c2 = PATFETCH();
						if (iskanji(c2)) {

							/* Range end is a lead byte with nothing after
							   it: emit the pieces verbatim and stop. */
							if (rl <= 0) {
								MB_STORE_R(c);
								STORE_R('-');
								STORE_R(c2);
								return r;
							}

							c2 = (c2 << 8) + PATFETCH();

							if (icase && jisalpha(c) && jisalpha(c2)) {
								/* Emit the upper-case range here; the
								   lower-case one is emitted below. */
								c = jtoupper(c);
								c2 = jtoupper(c2);
								MB_STORE_R(c);
								STORE_R('-');
								MB_STORE_R(c2);
								c = jtolower(c);
								c2 = jtolower(c2);
							}
						} else if (c2 == '\\') {
							MB_STORE_R(c);
							STORE_R('-');
							STORE_R('\\');
							FORCE_RETURN(rl <= 0);
							STORE_R(PATFETCH());
							continue;
						}
						MB_STORE_R(c);
						STORE_R('-');
						if (Is1B(c2))
							STORE_R(c2);
						else
							MB_STORE_R(c2);

					} else {
						if (icase && jisalpha(c)) {
							/* Add the upper-case form, then fall through
							   to add the lower-case form. */
							c = jtoupper(c);
							MB_STORE_R(c);
							c = jtolower(c);
						}
						MB_STORE_R(c);
					}

				} else {
				ank:
					/* Single-byte member, optionally the start of a range. */
					STORE_R(c);
					if (rl > 1 && rs[0] == '-' && rs[1] != ']') {
						STORE_R(PATFETCH());
						c = PATFETCH();
						if (c == '\\' || IsKanji1(c)) {
							STORE_R(c);
							FORCE_RETURN(rl <= 0);
							c = PATFETCH();
						}
						STORE_R(c);
					}
				}
			}
			break;

		case '\\':
			STORE_R(c);
			FORCE_RETURN(rl <= 0);
			STORE_R(PATFETCH());
			break;

		default:
			if (IsKanji1(c)) {
				int in_bracket = FALSE;

				/* Lead byte at the very end of the pattern: emit it as
				   is instead of reading beyond the input. */
				if (rl <= 0) {
					STORE_R(c);
					return r;
				}

				c = (c << 8) + PATFETCH();
				if (icase && jisalpha(c)) {
					/* Case-insensitive two-byte letter becomes "[Xx]". */
					in_bracket = TRUE;
					STORE_R('[');
					c = jtoupper(c);
					MB_STORE_R(c);
					c = jtolower(c);
				}
				MB_STORE_R(c);
				if (in_bracket)
					STORE_R(']');

			} else
				STORE_R(c);
			break;
		}
	}

	return r;
}

/*
 * True when cache node `node' was created for exactly this pattern: same
 * (already masked) mode, same length and identical pattern bytes.
 */
#define EQ(node,rs,rl,mode)												\
    ((node)->mode == (mode) && (node)->l == (rl)						\
     && memcmp((node)->s, (rs), (rl)) == 0)

int
reg_compile(const char *rs, int rl, unsigned int mode)
{
	struct hash_node *node;
	struct re_string *r;
	unsigned int key;
	const char *error_msg;

	init_syntax_once();

	mode &= R_MASK;

	key = hashkey(rs, rl);
	for (node = hash_table[key]; node != NULL; node = node->next)
		if (EQ(node, rs, rl, mode))
			return OK;

	node = (struct hash_node *)XMalloc(sizeof(struct hash_node) + rl);
	memset(node, 0, sizeof(*node));
	node->re.translate = mode & R_ICASE ? casemap_table : NULL ;
	r = make_regexp(rs, rl, mode);
	error_msg = re_compile_pattern(r->s, r->len, &node->re);
	XFree((voidstar)r);
	if (error_msg != NULL) {
		XFree((voidstar)node);
		Error("%s", error_msg);
	}

	node->re.fastmap = (char *)malloc(1 << CHAR_BIT);
	if (node->re.fastmap != NULL && re_compile_fastmap(&node->re) < 0) {
		XFree((voidstar)node->re.fastmap);
		node->re.fastmap = NULL;
	}

	memcpy(node->s, rs, rl);
	node->l = rl;
	node->mode = mode;
	node->next = hash_table[key];

	hash_table[key] = node;

	return OK;
}

/*
 * reg_match -- match `str' against a pattern previously entered with
 * reg_compile().
 *
 *   rs, rl, mode  identify the cached pattern (mode is masked with R_MASK)
 *   str           NUL-terminated subject; matching is anchored at str[0]
 *                 (re_match is called with start position 0)
 *   nmatch        out: number of entries stored in *matchs (0 on failure)
 *   matchs        out: XMalloc'ed array of match spans, entry 0 first;
 *                 NULL on failure.  Trailing registers whose start is
 *                 negative are dropped from the count.
 *
 * Returns TRUE when the pattern is cached and matches, FALSE otherwise.
 */
int
reg_match(const char *rs, int rl, unsigned int mode,
		  const char *str, int *nmatch, struct reg_match_t **matchs)
{
	struct re_registers regs;
	struct hash_node *node;
	int status;

	*nmatch = 0;
	*matchs = NULL;

	mode &= R_MASK;

	/* Locate the cache entry; an unknown pattern simply does not match. */
	node = hash_table[hashkey(rs, rl)];
	while (node != NULL && !EQ(node, rs, rl, mode))
		node = node->next;
	if (node == NULL)
		return FALSE;

	/* Hand the register bookkeeping to the library, starting from empty. */
	re_set_registers(&node->re, &regs, 0,
					 (regoff_t *)NULL, (regoff_t *)NULL);

	status = re_match(&node->re, str, strlen(str), 0, &regs);
	if (status >= 0) {
		int count, k;

		/* Index of the last register that took part in the match ... */
		count = regs.num_regs - 1;
		while (count > 0 && regs.start[count] < 0)
			--count;
		/* ... turned into a number of entries. */
		count++;

		*nmatch = count;
		*matchs = (struct reg_match_t *)
			XMalloc(sizeof(struct reg_match_t) * count);

		for (k = 0; k < count; ++k) {
			(*matchs)[k].beg = str + regs.start[k];
			(*matchs)[k].len = regs.end[k] - regs.start[k];
		}
	}

	/* Release whatever register arrays the library handed back. */
	if (regs.start != NULL) {
		free((voidstar)regs.start);
		free((voidstar)regs.end);
	}

	return status >= 0 ? TRUE : FALSE ;
}

int
reg_free(const char *rs, int rl, unsigned int mode)
{
	struct hash_node *node, **pnode;

	mode &= R_MASK;

	for (node = *(pnode = &hash_table[hashkey(rs, rl)]);
		 node != NULL;
		 node = *(pnode = &node->next))
		if (EQ(node, rs, rl, mode)) {

			*pnode = node->next;

			if (node->re.buffer != NULL)
				free((voidstar)node->re.buffer);

			if (node->re.fastmap != NULL)
				free((voidstar)node->re.fastmap);

			XFree((voidstar)node);

			return OK;
		}

	return ERR;
}


/******************************************************************************
*																			  *
*							  === CAUTION !!! ===							  *
*																			  *
*	   To run the tests, this file must be encoded in Shift_JIS (SJIS).       *
*																			  *
******************************************************************************/

#ifdef TEST

/* Test-build switch: when non-zero, ismbchar() reports no byte as the
 * start of a multibyte character. */
int nokanjimode = 0;

/* Replace any macro version of ismbchar with the function below. */
#undef ismbchar
/*
 * Returns 1 when multibyte handling is enabled (nokanjimode == 0) and
 * iskanji(c) is true, otherwise 0.
 */
int
ismbchar(int c)
{
	if (nokanjimode)
		return 0;

	return iskanji(c) ? 1 : 0;
}

/*
 * Test-build replacement for Error(): print a printf-style message to
 * stderr and terminate the process with exit status 1.  Never returns.
 */
void
Error(const uchar *format, ...)
{
	va_list args;

	VA_START(args, format);
	/* vfprintf() takes a `const char *' format; convert explicitly rather
	   than passing the `const uchar *' pointer through unchanged. */
	vfprintf(stderr, (const char *)format, args);
	va_end(args);
	exit(1);
}

/*
 * Test-build allocator: malloc() that reports failure through Error().
 * A request for 0 bytes is turned into a request for 1 byte.
 */
voidstar
XMalloc(size_t sz)
{
	voidstar mem;

	if (sz == 0)
		sz = 1;

	mem = (voidstar)malloc(sz);
	if (mem == NULL)
		Error("Virtual memory exhausted.\n");

	return mem;
}

/*
 * Test-build counterpart of XMalloc(): release `p'; a NULL pointer is
 * ignored.
 */
void
XFree(voidstar p)
{
	if (p == NULL)
		return;

	free((voidstar)p);
}

/*
 * Test-build reallocator: realloc() that reports failure through Error().
 * A NULL `p' behaves like XMalloc(sz); a size of 0 is bumped to 1.
 */
voidstar
XRealloc(voidstar p, size_t sz)
{
	voidstar grown;

	if (p == NULL)
		return XMalloc(sz);

	grown = realloc(p, sz != 0 ? sz : 1);
	if (grown == NULL)
		Error("Virtual memory exhausted.\n");

	return grown;
}

/* One make_regexp() test case, as consumed by main() below. */
struct re_test {
	int mode;			/* mode flags passed to make_regexp() (0 or R_ICASE) */
	const char *re;		/* input pattern; a null pointer ends the table */
	const char *ans;	/* expected rewritten pattern */
    int line;			/* source line of the entry, printed on failure */
};

/*
 * make_regexp() test cases: { mode, input pattern, expected output,
 * __LINE__ }.  Most cases come in pairs, first with mode 0 and then with
 * R_ICASE.  The table ends with an entry whose `re' is 0.
 *
 * Many of the literals are meant to contain multibyte (SJIS) characters --
 * see the CAUTION banner above -- so the file encoding must be preserved
 * when editing these entries.
 */
struct re_test test_table[] = {
    { 0,       "a\\", "a\\", __LINE__ },
	{ R_ICASE, "a\\", "a\\", __LINE__ },
	{ 0,       "a\\[b", "a\\[b", __LINE__ },
	{ R_ICASE, "a\\[b", "a\\[b", __LINE__ },
	{ 0,       "a\\[", "a\\[", __LINE__ },
	{ R_ICASE, "a\\[", "a\\[[a]", __LINE__ },
	{ 0,       "abc", "abc", __LINE__ },
	{ R_ICASE, "abc", "abc", __LINE__ },
	{ 0,       "ac", "ac", __LINE__ },
	{ R_ICASE, "ac", "a[a]c", __LINE__ },
	{ 0,       "bc", "bc", __LINE__ },
	{ R_ICASE, "bc", "[`]bc", __LINE__ },
	{ 0,       "a[]b", "a[]b", __LINE__ },
	{ R_ICASE, "a[]b", "a[]b", __LINE__ },
	{ 0,       "a[]", "a[]", __LINE__ },
	{ R_ICASE, "a[]", "a[]a", __LINE__ },
	{ 0,       "a[]-", "a[]-", __LINE__ },
	{ R_ICASE, "a[]-", "a[]a-b-", __LINE__ },
	{ 0,       "a[^]b", "a[^]b", __LINE__ },
	{ R_ICASE, "a[^]b", "a[^]b", __LINE__ },
	{ 0,       "a[^]", "a[^]", __LINE__ },
	{ R_ICASE, "a[^]", "a[^]a", __LINE__ },
	{ 0,       "a[^]-", "a[^]-", __LINE__ },
	{ R_ICASE, "a[^]-", "a[^]a-b-", __LINE__ },
	{ 0,       "a[b]", "a[b]", __LINE__ },
	{ R_ICASE, "a[b]", "a[b]", __LINE__ },
	{ 0,       "a[]", "a[]", __LINE__ },
	{ R_ICASE, "a[]", "a[a]", __LINE__ },
	{ 0,       "a[-]", "a[-]", __LINE__ },
	{ R_ICASE, "a[-]", "a[a-b-]", __LINE__ },
	{ 0,       "a[^b]", "a[^b]", __LINE__ },
	{ R_ICASE, "a[^b]", "a[^b]", __LINE__ },
	{ 0,       "a[^]", "a[^]", __LINE__ },
	{ R_ICASE, "a[^]", "a[^a]", __LINE__ },
	{ 0,       "a[^-]", "a[^-]", __LINE__ },
	{ R_ICASE, "a[^-]", "a[^a-b-]", __LINE__ },
	{ 0,       "a[b]c", "a[b]c", __LINE__ },
	{ R_ICASE, "a[b]c", "a[b]c", __LINE__ },
	{ 0,       "a[]c", "a[]c", __LINE__ },
	{ R_ICASE, "a[]c", "a[a]c", __LINE__ },
	{ 0,       "a[-]d", "a[-]d", __LINE__ },
	{ R_ICASE, "a[-]d", "a[a-b-]d", __LINE__ },
	{ 0,       "a[b]", "a[b]", __LINE__ },
	{ R_ICASE, "a[b]", "a[b][b]", __LINE__ },
	{ 0,       "a[]", "a[]", __LINE__ },
	{ R_ICASE, "a[]", "a[a][b]", __LINE__ },
	{ 0,       "a[-]", "a[-]", __LINE__ },
	{ R_ICASE, "a[-]", "a[a-b-][c]", __LINE__ },
	{ 0,       "a[b-\\c]", "a[b-\\c]", __LINE__ },
	{ R_ICASE, "a[b-\\c]", "a[b-\\c]", __LINE__ },
	{ 0,       "a[-\\c]", "a[-\\c]", __LINE__ },
	{ R_ICASE, "a[-\\c]", "a[-\\c]", __LINE__ },
	{ 0,       "a[b-]", "a[b-]", __LINE__ },
	{ R_ICASE, "a[b-]", "a[b-]", __LINE__ },
	{ 0,       "a[-c]", "a[-c]", __LINE__ },
	{ R_ICASE, "a[-c]", "a[-c]", __LINE__ },
	{ 0,       "a[[:", "a[[:", __LINE__ },
	{ R_ICASE, "a[[:", "a[[:", __LINE__ },
	{ 0,       "a[[:alpha", "a[[:alpha", __LINE__ },
	{ R_ICASE, "a[[:alpha", "a[[:alpha", __LINE__ },
	{ 0,       "a[[:alpha:", "a[[:alpha:", __LINE__ },
	{ R_ICASE, "a[[:alpha:", "a[[:alpha:", __LINE__ },
	{ 0,       "a[[:alpha:b", "a[[:alpha:b", __LINE__ },
	{ R_ICASE, "a[[:alpha:b", "a[[:alpha:b", __LINE__ },
	{ 0,       "a[[:alpha:", "a[[:alpha:", __LINE__ },
	{ R_ICASE, "a[[:alpha:", "a[[:alpha:a", __LINE__ },
	{ 0,       "a[[:alpha]", "a[[:alpha]", __LINE__ },
	{ R_ICASE, "a[[:alpha]", "a[[:alpha]", __LINE__ },
	{ 0,       "a[[:alpha]b", "a[[:alpha]b", __LINE__ },
	{ R_ICASE, "a[[:alpha]b", "a[[:alpha]b", __LINE__ },
	{ 0,       "a[[:alpha][bc]", "a[[:alpha][bc]", __LINE__ },
	{ R_ICASE, "a[[:alpha][bc]", "a[[:alpha][bc]", __LINE__ },
	{ 0,       "a[[:alpha][b-c]", "a[[:alpha][b-c]", __LINE__ },
	{ R_ICASE, "a[[:alpha][b-c]", "a[[:alpha][b-c]", __LINE__ },
	{ 0,       "a[[:alpha]", "a[[:alpha]", __LINE__ },
	{ R_ICASE, "a[[:alpha]", "a[[:alpha][a]", __LINE__ },
	{ 0,       "a[[:alpha][]", "a[[:alpha][]", __LINE__ },
	{ R_ICASE, "a[[:alpha][]", "a[[:alpha][ab]", __LINE__ },
	{ 0,       "a[[:alpha][-]", "a[[:alpha][-]", __LINE__ },
	{ R_ICASE, "a[[:alpha][-]", "a[[:alpha][a-b-]", __LINE__ },
	{ 0,       "a[[:alpha:]", "a[[:alpha:]", __LINE__ },
	{ R_ICASE, "a[[:alpha:]", "a[[:alpha:]a", __LINE__ },
	{ 0,       "a[[:alpha:]-", "a[[:alpha:]-", __LINE__ },
	{ R_ICASE, "a[[:alpha:]-", "a[[:alpha:]-a", __LINE__ },
	{ 0,       "a[[:alpha:]--", "a[[:alpha:]--", __LINE__ },
	{ R_ICASE, "a[[:alpha:]--", "a[[:alpha:]-a-b-", __LINE__ },
	{ 0,       "a[[:", "a[[:", __LINE__ },
	{ R_ICASE, "a[[:", "a[[:`kog`", __LINE__ },
	{ 0,       "a[[::", "a[[::", __LINE__ },
	{ R_ICASE, "a[[::", "a[[:`kog`:", __LINE__ },
	{ 0,       "a[[::b", "a[[::b", __LINE__ },
	{ R_ICASE, "a[[::b", "a[[:`kog`:b", __LINE__ },
	{ 0,       "a[[::", "a[[::", __LINE__ },
	{ R_ICASE, "a[[::", "a[[:`kog`:a", __LINE__ },
	{ 0,       "a[[:]", "a[[:]", __LINE__ },
	{ R_ICASE, "a[[:]", "a[[:`kog`]", __LINE__ },
	{ 0,       "a[[:]b", "a[[:]b", __LINE__ },
	{ R_ICASE, "a[[:]b", "a[[:`kog`]b", __LINE__ },
	{ 0,       "a[[:][bc]", "a[[:][bc]", __LINE__ },
	{ R_ICASE, "a[[:][bc]", "a[[:`kog`][bc]", __LINE__ },
	{ 0,       "a[[:][b-c]", "a[[:][b-c]", __LINE__ },
	{ R_ICASE, "a[[:][b-c]", "a[[:`kog`][b-c]", __LINE__ },
	{ 0,       "a[[:]", "a[[:]", __LINE__ },
	{ R_ICASE, "a[[:]", "a[[:`kog`][a]", __LINE__ },
	{ 0,       "a[[:][]", "a[[:][]", __LINE__ },
	{ R_ICASE, "a[[:][]", "a[[:`kog`][ab]", __LINE__ },
	{ 0,       "a[[:][-]", "a[[:][-]", __LINE__ },
	{ R_ICASE, "a[[:][-]", "a[[:`kog`][a-b-]", __LINE__ },
	{ 0,       "a[[::]", "a[[::]", __LINE__ },
	{ R_ICASE, "a[[::]", "a[[::]a", __LINE__ },
	{ 0,       "a[-X]", "a[-X]", __LINE__ },
	{ R_ICASE, "a[-X]", "a[-X]", __LINE__ },
	{ 0,       "a[O-]", "a[O-]", __LINE__ },
	{ R_ICASE, "a[O-]", "a[O-]", __LINE__ },
	{ 0,       "a[b-]c", "a[b-]c", __LINE__ },
	{ R_ICASE, "a[b-]c", "a[b-]c", __LINE__ },
	{ 0,       "a[-]c", "a[-]c", __LINE__ },
	{ R_ICASE, "a[-]c", "a[a-]c", __LINE__ },
	{ 0,       "a[b-]", "a[b-]", __LINE__ },
	{ R_ICASE, "a[b-]", "a[b-][b]", __LINE__ },
	{ 0,       "a[-]", "a[-]", __LINE__ },
	{ R_ICASE, "a[-]", "a[a-][b]", __LINE__ },
	{ 0,       "a[b-\\]c", "a[b-\\]c", __LINE__ },
	{ R_ICASE, "a[b-\\]c", "a[b-\\]c", __LINE__ },
	{ 0,       "a[-\\]c", "a[-\\]c", __LINE__ },
	{ R_ICASE, "a[-\\]c", "a[-\\]c", __LINE__ },
	{ 0,       "a[b-\\]", "a[b-\\]", __LINE__ },
	{ R_ICASE, "a[b-\\]", "a[b-\\]b", __LINE__ },
	{ 0,       "a[-\\]", "a[-\\]", __LINE__ },
	{ R_ICASE, "a[-\\]", "a[-\\]b", __LINE__ },
	{ 0, 0, 0 }
};

/*
 * Test driver: run make_regexp() over every entry of test_table and
 * compare the result with the expected string.  Each mismatch is printed
 * as
 *     <line>: `<input>'(<expected length>) -> `<actual>'(<actual length>)
 * followed by a summary line.  Exit status is 1 if any case failed,
 * 0 otherwise.
 */
int
main()
{
	struct re_string *r;
	struct re_test *tbl;
	int nerr;

	mbinit(MBTYPE_SJIS);

	nerr = 0;
	for (tbl = test_table; tbl->re != 0; ++tbl) {
		/* strlen() yields size_t; convert once so that the comparison and
		   the "%d" conversions below both see an int. */
		int want_len = (int)strlen(tbl->ans);

		r = make_regexp(tbl->re, (int)strlen(tbl->re), tbl->mode);
		if (r->len != want_len || memcmp(tbl->ans, r->s, r->len) != 0) {
			/* make_regexp() does not terminate its output; do so here
			   so it can be printed with "%s". */
			r->s[r->len] = '\0';
			printf("%d: `%s'(%d) -> `%s'(%d)\n", tbl->line,
				   tbl->re, want_len,
				   r->s, r->len);
			nerr++;
		}
		XFree((voidstar)r);
	}

	if (nerr)
		printf("%d errors.\n", nerr);
	else
		printf("no error.\n");

	return nerr ? 1 : 0 ;
}

#endif /* TEST */

/*
 * Local variables:
 * mode: c
 * c-indent-level: 4
 * c-continued-statement-offset: 4
 * c-brace-offset: -4
 * c-argdecl-indent: 4
 * c-label-offset: -4
 * tab-width: 4
 * tab-stop-list: (4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80)
 * compile-command: "( cd ../lib && make -k ) && gcc -DTEST -DHAVE_CONFIG_H -I.. -I. -I../lib reg.c ../lib/libxtr.a"
 * End:
 */
