/*////////////////////////////////////////////////////////////////////////
Copyright (c) 1994 Electrotechnical Laboratry (ETL), AIST, MITI

Permission to use, copy, modify, and distribute this material for any
purpose and without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies, and
that the name of ETL not be used in advertising or publicity pertaining
to this material without the specific, prior written permission of an
authorized representative of ETL.
ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS
MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS
OR IMPLIED WARRANTIES.
/////////////////////////////////////////////////////////////////////////
Content-Type:	program/C; charset=US-ASCII
Program:	url.c (rewrite for relayed-URL in the HTML)
Author:		Yutaka Sato <ysato@etl.go.jp>
Description:

    REWRITING RULE

      Uniform rewriting rule for URLs to be gatewayed by HTTP is:

	N://H:P/F  <=> http://${delegate}/-_-N://H:P/F

      Special rewriting rule for Gopher URL to be gatewayed by Gopher is:

	G://H:P/gF <=> G://${delegate}/g-_-G://H:P/gF

#-- obsolete rule --
# Mutual conversion of URL and "delegated-URL" like:
#
#    N://H:P/F  <=>  N://h:p/=@=:H:P/F   ( when N is http )
#    N://H:P/F  <=>  n://h:p/F=@=N:H:P=G ( when N is gopher )
#    N://H:P/F  <=>  n://h:p/F=@=N:H:P   ( when N is ftp, etc...)
#
# F in a URL must be F in the delegated-URL because protocols like
# Gopher, unfortunately, uses the top character of F as a type
# identifier.
#
# A special format of delegated-URL is used for HTTP to ease automatic
# rewriting of URL by hand.  It's achieved by simple insertion or
# deletion of "h:p/=@=:" just before "H:P".

History:
	March94	created
	941224	changed the rewriting rule
//////////////////////////////////////////////////////////////////////#*/
#include "delegate.h"
#include <ctype.h>
#include <string.h>
extern char *Sprintf();
extern char *getv();
extern char *mount_url_from();

#define URLSZ	(8*1024)
typedef char URLStr[URLSZ];

int GOPHER_ON_HTTP = 1;
#define ODGU_MARK	"=@="
#define NDGU_MARK	"-_-"
#define NDGU_MARK_LEN	(sizeof(NDGU_MARK)-1)

extern int FULL_URL;

/*
 *	SEARCH URL REFERENCE IN HTML
 *	(half done and dangerous X-<)
 */
/* One HTML attribute name whose value is treated as a URL reference. */
typedef struct {
	char	*a_name;	/* compared against toupper()ed input, so upper case */
} Attr;
/*
 * Attribute names searched for by attrWithURL().  The list ends at the
 * first entry whose a_name is NULL (the trailing 0 and the unused slots).
 */
static Attr rewriteAttr[64] = {
	{"HREF"		},
	{"SRC"		},
	{"ACTION"	},
	{"BACKGROUND"	},
	{"CLASSID"	},
	{"CODEBASE"	},
	{"IMAGEMAP"	},
	{"USEMAP"	},
	{"SCRIPT"	},
	{"URL"		},
	0
};
/*
 * Screening table filled lazily by attrWithURL(): bit i of attrChars[c]
 * is set when some name in rewriteAttr[] has character c at position i
 * (only the first 8 positions are recorded).
 */
static char attrChars[256];
static int attrCharsInit;	/* non-zero once attrChars[] has been filled */

/*
 * Decide whether the value of attribute rewriteAttr[ax], found inside the
 * tag starting at "tag" (may be NULL), is to be rewritten.
 * Returns 0 only when FULL_URL is set and the attribute is the HREF of a
 * <BASE> tag; returns 1 in every other case.
 * "<BASE" must be followed by white space, which is the same test isBASE()
 * applies (the former test accepted a single space only).
 */
static int rewriteit(ax,tag)
	int ax;
	char *tag;
{
	if( !FULL_URL )
		return 1;
	if( tag == NULL )
		return 1;
	if( strncasecmp(tag,"<BASE",5) != 0 || !isspace((unsigned char)tag[5]) )
		return 1;
	if( strcmp(rewriteAttr[ax].a_name,"HREF") != 0 )
		return 1;
	return 0;
}
static attrWithURL(attr,tag)
	char *attr,*tag;
{	int ai,ci;
	char *ap,ac,uc;

	if( attrCharsInit == 0 ){
		for( ai = 0; ap = rewriteAttr[ai].a_name; ai++ ){
			for( ci = 0; ci < 8 && (ac = ap[ci]); ci++ )
				attrChars[toupper(ac) & 0xFF] |= (1 << ci);
		}
		attrCharsInit = 1;
	}

	/*
	 *	Screening attribute names
	 */
	if( (attrChars[toupper(attr[0]) & 0xFF] & 1) == 0 )
		return 0;
	for( ci = 1; ci < 8 && (ac = attr[ci]); ci++ ){
		if( !isalpha(ac) )
			break;
		if( (attrChars[toupper(ac) & 0xFF] & (1 << ci)) == 0 )
			return 0;
	}

	/*
	 *	Searching attribute names
	 */
	for( ai = 0; ap = rewriteAttr[ai].a_name; ai++ ){
		for( ci = 0;; ci++ ){
			if( (ac = ap[ci]) == 0 ){
				if( rewriteit(ai,tag) )
					return ci;
				else	return 0;
			}
			if( ac != toupper(attr[ci]) )
				break;
		}
	}
	return 0;
}

/*
 * Returns 1 when "tag" points at "<BASE" (any case) followed by a white
 * space character, 0 otherwise (including tag == NULL).
 */
static int isBASE(tag)
	char *tag;
{
	if( tag == NULL )
		return 0;
	if( strncasecmp(tag,"<BASE",5) != 0 )
		return 0;
	/* cast: isspace() is undefined for negative char values */
	return isspace((unsigned char)tag[5]) ? 1 : 0;
}

/*
 * Find the next URL reference in the text "html".
 *
 *   html  text to be scanned; it may be modified (see rem)
 *   rem   when not NULL and the text ends right after an attribute name
 *         (or after its "=" and optional quote), the unfinished tail
 *         starting at the attribute name is copied to rem, cut off from
 *         html, and NULL is returned
 *   tagp  when not NULL, receives the position of the last '<' seen
 *         while scanning (NULL if none was seen)
 *
 * Returns a pointer to the first character of the URL, or NULL when no
 * reference was found.  With rem == NULL the returned pointer may point
 * at the terminating NUL of html.
 */
char *html_nexturl(html,rem,tagp)
	char *html,*rem,**tagp;
{	char *str,*ref,ch,*p,*top,*tag;
	char *strcasestr();
	int len;

	top = NULL;
	tag = NULL;
	str = html;

	/* header lines: the URL follows "Realm=<" in an authenticate header */
	if( strncasecmp(str,"WWW-Authenticate:",17) == 0
	 || strncasecmp(str,"Proxy-Authenticate:",19) == 0 )
	if( p = strcasestr(str,"Realm=<") ){
		p = p + 7;
		top = p;
		goto exit;
	}
	/* ... or follows "Location:" and any number of spaces */
	if( strncasecmp("Location:",str,9) == 0 ){
		p = str + 9;
		while( *p == ' ' )
			p++;
		top = p;
		goto exit;
	}

	for(;;){
		/* the very first candidate is the head of html itself */
		if( str == html )
			ref = html;
		else{
			/* advance to the next word: stop at white space, or
			 * just after ';', '"' or '\''; remember any '<' passed
			 */
			for( ref = str; ch = *ref; ref++ ){
				if( ch == '<' )
					tag = ref;
				if(  isspace(ch) )
					break;
				if( ch == ';' || ch == '"' || ch == '\'' ){
					ref++;
					break;
				}
			}
			for(; ch = *ref; ref++ )
				if( !isspace(ch) )
					break;
		}
		if( *ref == 0 )
			break;
		if( *ref == '<' )
			tag = ref;

		/* not one of the known attribute names: try the next position */
		if( (len = attrWithURL(ref,tag)) == 0 ){
			str = ref + 1;
			continue;
		}
		p = ref + len;

		while( isspace(*p) )
			p++;

		/* the name must be followed by '=' (or by the end of the text) */
		switch( *p ){
			case 0:   goto push;
			case '=': p++; break;
			default:  str = ref + 1; continue;
		}

		while( isspace(*p) )
			p++;

		/* skip one opening quote */
		if( *p == '"' || *p == '\'' )
			p++;
push:
		/* text ended before the value: hand the tail back through rem */
		if( *p == 0 && rem != NULL ){
			strcpy(rem,ref);
			*ref = 0;
			top = NULL;
			goto exit;
		}
		top = p;
		goto exit;
	}
exit:
	if( tagp != NULL )
		*tagp = tag;
	return top;
}

/*
 *	TRANSFORM delegated-URL to NORMAL URL:
 *	Delegation information embedded in the URL is removed and parsed.
 *	The "url" string passed from the caller will be overwritten.
 */
/*
 * Write "=<DELEGATE_FLAGS>=" at s.
 * Returns whatever Sprintf() returns; callers use it as the position
 * where writing continues.
 */
static char *printFlags(s)
	char *s;
{	char *tail;

	tail = Sprintf(s,"=%s=",DELEGATE_FLAGS);
	return tail;
}
/* Characters that end the host[:port] part; used with strpbrk()/strchr() in url_derefer(). */
char *endofHOSTPORT = "/? \t\r\n";

url_rmprefix(proto,prefix)
	char *proto,*prefix;
{	char *p;
	int len;
	char dch;

	prefix[0] = 0;
	if( strstr(proto,NDGU_MARK) == proto ){
		p = proto + strlen(NDGU_MARK);
		dch = *p;
		if( dch == '=' || dch == '/' ){
			for( p++; *p; p++ ){
				if( *p == dch ){
					len = p - proto + 1;
					strncpy(prefix,proto,len);
					prefix[len] = 0;
					strcpy(proto,p+1);
					break;
				}
			}
		}
	}
}

/*
 * Returns 1 when url begins (ignoring case) with one of
 * "ftp://", "file:/", "data:", "builtin:" or "http://"; 0 otherwise.
 */
int isLoadableURL(url)
	char *url;
{	static char *heads[] = {
		"ftp://", "file:/", "data:", "builtin:", "http://", 0
	};
	int hi;

	for( hi = 0; heads[hi] != NULL; hi++ ){
		if( strncasecmp(url,heads[hi],strlen(heads[hi])) == 0 )
			return 1;
	}
	return 0;
}

/*
 * Returns 1 when url starts (ignoring case) with http://, nntp://, wais://,
 * ftp:// or gopher://, or when it contains "://" but no NDGU_MARK;
 * returns 0 otherwise.
 */
int fromProxyClient(url)
	char *url;
{	static char *heads[] = {
		"http://", "nntp://", "wais://", "ftp://", "gopher://", 0
	};
	int hi;

	for( hi = 0; heads[hi] != NULL; hi++ ){
		if( strncasecmp(url,heads[hi],strlen(heads[hi])) == 0 )
			return 1;
	}

	/* any other scheme counts unless the URL is already a delegated one */
	if( strstr(url,"://") != NULL && strstr(url,NDGU_MARK) == NULL )
		return 1;
	return 0;
}
/* Returns 1 when url contains ODGU_MARK or NDGU_MARK anywhere, 0 otherwise. */
int is_redirected_url(url)
	char *url;
{
	if( strstr(url,ODGU_MARK) != NULL || strstr(url,NDGU_MARK) != NULL )
		return 1;
	return 0;
}
is_redirected_selector(sel)
	char *sel;
{
	if( strncmp(sel,NDGU_MARK,strlen(NDGU_MARK)) == 0 )
		return 1;
	return 0;
}

/*
 * Consume an optional flag specification "+F=", "-F=" or "=F=" at np.
 *
 *   '+'  onoff_flags(flags,F,1)
 *   '-'  onoff_flags(flags,F,0)
 *   '='  F is copied to flags
 *
 * The text following the closing '=' is then moved down to tp, and tp is
 * returned.  When np does not start with one of the three characters, or
 * has no closing '=', nothing is changed and np is returned.
 */
static char *scan_flags(np,tp,flags)
	char *np,*tp,*flags;
{	char *fp;
	char op;

	op = *np;
	if( op != '+' && op != '-' && op != '=' )
		return np;

	fp = strchr(np+1,'=');
	if( fp == NULL )
		return np;

	*fp = 0;
	switch( op ){
		case '+': onoff_flags(flags,np+1,1); break;
		case '-': onoff_flags(flags,np+1,0); break;
		case '=': strcpy(flags,np+1); break;
	}
	/* tp lies before fp in the same buffer, so strcpy() must not be used */
	memmove(tp,fp+1,strlen(fp+1)+1);
	return tp;
}

/*
 * Prefix the selector "sel" in place with "(:T:)", T being gtype.
 * Nothing is done when toproxy is non-zero, unless gtype is '7'.
 * A gtype of space, TAB, CR or LF is replaced by '1'.
 * The caller must provide room for 5 more characters in sel.
 * The text is shifted in place, so no fixed-size temporary is involved.
 * Always returns 0.
 */
static int put_gtype(sel,gtype,toproxy)
	char *sel;
	int gtype,toproxy;
{
	if( toproxy && gtype != '7' )
		return 0;

	if( gtype==' ' || gtype=='\t' || gtype=='\r' || gtype=='\n' )
		gtype = '1';

	memmove(sel+5,sel,strlen(sel)+1);
	sel[0] = '(';
	sel[1] = ':';
	sel[2] = (char)gtype;
	sel[3] = ':';
	sel[4] = ')';
	return 0;
}
/*
 * Determine the Gopher type character of the selector "gsel".
 *
 *   "(:T:)rest"   returns T; "rest" is copied to sel
 *   otherwise     the whole of gsel is copied to sel, and the result is
 *                 '1' when gsel is empty or starts with LF, CR or TAB,
 *                 '1' when its first white-space delimited word ends
 *                     with '/',
 *                 the first character when it is one of "0123456789gIT",
 *                 '9' in any other case
 *
 * sel may be NULL when the selector text is not wanted.
 * The word is examined in place, so no fixed-size buffer is filled, and
 * a gsel made of white space only is handled without reading
 * uninitialized data.
 */
int get_gtype(gsel,sel)
	char *gsel,*sel;
{	int gtype;
	char *wp,*we;

	/* gsel[2] is checked first so gsel[3] is never read past the end */
	if( gsel[0]=='(' && gsel[1]==':' && gsel[2]!=0
	 && gsel[3]==':' && gsel[4]==')' ){
		if( sel ) strcpy(sel,gsel+5);
		return gsel[2];
	}

	gtype = gsel[0];
	if( gtype=='\n' || gtype=='\r' || gtype=='\t' || gtype==0 )
		gtype = '1';
	else{
		/* locate the first word, as sscanf("%s") would */
		for( wp = gsel; isspace((unsigned char)*wp); wp++ )
			;
		for( we = wp; *we && !isspace((unsigned char)*we); we++ )
			;
		if( wp < we && we[-1] == '/' )
			gtype = '1';
		else
		if( !strchr("0123456789gIT",gtype) )
			gtype = '9';
	}

	if( sel ) strcpy(sel,gsel);
	return gtype;
}


extern char MODIFIERS[];
/*
 * Interpret one modifier (one element of the comma separated list given
 * to scan_modifiers()):
 *   "cc.X"  scan_CODECONV(X,CCV_TOCL,1)
 *   "cs.X"  scan_CODECONV(X,CCV_TOSV,1)
 *   "FX"    X is copied to flags
 * Anything else is ignored.  Always returns 0.
 */
static int scan_modifier1(mod1,flags)
	char *mod1,*flags;
{
	if( strncmp(mod1,"cc.",3) == 0 ){
		scan_CODECONV(mod1+3,CCV_TOCL,1);
		return 0;
	}
	if( strncmp(mod1,"cs.",3) == 0 ){
		scan_CODECONV(mod1+3,CCV_TOSV,1);
		return 0;
	}
	if( mod1[0] == 'F' )
		strcpy(flags,mod1+1);
	return 0;
}
/*
 * Remember the modifier list in MODIFIERS, then hand every element of
 * the list to scan_modifier1() through scan_commaList().
 * Always returns 0.
 * NOTE(review): the size of MODIFIERS is not visible here, so the copy is
 * unchecked - confirm that callers bound the length of mods.
 */
static int scan_modifiers(mods,flags)
	char *mods,*flags;
{
	strcpy(MODIFIERS,mods);
	scan_commaList(mods,0,scan_modifier1,flags);
	return 0;
}

/*
 * Extract the real destination from a delegated-URL and strip the
 * delegation part from "url" in place.
 *
 *   cproto     protocol currently in use
 *   url        delegated-URL; rewritten in place
 *   modifiers  when not NULL, receives the modifier string found next to
 *              the mark
 *   flags      receives flags found in modifiers or in "+F=", "-F=", "=F="
 *   proto      receives the protocol name
 *   host       receives the host (user:pass@host) part
 *   iportp     receives the port number
 *
 * Returns 1 when url was a delegated-URL and was dereferenced, else 0.
 *
 * NOTE(review): several strcpy()/sprintf() calls below copy within the
 * same buffer (overlapping source and destination), and modb[] is filled
 * without a length check - confirm these are safe for the inputs expected.
 */
url_derefer(cproto,url,modifiers,flags,proto,host,iportp)
	char *cproto,*url,*modifiers,*flags,*proto,*host;
	int *iportp;
{	char *hp,*tp,*np;
	char flagsb[256];
	char gtype;
	int ni;

	/*
	 *	New style: ...-_-proto://host:port/path
	 */
	if( tp = strstr(url,NDGU_MARK) ){
		char protob[URLSZ],port[URLSZ],urlh[URLSZ],*up;
		char modb[1024],*pb,*pp,ch;
		int len;

		/* "-_-/modifiers/..." : modifiers follow the mark */
		if( tp[NDGU_MARK_LEN] == '/' && tp[NDGU_MARK_LEN+1] != '/' ){
			pb = modb;
			for( pp = tp + NDGU_MARK_LEN + 1; ch = *pp++; ){
				if( ch == '/' )
					break;
				*pb++ = ch;
			}
			*pb = 0;
			if( modifiers != NULL )
				strcpy(modifiers,modb);

			scan_modifiers(modb,flags);
			/* remove "/modifiers/" from the URL */
			strcpy(tp+NDGU_MARK_LEN,pp);
		}else
		if( url < tp ){
			/* modifiers precede the mark; they are collected
			 * backwards into the tail of modb[]
			 */
			modb[sizeof(modb)-1] = 0;
			pb = &modb[sizeof(modb)-1];

			if( tp[-1] == ')' ){
			    /* "(modifiers)-_-" : remove through the '(' */
			    for( pp = tp - 2; url <= pp; pp-- ){
				if( *pp == '(' ){
					strcpy(pp,tp);
					tp = pp;
					break;
				}
				*--pb = *pp;
			    }
			}else{
			    /* "/modifiers-_-" : remove back to '/' or a space */
			    for( pp = tp - 1; url <= pp; pp-- ){
				if( *pp == '/' || isspace(*pp) ){
					strcpy(pp+1,tp);
					tp = pp + 1;
					break;
				}
				*--pb = *pp;
			    }
			}

			/* "X-.-Y" in the modifiers: keep X as the modifiers
			 * and rebuild the URL as "-_-:///Y<rest>"
			 */
			if( pp = strstr(pb,"-.-") ){
				*pp = 0;
				strcpy(urlh,tp);
				sprintf(tp,"%s:///%s%s",NDGU_MARK,pp+3,urlh);
			}
			if( modifiers != NULL )
				strcpy(modifiers,pb);
			scan_modifiers(pb,flags);
		}

		/* with HTTP, the mark is accepted only at offset 0 or 1 */
		if( &url[1] < tp && strcaseeq(cproto,"http") )
			return 0;

		np = tp + strlen(NDGU_MARK);
		np = scan_flags(np,tp,flags);
		unescape_specials(np,":","//");

		port[0] = 0;
		/* ":///path" : the server is this host itself */
		if( strncmp(np,":///",4) == 0 ){
			protob[0] = 0;
			strcpy(host,"localhost");
			*iportp = SERVER_PORT();
			strcpy(tp,np+4);
			return 1;
		}
		/* "://host" without protocol name: drop the ':' */
		if( strncmp(np,"://",3) == 0 )
			strcpy(np,np+1);

		ni = scan_protositeport(np,protob,host,port);

		if( ni == 2 || ni == 3 ){
			strcpy(proto,protob);
			/* urlh is built only to measure the length of the
			 * "proto://host[:port]" head to be removed
			 */
			up = urlh;
			up = Sprintf(up,"%s://%s",proto,host);
			if( proto[0] == 0 )
				strcpy(proto,cproto);
			if( ni == 2 )
				*iportp = serviceport(proto);
			else{	*iportp = atoi(port);
				up = Sprintf(up,":%s",port);
			}
			len = up - urlh;

			/* gopher://HP/G-_-gopher://...
			 * seems to no more be supported
			 */
			gtype = 0;
			/* skip "/Gtype" */
			if( streq(cproto,"gopher") && streq(proto,"gopher") ){
				if( np[len] == '/' ){
					len++;
					if( gtype = np[len] )
					if(strchr(endofHOSTPORT,gtype)==NULL){
						len++;
					}
				}
			}
			/* avoid "//" where the removed head was */
			if( url < tp && tp[-1] == '/' && np[len] == '/' )
				len += 1;
			strcpy(tp,np+len);
			if( gtype )
				put_gtype(tp,gtype,0);
			return 1;
		}
	}

	/*
	 *	Old style: ...=@=...
	 */
	if( (tp = strstr(url,ODGU_MARK)) == 0 )
		return 0;
	np = tp + strlen(ODGU_MARK);
	np = scan_flags(np,tp,flags);

	/*
	 *	Gopher		=@=gopher:H:P=Gtype
	 *		'Gtype' is used by Gopher/DeleGates who doesn't know
	 *		what type the requested information is.
	 *	Ftp/Gopher	=@=ftp:H:P=Gtype
	 *		'Gtype' may be used to determine whether P is a
	 *		directory or a flat file.
	 *
	 */
	if( (ni = sscanf(np,"%[^:]:%[^:]:%d=%c",proto,host,iportp,&gtype)) == 4
	 || (ni = sscanf(np,"%[^:]:%[^=]=%c",   proto,host,&gtype)) == 3 )
	{
		if( ni == 3 )
			*iportp = serviceport(proto);
		if( hp = strpbrk(np+strlen(host),endofHOSTPORT) )
			strcpy(tp,hp);
		put_gtype(url,gtype,0);
		return 1;
	}

	/*
	 *	Generic    =@=proto:H:P
	 */
	ni = sscanf(np,"%[^:]:%[^:/? \t\r\n]:%d",proto,host,iportp);
	if( 2 <= ni ){
		if( ni == 2 )
			*iportp = serviceport(proto);
		if( hp = strpbrk(np+strlen(host),endofHOSTPORT) )
			strcpy(tp,hp);
		return 1;
	}

	/*
	 *	HTTP-Special /=@=:H:P
	 */
	ni = sscanf(np,":%[^:/? \t\r\n]:%d",host,iportp);
	if( 1 <= ni ){
		if( ni == 1 )
			*iportp = serviceport("http");
		if( hp = strpbrk(np+strlen(host),endofHOSTPORT) ){
			if( tp[-1] == '/' && hp[0] == '/' )
				strcpy(tp,hp+1);
			else	strcpy(tp,hp);
		}
		return 1;
	}

	proto[0] = host[0] = 0;
	return 0;
}

/*
 * Convert an old-style delegated-URL that starts with ODGU_MARK back
 * into "proto://hostport<rest>", written over urlxa.
 * Returns 1 on success; 0 when urlxa (after nonxalpha_unescape) does not
 * start with the mark, cannot be parsed by scan_urlx(), carries no
 * "xhostport", or has no "://" in the remaining text.
 * NOTE(review): the value looked up as "xtype" is named "xgtype" in the
 * URLX rule table of this file; the result is unused here - confirm.
 */
url_undelegate(urlxa)
	char *urlxa;
{	char *tp,*up;
	int len;
	char xvalues[URLSZ];
	char *xav[64];
	int xac;
	char *xflags,*xproto,*xhostport,*xgtype,*xpath,*xsearch;
	char *sp;
	char urlx[URLSZ];

	/* work on an unescaped copy */
	nonxalpha_unescape(urlxa,urlx,0);
        if( strncmp(urlx,ODGU_MARK,strlen(ODGU_MARK)) != 0 )
                return 0;

	tp = urlx;
	up = tp + strlen(ODGU_MARK);
	/* parse the extension following the mark into NAME=VALUE lines */
	len = scan_urlx(up,xvalues);
	if( len <= 0 )
		return 0;

	xac = stoV(xvalues,64,xav,'\n');
	xflags    = getv(xav,"xflags");
	xproto    = getv(xav,"xproto");
	xhostport = getv(xav,"xhostport");
	xgtype    = getv(xav,"xtype");

	if( xhostport == 0 )
		return 0;

	/* drop the mark and the parsed extension from the copy */
	strcpy(tp,up+len);

	if( xproto == 0 )
		xproto = "http";

	sp = strstr(urlx,"://");
	if( sp == 0 )
		return 0;

	/* skip "://" and the host part that follows it */
	sp += 3;
	while( *sp && !strchr("/ \t\r\n\"",*sp) )
		sp++;

	if( sp[0] == '/' && sp[1] == '/' )
		sp += 1;

	sprintf(urlxa,"%s://%s",xproto,xhostport);
/* nonxalpha_escape(sp,urlxa+strlen()); */
	strcat(urlxa,sp);
	return 1;
}

/*
 *  site = user:pass@host:port
 *  site = [ [ user [ : pass ] @ ] hostport ]
 *  unreserved = A-Z a-z 0-9 $-_.!~*'(), 
 *  user = *( unreserved | escaped | ;&=+ )
 *  pass = *( unreserved | escaped | ;&=+ )
 */
/*
 * Copy the characters of url that precede the first ':' (or the whole
 * of url when it has none) to scheme.
 * Returns the position in url where copying stopped.
 */
static char *scan_scheme(url,scheme)
	char *url,*scheme;
{	char *sp;

	for( sp = url; *sp != 0 && *sp != ':'; sp++ )
		*scheme++ = *sp;
	*scheme = 0;
	return sp;
}
/*
 * Copy the leading run of "site" characters of url to site: letters,
 * digits and any of  - . : @ % $ _ ! ~ * ' ( ) , ; & = + #
 * Returns the position in url just after that run.
 */
static char *scan_site(url,site)
	char *url,*site;
{	char *sp,ch;

	for( sp = url; (ch = *sp) != 0; sp++ ){
		if( 'A' <= ch && ch <= 'Z' ){
		}else
		if( 'a' <= ch && ch <= 'z' ){
		}else
		if( '0' <= ch && ch <= '9' ){
		}else
		if( strchr("-.:@%$_!~*'(),;&=+#",ch) == NULL )
			break;
		*site++ = ch;
	}
	*site = 0;
	return sp;
}
/*
 * Split "proto://site..." into proto and site.
 * Returns 0 when the scheme is not followed by "://" (site is not
 * written in that case), 1 when the site part is empty, 2 otherwise.
 */
int scan_protosite(url,proto,site)
	char *url,*proto,*site;
{	char *sp;

	sp = scan_scheme(url,proto);
	if( strncmp(sp,"://",3) != 0 )
		return 0;

	scan_site(sp+3,site);
	return *site ? 2 : 1;
}
/*
 * Split "[proto][:]//[user[:pass]@]host[:port]..." into its parts.
 * A single '/' in place of "//" is accepted too (for IE4.0).
 *
 *   proto         receives the scheme; "" when url starts with '/'
 *                 (it used to be left unset in that case although
 *                 url_derefer() reads it afterwards)
 *   userpasshost  receives "[user[:pass]@]host"
 *   port          receives the port text, "" when absent
 *
 * Returns 0 when no "/" follows the scheme (userpasshost and port are
 * not written then), 2 when no port was given, 3 when one was.
 * NOTE(review): site[] is filled by scan_site() without a length limit.
 */
int scan_protositeport(url,proto,userpasshost,port)
	char *url,*proto,*userpasshost,*port;
{	char *sp,*up,*pp;
	char site[1024];

	sp = url;
	if( *sp != '/' )
		sp = scan_scheme(sp,proto);
	else	*proto = 0;
	if( *sp == ':' )
		sp++;

	if( strncmp(sp,"//",2) == 0 )
		sp += 2;
	else
	if( *sp == '/' )
		sp += 1; /* for IE4.0 */
	else	return 0;

	scan_site(sp,site);
	*userpasshost = 0;
	*port = 0;
	if( (up = strchr(site,'@')) != NULL ){
		/* the port is the text after the first ':' behind the '@' */
		if( (pp = strchr(up,':')) != NULL ){
			*pp++ = 0;
			strcpy(port,pp);
		}
		strcpy(userpasshost,site);
	}else{
		sscanf(site,"%[^:]:%s",userpasshost,port);
	}
	return *port == 0 ? 2 : 3;
}

/*
 * Decompose "proto://login/upath".
 * Each of proto, login and upath may be NULL when that part is not
 * wanted; parts that are wanted but not present are set to "".
 *   proto  text before the first ':'
 *   login  text between "://" and the next '/'
 *   upath  text after that '/'
 * The parts are written straight to the output buffers, so no
 * fixed-size intermediate buffer limits the input.
 * Always returns 0.
 */
int decomp_absurl(url,proto,login,upath)
	char *url,*proto,*login,*upath;
{	char *up,*dp;

	if( proto ) *proto = 0;
	if( login ) *login = 0;
	if( upath ) *upath = 0;

	dp = proto;
	for( up = url; *up && *up != ':'; up++ )
		if( dp ) *dp++ = *up;
	if( dp ) *dp = 0;

	/* short-circuit keeps the tests inside the string */
	if( up[0] != ':' || up[1] != '/' || up[2] != '/' )
		return 0;
	up += 3;

	dp = login;
	for(; *up && *up != '/'; up++ )
		if( dp ) *dp++ = *up;
	if( dp ) *dp = 0;

	if( *up != '/' )
		return 0;
	if( upath ) strcpy(upath,up+1);
	return 0;
}

/*
 * Remove the "proto://login" head from url in place, leaving the path.
 *
 *   proto  when not NULL, receives the leading run of letters of url
 *   login  when not NULL, receives the text between "://" and the next
 *          '/', CR, LF, space or TAB
 *
 * Result left in url:
 *   no ':' after the letters   unchanged
 *   "proto:rest" (no "//")     "rest"
 *   "proto://login/path"       "/path"
 *   "proto://login"            "/" followed by what remains (end, CR, LF)
 *   "proto://login  text"      "/ text"
 *
 * All moves are done with memmove() because source and destination are
 * the same buffer.  Always returns 0.
 */
int strip_urlhead(url,proto,login)
	char *url,*proto,*login;
{	char *dp,*op;

	op = proto;
	for( dp = url; isalpha((unsigned char)*dp); dp++ )
		if( op ) *op++ = *dp;
	if( op ) *op = 0;

	if( *dp != ':' )
		return 0;

	if( dp[1] != '/' || dp[2] != '/' ){
		memmove(url,dp+1,strlen(dp+1)+1);
		return 0;
	}
	dp += 3;

	op = login;
	for(; *dp; dp++ ){
		if( *dp=='/' || *dp=='\r' || *dp=='\n' )
			break;
		if( *dp == ' ' || *dp == '\t' ){
			while( *dp==' ' || *dp=='\t' )
				dp++;
			break;
		}
		if( op ) *op++ = *dp;
	}
	if( op ) *op = 0;

	/* dp is at least 3 characters past url, so the prefix always fits */
	if( *dp == '/' ){
		memmove(url,dp,strlen(dp)+1);
	}else
	if( *dp == 0 || *dp == '\r' || *dp == '\n' ){
		memmove(url+1,dp,strlen(dp)+1);
		url[0] = '/';
	}else{
		memmove(url+2,dp,strlen(dp)+1);
		url[0] = '/';
		url[1] = ' ';
	}
	return 0;
}


/*
 *	EXPAND PARTIAL HTTP-URL TO FULL SPEC URL:
 *	Absolute path in URL which have no http://H:P should be expanded to
 *	full description of URL, that is with http://HOST:PORT.
 *	Relative path will be expanded with http:H:P in the HTTP clients.
 */
/*
 * Format "host" or "host:port" into hostport.
 * The port is left out when it equals serviceport(proto).
 * Returns hostport.
 */
char *HostPort(hostport,proto,host,port)
	char *hostport,*proto,*host;
	int port;
{
	if( serviceport(proto) == port )
		strcpy(hostport,host);
	else	sprintf(hostport,"%s:%d",host,port);
	return hostport;
}

/* True for characters allowed in a scheme name: letters, digits, '+', '-', '.'.
 * The argument is evaluated more than once: pass no side effects. */
#define isSchemeChar(ch)	(isalnum((unsigned char)(ch))||(ch)=='+'||(ch)=='-'||(ch)=='.')

/*
 * Returns 1 when url is a full (absolute) URL, that is a scheme name
 * followed by ':' and
 *   - "//", or
 *   - a full path, for schemes accepted by localPathProto(), or
 *   - anything, for "news" and "mailto";
 * returns 0 otherwise.
 * The scheme is copied into proto[] with a length limit; a scheme too
 * long for proto[] can only qualify through the "//" test (presumably no
 * name known to localPathProto() is that long).
 */
int isFullURL(url)
	char *url;
{	char *up,proto[128];
	int plen;

	if( !isSchemeChar(url[0]) )
		return 0;

	plen = 0;
	for( up = url; isSchemeChar(*up); up++ ){
		if( plen < (int)sizeof(proto)-1 )
			proto[plen++] = *up;
	}
	proto[plen] = 0;

	if( up[0] != ':' )
		return 0;
	if( up[1] == '/' && up[2] == '/' )
		return 1;
	if( (int)sizeof(proto) <= up - url )
		return 0;	/* proto[] holds a truncated name */
	if( localPathProto(proto) && isFullpath(&up[1]) )
		return 1;
	if( streq(proto,"news") )
		return 1;
	if( streq(proto,"mailto") )
		return 1;
	return 0;
}

extern char *changeproxy_url_to();
/*
 * Ask changeproxy_url_to() about url; when it gives no answer and url
 * is "/" followed by NDGU_MARK or ODGU_MARK, ask again with the text
 * after the mark.  Returns the answer, or NULL.
 */
char *changeproxy_url(clif,method,url,proxy)
	char *clif,*method,*url,*proxy;
{	char *opt,*mark,*rest;

	opt = changeproxy_url_to(clif,method,url,proxy);
	if( opt != NULL )
		return opt;

	if( url[0] != '/' )
		return NULL;

	rest = url + 1;
	if( strncmp(rest,NDGU_MARK,strlen(NDGU_MARK)) == 0 )
		mark = NDGU_MARK;
	else
	if( strncmp(rest,ODGU_MARK,strlen(ODGU_MARK)) == 0 )
		mark = ODGU_MARK;
	else	return NULL;

	return changeproxy_url_to(clif,method,rest+strlen(mark),proxy);
}

/*
 * Replace the base information in referer with the one parsed from url
 * (the HREF value of a <BASE> tag).
 * Nothing is done when referer has no r_altbuf or when url cannot be
 * parsed by scan_url1().  The strings for r_proto, r_host, r_path and
 * r_base are stored one after another in r_altbuf; r_base is r_path cut
 * after its last '/', or "" when it has none.
 * The copy made for the log message is limited to the size of burl[].
 * Always returns 0.
 * NOTE(review): the capacity of r_altbuf is not visible here - confirm
 * it can hold the parsed values.
 */
static int setBASE(referer,url)
	Referer *referer;
	char *url;
{	char values[URLSZ],*av[64],*v1,*ap,*dp;
	int len,blen;
	char burl[2048];

	if( referer->r_altbuf == NULL )
		return 0;

	if( (len = scan_url1(url,values)) <= 0 )
		return 0;

	blen = len;
	if( (int)sizeof(burl)-1 < blen )
		blen = (int)sizeof(burl)-1;
	strncpy(burl,url,blen);
	burl[blen] = 0;
	sv1log("<BASE HREF=%s>\n",burl);

	stoV(values,64,av,'\n');
	ap = referer->r_altbuf;
	if( (v1 = getv(av,"proto")) != NULL ){
		referer->r_proto = ap;
		strcpy(ap,v1);
		ap += strlen(ap) + 1;
	}
	if( (v1 = getv(av,"host")) != NULL ){
		referer->r_host = ap;
		strcpy(ap,v1);
		ap += strlen(ap) + 1;
	}
	if( (v1 = getv(av,"port")) != NULL )
		referer->r_port = atoi(v1);

	if( (v1 = getv(av,"path")) != NULL ){
		referer->r_path = ap;
		strcpy(ap,v1);
		ap += strlen(ap) + 1;

		/* base = path up to and including its last '/' */
		referer->r_base = ap;
		strcpy(ap,referer->r_path);
		if( (dp = strrchr(ap,'/')) != NULL )
			dp[1] = 0;
		else	ap[0] = 0;
		ap += strlen(ap) + 1;
	}
	return 0;
}
/*
 * Copy the fields of referer used by url_absoluteS() into the caller's
 * variables.  Always returns 0.
 */
static int getBASE(referer,myhp,proto,hostport,host,port,base)
	Referer *referer;
	char **myhp,**proto,**hostport,**host,**base;
	int *port;
{
	*myhp     = referer->r_my_hostport;
	*proto    = referer->r_proto;
	*hostport = referer->r_hostport;
	*host     = referer->r_host;
	*port     = referer->r_port;
	*base     = referer->r_base;
	return 0;
}

/*
 * Set relurl to "" when absurl and baseurl are identical up to and
 * including their third '/', otherwise to a copy of absurl.
 * Always returns 0.
 * NOTE(review): judging from the name, a relative form of absurl may
 * have been intended for the first case - confirm with the callers.
 */
int url_relative(relurl,absurl,baseurl)
	char *relurl,*absurl,*baseurl;
{	int ui,nsl;

	nsl = 0;
	for( ui = 0; absurl[ui] && absurl[ui] == baseurl[ui]; ui++ ){
		if( absurl[ui] == '/' && ++nsl == 3 )
			break;
	}

	if( nsl == 3 )
		relurl[0] = 0;
	else	strcpy(relurl,absurl);
	return 0;
}

/*
 * Convenience front end of url_absoluteS(): builds a Referer from the
 * separate arguments (with no r_altbuf, so <BASE> tags cannot replace
 * the base) and converts line into xline.
 * Always returns 0.
 */
int url_absolute(myhp,proto,host,port,base,line,xline)
	char *myhp,*proto,*host,*base,*line,*xline;
	int port;
{	Referer referer;
	char hostport[128];

	referer.r_my_hostport = myhp;
	referer.r_proto = proto;
	referer.r_host = host;
	referer.r_port = port;
	referer.r_base = base;
	referer.r_altbuf = NULL;
	referer.r_hostport = HostPort(hostport,proto,host,port);
	url_absoluteS(&referer,line,xline);
	return 0;
}
/*
 * Copy line to xline, expanding every URL reference found by
 * html_nexturl() that is not a full URL, using the base information
 * in referer.
 * When referer has an r_altbuf, a <BASE> tag met on the way replaces
 * the base information (see setBASE()).
 */
url_absoluteS(referer,line,xline)
	Referer *referer;
	char *line,*xline;
{	char *myhp;
	char *proto;
	char *host;
	int   port;
	char *base;
	char *hp,hostportb[256];
	char *sp,*np,*xp;
	int ch;
	char *tagp;

	getBASE(referer,&myhp,&proto,&hp,&host,&port,&base);

	sp = line;	/* next unread position of the input */
	xp = xline;	/* next write position of the output */

	while( np = html_nexturl(sp,NULL,&tagp) ){
		if( referer->r_altbuf != NULL && tagp != NULL && isBASE(tagp) ){
			setBASE(referer,np);
			getBASE(referer,&myhp,&proto,&hp,&host,&port,&base);
		}

		/* copy the text preceding the URL as it is */
		ch = np[0];
		np[0] = 0;
		strcpy(xp,sp);
		xp += strlen(xp);
		np[0] = ch;
		sp = np;

		/* each branch writes at xp the text to be put in front of
		 * what remains at sp, and skips what it replaces
		 */
		if( strncasecmp(np,"nntp://-.-/",11) == 0 ){
			/* "-.-" stands for this server */
			sp += 11;
			sprintf(xp,"nntp://%s/",myhp);
		}else
		if( strncasecmp(np,"http://-.-/",11) == 0 ){
			sp += 11;
			sprintf(xp,"http://%s/",myhp);
		}else
		if( isFullURL(np) ){
			/* already complete */
		}else
		if( strncasecmp(np,"http:/",6) == 0 ){
		    /* "http:/path" : insert the host and port */
		    if( np[6] != '/' ){
			sp += 6;
			HostPort(hostportb,"http",host,port);
			sprintf(xp,"http://%s/",hostportb);
		    }
		}
		else
		if( ch != '/' && streq(proto,"ftp") )
		{
			/* Relay ftp to the proxy server for non-proxy client,
			 * who see current protocol as HTTP, thus will not
			 * make automatic expansion of relative URL of ftp type.
			 */
			if( ch == '.' && np[1] == '/' )
				sp += 2;
			strcpy(xp,base);
		}
		else
		if( ch == '/' && np[1] != '/' ){
			/* Absolute path without host:port.  Left as it is,
			 * this would lose the =@=:realhost:realport part of
			 * the current page's URL.
			 */
			sp += 1;
			sprintf(xp,"%s://%s/",proto,hp);
		}
		else
		if( ch == '/' && np[1] == '/' ){ /* with host:port */
			sp += 2;
			sprintf(xp,"%s://",proto);
		}
		else
		if( FULL_URL )
		{
			/* relative path: prefix proto://hostport and base */
			sprintf(xp,"%s://%s",proto,hp);
			if( *base != '/' )
				strcat(xp,"/");
			strcat(xp,base);
		}

		xp += strlen(xp);
	}
	/* the rest after the last URL */
	strcpy(xp,sp);
}

/*
 *	TRANSFORM URL TO delegated-URL
 *	This function assumes that the URLs in "line" are in FULL-SPEC
 *	format, with neither the protocol name nor the host-port field omitted.
 */
/*
 * Front end of url_delegate() taking this server's protocol, host, port
 * and path from referer.  Always returns 0.
 */
int url_delegateS(referer,src,dst,dgrelay)
	Referer *referer;
	char *src,*dst;
	char *dgrelay;
{
	url_delegate(src,dst,
		referer->r_my_proto,
		referer->r_my_host,
		referer->r_my_port,
		referer->r_my_path,
		dgrelay);
	return 0;
}
/*
 * Copy line to xline, replacing every URL reference found by
 * html_nexturl() with its delegated form as produced by url_rurl().
 * A URL is left as it is when url_rurl() returns 0 for it, or when it is
 * directly followed by ODGU_MARK or NDGU_MARK.
 * Always returns 0.
 */
int url_delegate(line,xline,myproto,myhost,myport,mypath,dgrelay)
	char *line,*xline;
	char *myproto,*myhost;
	int myport;
	char *mypath;
	char *dgrelay;
{	char *sp,*np,*xp;
	URLStr rurl;
	int ulen;
	int ch;

	sp = line;
	xp = xline;

	while( (np = html_nexturl(sp,NULL,NULL)) != NULL ){
		/* copy the text preceding the URL as it is */
		ch = *np;
		*np = 0;
		strcpy(xp,sp);
		xp += strlen(xp);
		*np = ch;
		sp = np;

		ulen = url_rurl(np,rurl,myproto,myhost,myport,mypath,dgrelay);
		if( ulen == 0 )
			continue;
		if( strncmp(sp+ulen,ODGU_MARK,strlen(ODGU_MARK)) == 0 )
			continue;
		if( strncmp(sp+ulen,NDGU_MARK,strlen(NDGU_MARK)) == 0 )
			continue;

		strcpy(xp,rurl);
		sp += ulen;
		xp += strlen(xp);
	}
	strcpy(xp,sp);
	return 0;
}
/*
 * Call (*func)(url,arg1,arg2) for every URL reference found in line by
 * html_nexturl().  During the call the URL is NUL terminated at its
 * first space, TAB, CR, LF, '"' or '>'; that character is put back
 * afterwards.  Scanning stops at the first URL without such a
 * terminator.  Always returns 0.
 */
int scan_url(line,func,arg1,arg2)
	char *line;
	int (*func)();
	char *arg1,*arg2;
{	char *sp,*np,*tp,tc;

	sp = line;
	for(;;){
		np = html_nexturl(sp,NULL,NULL);
		if( np == NULL )
			break;
		tp = strpbrk(np," \t\r\n\">");
		if( tp == NULL )
			break;

		tc = *tp;
		*tp = 0;
		(*func)(np,arg1,arg2);
		*tp = tc;
		sp = tp;
	}
	return 0;
}


/*
 *	delegated-URL SYNTHESIZER
 *	Given "attrs" is a NL-separated list of NAME=VALUEs.  This is a
 *	output format of URL parser in the SLL library.
 */

/*
 * Build the delegated form of a URL.
 *
 *   url      output buffer
 *   attrs    NL-separated NAME=VALUE list describing the original URL
 *            plus "dproto" and "delegate" (see url_rurl())
 *   ourl     the original URL text
 *   olen     number of characters of ourl making up the URL
 *   dgrelay  when NULL, no rewriting is done (unless mount_url_from()
 *            handled the URL)
 *
 * The result has the form  dproto://delegate/-_-[/MODIFIERS/]original-URL
 * Returns a pointer to the end of the text written to url, or 0 when the
 * URL is not to be rewritten.
 * NOTE(review): olen is not checked against the size of oURLbuf[] and
 * attrs is copied to abuf[] without a limit - confirm callers bound both.
 */
static char *delegate_url(url,attrs,ourl,olen,dgrelay)
	char *url;
	char *attrs;
	char *ourl;
	char *dgrelay;
{	URLStr abuf;
	char *av[64]; int ac;
	char *up;
	char *proto,*val;
	char *hostport,*delegate;
	char *dproto;
	char *path,xpath[URLSZ];
	char *search;
	char *gselector;
	char  oURLbuf[URLSZ];

	/* split a private copy of the attributes into av[] */
	strcpy(abuf,attrs);
	ac = stoV(abuf,64,av,'\n');

	/* a MOUNT rule took care of this URL */
	if( mount_url_from(url,av) )
		return url + strlen(url);

	if( dgrelay == NULL )
		return 0;

	if( (proto = getv(av,"proto")) == 0 )
		return 0;

	if( callback_it(proto) == 0 )
		return 0;

	dproto = getv(av,"dproto");
	if( dproto == NULL )
		dproto = "http";
	delegate = getv(av,"delegate");
	if( delegate == 0 )
		return 0;

	hostport = getv(av,"hostport");
	if( hostport == 0 )
		return 0;

	path = getv(av,"path");
	/* news and telnet URLs are never rewritten */
	if( streq(proto,"news") )
		return 0;
	if( streq(proto,"telnet") )
		return 0;

/*
	if( !isRELAYABLE(dgrelay,proto,hostport) )
		return 0;
*/
	if( !isREACHABLE(proto,hostport) )
		return 0;

	if( streq(proto,dproto) )
	if( delegate && hostport && streq(delegate,hostport) )
		return 0; /* no rewriting is necessary */

	if( path && nonxalpha_unescape(path,xpath,1) )
		path = xpath;

	gselector = 0;
	search = getv(av,"search");

	/* keep exactly the olen characters of the original URL */
	strncpy(oURLbuf,ourl,olen);
	oURLbuf[olen] = 0;

	up = url;
	up = Sprintf(up,"%s://",dproto);

	/* Gopher relayed by Gopher: start over with "gopher://" and put
	 * the type character right after the delegate's host:port
	 */
	if( !GOPHER_ON_HTTP && streq(proto,"gopher") ){
		up = Sprintf(url,"gopher://");
		gselector = getv(av,"path");
		if( gselector == 0 || *gselector == 0 )
			gselector = "1";
	}

	up = Sprintf(up,"%s",delegate);
	if( gselector )
		up = Sprintf(up,"/%c",*gselector);
	else	up = Sprintf(up,"/");

	/* the original already starts with the prefix built so far */
	if( strncmp(ourl,url,strlen(url)) == 0 ){
		/* is this right ?  doesn't it suppress necessary one ? */
		/*Verbose("####### DON'T MAKE DUPLICATE REWRITE: %s\n",url);*/
		return 0;
	}

/*
if( MODIFIERS[0] && up[-1] == '/' )
	up = Sprintf(up,"%s",MODIFIERS);
else
if( DELEGATE_FLAGS[0] )
if( up[-1] == '/' )
	up = Sprintf(up,"F%s",DELEGATE_FLAGS);
else	up = Sprintf(up,"(F%s)",DELEGATE_FLAGS);
*/

	up = Sprintf(up,"%s",NDGU_MARK);

/* if( DELEGATE_FLAGS[0] ) up = printFlags(up); */
if( MODIFIERS[0] )
up = Sprintf(up,"/%s/",MODIFIERS);

	up = Sprintf(up,"%s",oURLbuf);
	return up;
}
/*
 * Write "http://myhost:myport/" + NDGU_MARK + url to durl.
 * Always returns 0.
 */
int delegate_url0(durl,url,myhost,myport)
	char *durl,*url,*myhost;
	int myport;
{
	sprintf(durl,"http://%s:%d/%s%s",myhost,myport,NDGU_MARK,url);
	return 0;
}
/*
 * Turn the Gopher selector xselector, in place, into its delegated form
 *     -_-[=FLAGS=]gopher://host:port/T<selector>
 * T being gtype, or '1' when gtype is 0.  Nothing is added when
 * mount_selector_from() handles the selector.
 * Returns xselector (the function used to return no value although it
 * is declared to return char *).
 * iport is declared int because it is printed with "%d"; it was
 * declared char *.  NOTE(review): confirm that callers pass an int.
 * The caller must provide room in xselector for the added prefix.
 */
char *delegate_selector(xselector,host,iport,gtype)
	char *xselector,*host;
	int iport,gtype;
{	char dgopher[1024];
	char *dp;
	size_t plen;

	if( mount_selector_from(xselector,host,iport,gtype) )
		return xselector;

	dp = Sprintf(dgopher,NDGU_MARK);
	if( DELEGATE_FLAGS[0] )
		dp = printFlags(dp);

	dp = Sprintf(dp,"gopher://%s:%d/%c",host,iport,gtype?gtype:'1');

	/* insert the prefix in place instead of going through a second
	 * fixed-size copy of the selector
	 */
	plen = strlen(dgopher);
	memmove(xselector+plen,xselector,strlen(xselector)+1);
	memcpy(xselector,dgopher,plen);
	return xselector;
}

/* char *url_file2ftp(line,xline)
	char *line,*xline;
{	char *lp,*np;

	strcpy(xline,line);
	for( lp = xline; np = html_nexturl(lp,NULL,NULL); lp = np+1 ){
		if( strncasecmp(np,"file://",7) == 0 )
			sprintf(np,"ftp://%s",np+7);
	}
	return xline;
}
*/

/*
 * Locate the path part of a URL whose scheme is accepted by
 * localPathProto().
 *
 *   proto  when not NULL, receives the scheme (leading letters/digits)
 *   login  when not NULL, receives the host of "proto://host/...", or
 *          "localhost" for "proto:///..."; "" when url has no "//"
 *
 * Returns a pointer into url at the start of the path, or NULL when url
 * has no ':', the scheme is not a local-path one, or the scheme is not
 * directly followed by ':'.
 * Parts not wanted by the caller are measured, not copied, so the
 * internal buffer is never written beyond its size.
 */
char *file_hostpath(url,proto,login)
	char *url,*proto,*login;
{	char protobuf[128],*path;
	size_t plen,hlen;
	char ch;

	if( strchr(url,':') == NULL )
		return NULL;

	if( proto != NULL )
		proto[0] = 0;
	if( login != NULL )
		login[0] = 0;

	for( plen = 0; (ch = url[plen]) != 0; plen++ ){
		if( !( ('a' <= ch && ch <= 'z')
		    || ('A' <= ch && ch <= 'Z')
		    || ('0' <= ch && ch <= '9') ) )
			break;
	}
	if( proto == NULL ){
		/* presumably no local-path protocol name is this long */
		if( sizeof(protobuf) <= plen )
			return NULL;
		proto = protobuf;
	}
	memcpy(proto,url,plen);
	proto[plen] = 0;

	if( !localPathProto(proto) )
		return NULL;

	path = url + plen;
	if( path[0] != ':' )
		return NULL;
	path += 1;

	if( strncmp(path,"//",2) == 0 ){
		path += 2;
		if( path[0] == '/' ){
			if( login != NULL )
				strcpy(login,"localhost");
		}else{
			hlen = strcspn(path,"/");
			if( login != NULL ){
				memcpy(login,path,hlen);
				login[hlen] = 0;
			}
			path += hlen;
		}
	}
	return path;
}



/*
 *	SCAN A URL AND EXPAND IT TO A delegated-URL
 */

#include "SLL.h"
extern SLL_putval();
extern SLLRule URL[];

/*
 * Parse the URL at the head of url and write its delegated form to rurl.
 *
 *   dproto,dhost,dport,dpath  the delegate to go through; handed to
 *                             delegate_url() as "dproto=" and "delegate="
 *   dgrelay                   passed on to delegate_url()
 *
 * Returns the number of characters of url to be replaced by rurl, or 0
 * when url is to be left as it is.
 * A URL prefixed with "!-_-" is never delegated: the text after the
 * prefix is copied to rurl and the length of the whole of url is
 * returned.
 */
int url_rurl(url,rurl,dproto,dhost,dport,dpath,dgrelay)
	char *url,*rurl,*dproto,*dhost,*dpath;
	int dport;
	char *dgrelay;
{	char *nurl;
	char values[URLSZ];
	char *vp;
	int len;

	if( strncmp(url,"!-_-",4) == 0 ){
		strcpy(rurl,url+4);
		return strlen(url);
	}

	if( reserve_url() )
		return 0;

	nurl = url;
	vp = values;
	values[0] = 0;

	*rurl = 0;
	if( SLLparse(0,URL,url,&nurl,SLL_putval,vp,URLSZ,&vp) != 0 )
		return 0;
	len = nurl - url;

	if( dproto && dproto[0] )
		vp = Sprintf(vp,"dproto=%s\n",dproto);

	if( dhost && dhost[0] ){
		char hostport[128];
		if( dproto && dproto[0] )
			HostPort(hostport,dproto,dhost,dport);
		else	sprintf(hostport,"%s:%d",dhost,dport);
		vp = Sprintf(vp,"delegate=%s%s\n",hostport,dpath);
	}

	if( delegate_url(rurl,values,url,len,dgrelay) == 0 )
		return 0;
	return len;
}

/*
 *	SCAN A URL-EXTENSION
 */
extern SLLRule URLX[];

/*
 * Print "t=<first l characters of n>" and a newline on stdout.
 * vb is not used.  Always returns 0.
 * The text is printed with a precision instead of being copied into a
 * fixed-size buffer, so l is not limited by any buffer size.
 */
int putv(t,n,l,vb)
	char *t,*n,*vb;
	int l;
{
	printf("%s=%.*s\n",t,l,n);
	return 0;
}

/*
 * Parse url with the URL rule table, collecting values into "values"
 * (up to URLSZ bytes, through SLL_putval).
 * Returns the number of characters of url that were parsed, or 0 when
 * SLLparse() reports a failure.
 */
int scan_url1(url,values)
	char *url;
	char *values;
{	char *next;
	char *vp;

	next = url;
	vp = values;
	values[0] = 0;
	if( SLLparse(0,URL,url,&next, SLL_putval,vp,URLSZ,&vp ) != 0 )
		return 0;
	return next - url;
}
/*
 * Parse urlx with the URLX (URL extension) rule table, collecting values
 * into "values" (up to URLSZ bytes, through SLL_putval).
 * Returns the number of characters of urlx that were parsed, or 0 when
 * SLLparse() reports a failure.
 */
int scan_urlx(urlx,values)
	char *urlx;
	char *values;
{	char *next;
	char *vp;

	next = urlx;
	vp = values;
	values[0] = 0;
	if( SLLparse(0,URLX,urlx,&next, SLL_putval,vp,URLSZ,&vp ) != 0 )
		return 0;
	return next - urlx;
}


/*
 *	URL SYNTAX TABLES FOR SLL LIBRARY
 */

/* Character sets referred to by the CHARSET entries of the rule tables below. */
static char DIGIT[] = "0123456789";
static char ALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
";

/*
static char NALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
$-_.&+\
!*'();, \
";
*/
/* letters, digits and  $ - _ . & + ! * ' ( ) ; ,   (the former version
 * above also had a space)
 */
static char NALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
$-_.&+\
!*'();,\
";

/* NALPHA plus ':' space and '%' */
static char XALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
$-_.&+\
!*'():;, %\
";

/* XALPHA plus '@' */
static char YALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
$-_@.&+\
!*'():;, %\
";

/*
 * Rule tables.  ISRULE, ALT, SEQ, END and the flag names come from
 * SLL.h, which is not shown here; the remarks below describe what the
 * tables contain and presume the usual reading (ALT = alternatives,
 * SEQ = sequence) - TODO confirm against SLL.h.
 * Each entry is { name, leading literal or character set, rule, flags }.
 */
ISRULE( URL	);
ISRULE( HTTP	);
ISRULE( GOPHER	);
ISRULE( FTP	);
ISRULE( FILEP	);
ISRULE( NEWS	);
ISRULE( NNTP	);
ISRULE( WAIS	);

/*
ISRULE( AFS	);
ISRULE( MAILTO	);
ISRULE( TELNET	);
ISRULE( GENERIC);
*/

ISRULE( HOSTPORT);
ISRULE( PATH);
ISRULE( SEARCH);

/* URL: a scheme name selecting the rule for the rest of the URL */
ALT(URL)
	{ "proto",	"http",		HTTP,		IGNCASE|PUTGATE},
	{ "proto",	"gopher",	GOPHER,		IGNCASE|PUTGATE},
	{ "proto",	"ftp",		FTP,		IGNCASE|PUTGATE},
	{ "proto",	"file",		FILEP,		IGNCASE|PUTGATE},
	{ "proto",	"news",		NEWS,		IGNCASE|PUTGATE},
	{ "proto",	"nntp",		NNTP,		IGNCASE|PUTGATE},
	{ "proto",	"wais",		WAIS,		IGNCASE|PUTGATE},
/*
	{ "proto",	"afs://",	AFS,		IGNCASE|PUTGATE},
	{ "proto",	"mailto::",	MAILTO,		IGNCASE|PUTGATE},
	{ "proto",	"telnet:",	TELNET,		IGNCASE|PUTGATE},
	{ "proto",	IMM,		GENERIC,	IGNCASE|PUTGATE},
*/
END

/* after "http":  "://" hostport [ "/" path ] [ "?" search ] */
SEQ(HTTP)
	{ "://",	"://",		NEXT		},
	{ "hostport",	IMM,		HOSTPORT,	PUTVAL},
	{ "path",	"/",		PATH,		OPTIONAL|PUTVAL},
	{ "search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
END

ISRULE( IALPHA );
ISRULE( DIGITS );
ISRULE( ALPHAS );
ISRULE( NALPHAS);
ISRULE( XALPHAS);
ISRULE( YALPHAS);

/* host name: IALPHA words joined by "." */
SEQ(HOSTNAME)
	{ "name",	IMM,		IALPHA		},
	{ "name",	".",		HOSTNAME,	OPTIONAL},
END
/* host number: four DIGITS groups joined by "." */
SEQ(HOSTNUMBER)
	{ "num1",	IMM,		DIGITS		},
	{ "num2",	".",		DIGITS		},
	{ "num3",	".",		DIGITS		},
	{ "num4",	".",		DIGITS		},
END
ALT(HOST)
	{ "name",	IMM,		HOSTNAME	},
	{ "number",	IMM,		HOSTNUMBER	},
END
SEQ(PORT)
	{ "number",	IMM,		DIGITS		},
END
/* host [ ":" port ] */
SEQ(HOSTPORT)
	{ "host",	IMM,		HOST,		PUTVAL},
	{ "port",	":",		PORT,		OPTIONAL|PUTVAL},
END

/* one ALPHA character, then optionally NALPHA characters */
SEQ(IALPHA)
	{ "alpha",	ALPHA,		NEXT,		CHARSET	},
	{ "xalphas",	IMM,		NALPHAS,	OPTIONAL},
END

/* search part: XALPHAS words joined by "+"; may be empty */
SEQ(SEARCH1)
	{ "search",	IMM,		XALPHAS,	},
	{ "search",	"+",		SEARCH,		OPTIONAL},
END
SEQ(SEARCH)
	{ "search",	IMM,		SEARCH1,	OPTIONAL},
END

/* one or more characters of the respective character set */
SEQ(ALPHAS)
	{ "alpha",	ALPHA,		NEXT,		CHARSET},
	{ "alpha",	IMM,		ALPHAS,		OPTIONAL},
END
SEQ(NALPHAS)
	{ "nalpha",	NALPHA,		NEXT,		CHARSET},
	{ "nalpha",	IMM,		NALPHAS,	OPTIONAL},
END
SEQ(XALPHAS)
	{ "xalpha",	XALPHA,		NEXT,		CHARSET},
	{ "xalpha",	IMM,		XALPHAS,	OPTIONAL},
END
SEQ(YALPHAS)
	{ "yalpha",	YALPHA,		NEXT,		CHARSET},
	{ "yalpha",	IMM,		YALPHAS,	OPTIONAL},
END

/* path: YALPHAS names joined by "/"; may be empty */
SEQ(PATH1)
	{ "name",	IMM,		YALPHAS,	OPTIONAL},
	{ "dir",	"/",		PATH,		OPTIONAL},
END

ALT(PATH)
	{ "path",	IMM,		PATH1		},
	{ "nullpath",	IMM,		SUCCESS		},
END


/* user [ ":" pass ] "@" */
SEQ(USERPASS)
	{ "user",	IMM,		XALPHAS,	PUTVAL},
	{ "pass",	":",		XALPHAS,	OPTIONAL|PUTVAL},
	{ "@",		"@",		SUCCESS		},
END
SEQ(LOGIN)
	{ "userpass",	IMM,		USERPASS,	OPTIONAL|PUTVAL},
	{ "hostport",	IMM,		HOSTPORT,	PUTVAL},
END
/* after "ftp":  "://" login [ "/" path ] */
SEQ(FTP)
	{ "login",	"://",		LOGIN,		PUTVAL	},
	{ "path",	"/",		PATH,		OPTIONAL|PUTVAL},
END

/* after "file":  "://" [host] [ "/" path ]   or   ":" path */
SEQ(FILEH)
	{ "host",	IMM,		HOST,		OPTIONAL|PUTVAL	},
	{ "path",	"/",		PATH,		OPTIONAL|PUTVAL	},
END
ALT(FILEP)
	{ "file",	"://",		FILEH		},
	{ "path",	":",		PATH,		PUTVAL	},
END

ALT(GROUP1)
	{"name",	".",		GROUP1		},
	{"name",	IMM,		SUCCESS		},
END
SEQ(GROUP)
	{"name",	IMM,		IALPHA		},
	{"name",	IMM,		GROUP1,		OPTIONAL},
END
/* article: serial "@" host */
SEQ(ARTICLE)
	{"serial",	IMM,		XALPHAS		},
	{"domain",	"@",		HOST		},
END
ALT(GROUPART)
	{"group",	IMM,		GROUP,		PUTVAL	},
	{"article",	IMM,		ARTICLE,	PUTVAL	},
END
/* after "news":  ":" group-or-article */
SEQ(NEWS)
	{"groupart",	":",		GROUPART,	PUTVAL	},
END
/* after "nntp":  "://" hostport "/" group [ "?" search ] */
SEQ(NNTP)
	{"hostport",	"://",		HOSTPORT,	PUTVAL	},
	{"group",	"/",		GROUP,		PUTVAL	},
	{"search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
END

SEQ(DATABASE)
	{"database",	IMM,		XALPHAS,	},
END
/* after "wais":  "://" hostport "/" database [ "?" search ] */
SEQ(WAIS)
	{"hostport",	"://",		HOSTPORT,	PUTVAL	},
	{"database",	"/",		DATABASE,	PUTVAL	},
	{"search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
END


ALT(SELECTOR)
	{ "selector",	IMM,		PATH,		},
END

/* Gopher type: one DIGIT character, or nothing */
ALT(GTYPE)
	{ "gtype",	DIGIT,		SUCCESS,	CHARSET},
	{ "nullgtype",	IMM,		SUCCESS		},
END

SEQ(GSELECTOR)
	{ "gtype",	IMM,		GTYPE,		PUTVAL},
	{ "selector",	IMM,		SELECTOR,	OPTIONAL|PUTVAL},
END

/* after "gopher":  "://" hostport [ "/" gtype selector ] [ "?" search ] */
SEQ(GOPHER)
	{ "//",		"://",		NEXT		},
	{ "hostport",	IMM,		HOSTPORT,	PUTVAL},
	{ "path",	"/",		GSELECTOR,	OPTIONAL|PUTVAL},
	{ "search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
END


/* DIGITS: one or more DIGIT characters */
ALT(DIGITS1)
	{ "digit",	DIGIT,		DIGITS1,	CHARSET	},
	{ "nondigit",	IMM,		SUCCESS		},
END
ALT(DIGITS)
	{ "digit",	DIGIT,		DIGITS1,	CHARSET	},
END

/*
 *	URL extension (the part following "=@=" in an old style
 *	delegated-URL, see url_undelegate()):
 *	flags are letters enclosed in  =...=  +...=  (...)  or  @...@
 */
SEQ(FLAGS1)
	{ "flags",	"=",		ALPHAS,		},
	{ "eoflags",	"=",		SUCCESS		},
END
SEQ(FLAGS2)
	{ "flags",	"+",		ALPHAS,		},
	{ "eoflags",	"=",		SUCCESS		},
END
SEQ(FLAGS3)
	{ "flags",	"(",		ALPHAS,		},
	{ "eoflags",	")",		SUCCESS		},
END
SEQ(FLAGS4)
	{ "flags",	"@",		ALPHAS,		},
	{ "eoflags",	"@",		SUCCESS		},
END
ALT(FLAGS)
	{ "f1",		IMM,		FLAGS1		},
	{ "f2",		IMM,		FLAGS2		},
	{ "f3",		IMM,		FLAGS3		},
	{ "f4",		IMM,		FLAGS4		},
END
/* [flags] [proto] ":" hostport [ "=" gtype-digits ]
 * NOTE(review): url_undelegate() looks the last value up as "xtype",
 * while it is named "xgtype" here - confirm which is intended.
 */
SEQ(URLX)
	{ "xflags",	IMM,		FLAGS,		PUTVAL|OPTIONAL},
	{ "xproto",	IMM,		ALPHAS,		PUTVAL|OPTIONAL},
	{ "xhostport",	":",		HOSTPORT,	PUTVAL},
	{ "xgtype",	"=",		DIGITS,		PUTVAL|OPTIONAL},
END

/*
ISRULE(ROUTE);
ISRULE(HOSTLIST);

SEQ(ROUTE)
	{ "proto",	IMM,		ALPHAS,		PUTVAL},
	{ "host",	"://"		HOST,		PURVAL},
	{ "port",	":"		PORT,		PURVAL},
	{ "dstlist",	":"		HOSTLIST,	PURVAL},
	{ "dstlist",	":"		HOSTLIST,	PURVAL|OPTIONAL},
END

SEQ(HOSTLIST)
	{ "host",	IMM,		HOST,
END
*/
