/* ------------------------- REXX -------------------------- */
   lastmod='1996-08-21'
/* 96-08-21 or rework <table> tag error                      */
/* 96-08-03 or rework <table> tag due to "Kirchner Soft"     */
/* 96-08-02 or rework <table> tag due to pinkas@en.com       */
/* 96-07-16 or follow href=                                  */
/* 96-06-29 or rework                                        */
/* 96-04-17 or try to support <table>                        */
/* 96-01-15 or reworked                                      */
/* 95-07-10 or decode HTML files                             */
/* --------------------------------------------------------- */
/* call:    htm2txt infile  [l nn [p nn [e editor            */
/* output:  infile-name.TXT                                  */
/*                                                           */
/* recognised tags:                                          */
/*                                                           */
/*   all tags as supported by webex 1.1b                     */
/*                                                           */
/* special tags:                                             */
/*                                                           */
/*   <trace>                            trace '?i'           */
/*   <exit>                             exit immediately     */
/*                                                           */
/* recognised substitute variables see variable 'consts'     */
/*            tab-char '	' will be substituted by 10 blanks  */
/*                                                           */
/* notes:                                                    */
/*                                                           */
/*   all tags are converted as 'best fit'.                   */
/*   the image a browser produces will not be met.           */
/*                                                           */
/* --------------------------------------------------------- */
/* variables to be customized                                */
/*                                                           */
   linemax=72                /* maximum line length          */
   pixlbyt= 6                /* pixels per byte for tables   */
   editor ='e'               /* editor for output file       */
/* --------------------------------------------------------- */
/* constants as known of today                               */
/*                                                           */
   consts=       "&space '20'x"
   consts=consts "&nbsp  '20'x"
   consts=consts "&szlig   "
   consts=consts "&auml    "
   consts=consts "&Auml    "
   consts=consts "&ouml    "
   consts=consts "&Ouml    "
   consts=consts "&uuml    "
   consts=consts "&Uuml    "
   consts=consts "&aacute  "
   consts=consts "&eacute  "
   consts=consts "&iacute  "
   consts=consts "&#32   '20'x"
   consts=consts "&#58     :"
   consts=consts "&#60     <"
   consts=consts "&#62     >"
   consts=consts "&#91     ["
   consts=consts "&#93     ]"
   consts=consts "&#146    '"   /*              */
   consts=consts '&quot    "'
   consts=consts '&#145    "'   /*              */
   consts=consts '&#147    "'   /*              */
   consts=consts '&#148    "'   /*              */
   consts=consts "&#160  'a0'x"
   consts=consts "&#176    "   /*  EBCDIC !!!  */
   consts=consts "&#185    '"   /*              */
   consts=consts "&mdash   -"
   consts=consts "&lt      <"
   consts=consts "&gt      >"
   consts=consts "&amp   '00'x"
   consts=consts "&copy  '20'x"
/* --------------------------------------------------------- */
/* check input parameters                                    */
/*                                                           */
   parse upper arg ifiname options
   if ifiname='' then                              exit 4

   parse var ifiname fn '.' ext
   if ext='' then ifiname=ifiname'.HTM'
   ofiname = fn'.TXT'
/* --------------------------------------------------------- */
/* check for options                                         */
/*                                                           */
   do while options \= ''
     parse upper var options opt val options
     select
       when opt='L' then linemax=val
       when opt='P' then pixlbyt=val
       when opt='E' then editor =val
       otherwise nop
       end
     end
/* --------------------------------------------------------- */
/* some global controls                                      */
/*                                                           */
   hrf.0=1                   /* href-control                 */
   hrf.1=ifiname

   'erase' ofiname '2>NUL'

   call lineout ofiname,'HTM2TXT v.' lastmod
   call lineout ofiname,' '
   call lineout ofiname,'Extracted from' ifiname',' date()',' left(time(),5)
   call lineout ofiname,' '

   do nexthrf=1 while hrf.0>=nexthrf
     call process_file hrf.nexthrf
     end
   call lineout ofiname
   say

   if editor \= '' then 'start /F' editor ofiname /*  <=== edit result            */
/* --------------------------------------------------------- */
                                                      exit 0
/* --------------------------------------------------------- */
/* process a file                                            */
/*                                                           */
process_file: parse arg ifiname
/* --------------------------------------------------------- */
/* read infile                                               */
/*                                                           */
   nl ='0d'x                 /* new line character           */
   ifi=''
   say
   say 'reading' ifiname
   call stream ifiname,'c','close'
   do i=1 while chars(ifiname)>1
     ifi = ifi||linein(ifiname)||nl
     end
   call stream ifiname,'c','close'
   say i-1 'records read from' ifiname
/* --------------------------------------------------------- */
/* format outfile lines                                      */
/*                                                           */
   ofi.0=0                   /* out file controls            */
   dlspaces  =''             /* <DL>-spaces                  */
   lispaces  =''             /* <LI>-spaces                  */
   indents   =0              /* number of indents            */
   blanklines=0              /* number of blank lines        */
   linelen   =linemax        /* max. linelength              */
   outtext   =''             /* initial text                 */

                             /* switches:                    */
   swi_pre   = 0             /* switch PRE                   */
   swi_tbl   = 0             /* switch table definition      */
   swi_lst   = 0             /* switch list  definition      */
   swi_cnt   = 0             /* switch center text           */
   swi_cat   = 0             /* switch concatenate           */
   swi_trc   = 0             /* switch trace                 */

/* --------------------------------------------------------- */
/* scan input stream                                         */
/*                                                           */
   call charout ,'processing token '
   text=''

   do count=1 while length(ifi)>0

     call charout ,format(count,5) copies('08'x,6)

     if swi_trc then trace 'i'

                               /* check next line            */
     parse var ifi parttext '<' tag '>' ifi
                               /* process text               */
     select
       when swi_pre            then call process_preformatted
       when strip(parttext)=nl then nop
       otherwise               do
                                 do while pos(nl,parttext)>0
                                   parse var parttext a (nl) b
                                   parttext=strip(a) strip(b)
                                   end
                                 if swi_cat then text=text||parttext
                                 else do
                                   if text='' then text=     parttext
                                              else text=text parttext
                                   end
                                 end
       end
                               /* process tag                */
     tag=translate(tag,' ',nl)
     if left(tag,1)='!' then tag='!' substr(tag,2)
     parse var tag tag options
     tag=translate(tag)
     swi_cat=0

     select
       when tag='TRACE'    then  swi_trc=1
       when tag='EXIT'     then  signal finish

       when tag='!'        then  call out '***' options '***'
       when tag='FONT'     then  swi_cat=1
       when tag='UL',
       |    tag='OL',
       |    tag='DL',
       |    tag='DIR',
       |    tag='MENU',
                           then do
                                 call out text
                                 call out ' '
                                 lispaces=' * '
                                 indents=indents+1
                                 swi_lst=1
                                 end
       when tag='LI'       then  call out text
       when tag='DT'       then do
                                 call out text
                                 lispaces=' * '
                                 if indents>0 then indents=indents-1
                                 end
       when tag='DD'       then do
                                 call out text
                                 lispaces='    '
                                 indents=indents+1
                                 end
       when tag='/UL',
       |    tag='/OL',
       |    tag='/DL',
       |    tag='/DIR',
       |    tag='/MENU',
                           then do
                                 call out text
                                 lispaces=''
                                 if indents>0 then indents=indents-1
                                 call out ' '
                                 swi_lst=0
                                 end
       when tag='CENTER',
       |    tag='CENTRE',
                           then  swi_cnt=1
       when tag='/CENTER',
       |    tag='/CENTRE',
                           then do
                                 swi_cnt=0
                                 call out text
                                 end
       when tag='P',
       |    tag='/TITLE',
       |    tag='/CENTER',
       |    tag='/CENTRE',
                           then  call out text
       when tag='/HEAD',
                           then do
                                 call out text
                                 call out ' '
                                 end
       when tag='PRE'      then do
                                 swi_pre=1
                                 linelen=parmval('WIDTH',options)
                                 end
       when tag='/PRE'     then do
                                 swi_pre=0
                                 linelen=linemax
                                 end

       when tag='HR'       then call out copies('-',linelen)

       when tag='H1',
       |    tag='H2',
       |    tag='H3',
       |    tag='H4',
       |    tag='/H1',
       |    tag='/H2',
       |    tag='/H3',
       |    tag='/H4',
                           then do
                                 call out text
                                 call out ' '
                                 end
       when tag='A'        then do
                                 parse upper var options 'HREF' . '"' hrefid '"'
                                 nogo=      pos('#',hrefid)>0
                                 parse var hrefid z '.' fext
                                 nogo=nogo|(left(fext,3)\='HTM')
                                 parse var hrefid z 'FILE:' hrefid
                                 if hrefid='' then hrefid=z
                                 nogo=nogo|(strip(hrefid)='')
                                 do i=1 to hrf.0
                                   if hrf.i=hrefid then leave
                                   end
                                 if (i>hrf.0)&(\nogo) then do
                                   hrf.0=hrf.0+1; z=hrf.0; hrf.z=hrefid
                                   end
                                 end
/*
       when tag='IMG'      then do
                                 z=parmval('ALT',options)
                                 if z\=0 then do
                                   if swi_tbl then do
                                                   text=z
                                                   call save_table_text
                                                   end
                                   else text=text z
                                   end
                                 end
*/
       when tag='TABLE'    then do
                                 call out text
                                 call out ' '
                                 swi_tbl=1
                                 swi_wid=1
                                 tbwid. =0
                                 end
       when tag='TR'       then do
                                 tbcol=0
                                 tbmax=0
                                 drop tbtxt.
                                 end
       when tag='TD'       then do
                                       /* determine next column    */

                                 z=parmval('COLSTART',options)
                                 if z=0 then tbcol=tbcol+1
                                        else tbcol=z
                                 if tbmax<tbcol then tbmax=tbcol

                                       /* check for width= tag     */

                                 p=parmval('WIDTH',options)
                                 if p>0 then do
                                   select
                                     when right(p,3)='PIX' then do
                                       parse var p n 'PIX' .
                                       tbwid.tbcol=n%pixlbyt
                                       end
                                     when right(p,1)='%'   then do
                                       parse var p n '%' .
                                       tbwid.tbcol=(n*linelen)%100
                                       end
                                     otherwise
                                     if p>linemax then p=linemax
                                     tbwid.tbcol=p
                                     end
                                   end

                                       /* set lines/col to 0       */

                                 tblin.tbcol=0
                                 end

       when tag='/TD'      then do
                                 if swi_tbl then call save_table_text
                                 end

       when tag='/TR'      then do
                                 if swi_tbl then do

                                   /* col-width already done ?     */

                                   if swi_wid then do
                                     swi_wid=0

                                         /* check predefined col-width */

                                     colwi=0
                                     do i=1 to tbmax
                                       colwi=colwi+tbwid.i
                                       end
                                     linelen=linemax-colwi
                                     if linelen<=0 then linelen=linemax

                                         /* set col-width if not set   */

                                     do i=1 to tbmax
                                       if tbwid.i>0 then iterate
                                       tbwid.i=linelen%tbmax
                                       end
                                     linelen=linemax

                                         /* check sum colwid exceeds   */

                                     sum_col=0
                                     do i=1 to tbmax
                                       sum_col=sum_col+tbwid.i
                                       end
                                     if sum_col>linemax then do
                                       ratio=linemax/sum_col
                                       do i=1 to tbmax
                                         tbwid.i=trunc(tbwid.i/ratio)
                                         end
                                       end
                                     end

                                       /* get max nr. lines in row   */

                                   lnmax=1
                                   do i=1 to tbmax
                                     if lnmax<tblin.i then lnmax=tblin.i
                                     end

                                       /* fill uninitlzd variables   */

                                   do y=1 to lnmax
                                     do k=1 to tbmax
                                       tbtxt.k.y=strip(subs(tbtxt.k.y))
                                       if left(tbtxt.k.y,6)\='TBTXT.' then iterate
                                       if k=1 then tbtxt.k.y='_'
                                              else tbtxt.k.y=''
                                       end
                                     end

                                       /* scan all lines all cols    */

                                   do y=1 to lnmax
                                     anytxt=0
                                     do k=1 to tbmax
                                       if strip(tbtxt.k.y)='' then iterate
                                       anytxt=1
                                       leave
                                       end

                                     do while anytxt
                                         anytxt=0
                                       do k=1 to tbmax

                                         /* check length fits          */

                                         if length(tbtxt.k.y)>tbwid.k then do
                                           z=lastpos(' ',tbtxt.k.y,tbwid.k)
                                           if z=0 then do  /* give up    */
                                                         otext=tbtxt.k.y
                                                         tbtxt.k.y=''
                                                         end
                                                  else do  /* split text */
                                                         otext=left(tbtxt.k.y,z)
                                                         tbtxt.k.y=substr(tbtxt.k.y,z)
                                                         anytxt=1
                                                         end
                                           end
                                         else do
                                           otext=tbtxt.k.y
                                           tbtxt.k.y=''
                                           end
                                         tbtxt.1.y='_'

                                         /*  build output line       */

                                         text=text left(otext,tbwid.k)
                                         end

                                         /* all cols processed         */

                                       call out_table_text
                                       end
                                     end
                                   end
                                 end

       when tag='/TABLE'   then do
                                 blanklines=0
                                 call out ' '
                                 swi_tbl=0
                                 end
       when tag='BR'       then do
                                 if swi_lst then call out text
                                 if swi_tbl then call save_table_text
                                            else call out text
                                 end
       otherwise nop
       end
                               /* all finished               */
     end
/* --------------------------------------------------------- */
/* write outfile                                             */
/*                                                           */
finish:

   say
   do i=1 to ofi.0
     call lineout ofiname,ofi.i
     end
/* --------------------------------------------------------- */
                                                      return
/* ========================================================= */
/* --------------------------------------------------------- */
/* save table-text                                           */
/*                                                           */
   save_table_text:

     if strip(text)=''         then return

     tblin.tbcol=tblin.tbcol+1
     z=tblin.tbcol
     tbtxt.tbcol.z=text
     text=''
                                                      return
/* --------------------------------------------------------- */
/* out  table-text                                           */
/*                                                           */
   out_table_text:

     text = strip(text)
     if text \= '_' then call o text
     text = ''
                                                      return
/* --------------------------------------------------------- */
/* process preformatted                                      */
/*                                                           */
process_preformatted:

   do while length(parttext)>0
     parse var parttext outtext (nl) parttext
     call out outtext
     end
                                                    return
/* --------------------------------------------------------- */
/* extract parameter values                                  */
/*                                                           */
parmval: procedure; parse upper arg key,string

   z=pos(key,string)
   if z=0 then                                  return 0
   string=substr(string,z)
   parse var string '=' val  .
   val=translate(val,' ','"')
   val=translate(strip(val))
                                                return val
/* --------------------------------------------------------- */
/* do output lines                                           */
/*                                                           */
out:

   oli=subs(strip(arg(1)))
   oll=length(oli)
                     /* do not output more than 1 blank line */
   if oll=0 then do
     if blanklines>0 then                       return
     blanklines=blanklines+1
     end

   if linelen>0 then do
     do while oll>linelen
       z=lastpos(' ',oli,linelen)
       if z=0 then z=oll
       if (z>0) then do
                     call o left(oli,z)
                     oli=strip(substr(oli,z+1))
                     oll=length(oli)
                     end
       end
     end
   call o oli
   if oll>0 then blanklines=0
   text=''
                                                     return
o: procedure expose swi_cnt linelen indents dlspaces lispaces ofi.
                            parse arg ooo
   if swi_cnt then do
     z=(linelen-length(ooo))%2
     if z>0 then prefix=copies(' ',z)
            else prefix=''
     end
   else do
     prefix=copies(' ',indents)||lispaces||dlspaces
     end
   ofi.0=ofi.0+1; z=ofi.0; ofi.z=prefix||ooo
                                                     return
/* --------------------------------------------------------- */
/* substitute constants                                      */
/*                                                           */
   subs: procedure expose consts;

     l = arg(1)
                               /* check for tab chars        */
     t='09'x
     s=copies(' ',10)
     z=pos(t,l)
     do while z>0
       l=left(l,z-1)||s||substr(l,z+1)
       z=pos(t,l)
       end
                               /* check for variables        */
     z=pos('&',l)
     if z=0 then                              return l

     do while z > 0
       head  = left(l,z-1)
       token = substr(l,z)
       do i=1 to words(consts) by 2
         a=word(consts,i)
         b=length(a)
         c=left(token,b)
         d=word(consts,i+1)
         if right(d,2)="'x" then interpret "d="d
         if c=a then do
           head=head||d
           token=substr(token,b+2)
           leave
           end
         end
       if i>words(consts) then do
         token='?'substr(token,2)
         end
       l = head||token
       z=pos('&',l)
       end
                               return translate(l,'&','00'x)
/* --------------------------------------------------------- */
