(*$S-*)  (*no standard*)                                     (*VAX*)
(*$C+*)  (*range checking*)                                  (*VAX*)
program compress(input,output,inprog,outprog,incref,outcref,debug);

{reads a PASCAL program and produces a similar program with short variable
names (size MAXCH).  Does not check for short name uniqueness}


const
   ONLYDISAMBIGUATE = TRUE;  { if true, don't shorten the ID's, just
                             disambiguate them in the first MAXCH chars }
   MAXCH = 16;      { number of chars significant to IBM PASCAL }
   MAXVARLEN = 31;  { number of chars that are significant to SVS PASCAL }

   BLANK = ' ';
   ORD_TAB = 9;    {UNIX}
 { ORD_TAB = 9; }  (*VAX*)
 { ORD_TAB = 5; }  (*370*)
   (* these are bogus constants so we can test this program on itself *)
   AMBIGUOUSxxxx1 = 1;
   AMBIGUOUSxxxx2 = 1;
   ZZZZZZZZZZZZZZZ = 10;
   zzzzzzzzzzzzzzzz=12;
   zzzzzzzzzzzzzzzzz=13;
   zzzzzzzzzzzzzzzzzz=13;
   LONGNAMEEXAMPLEAAAA = 15;
   LNGNMXMP = 17;
   X = 100;
   XAAAAAAAAAAAAAAAAA = 100;

type
   VariableName = packed array[1..MAXVARLEN] of char;
   ChangePointer = ^change;
   change = record
      oldName,newName: VariableName;
      newNameSize: integer;    {size of new name}
      next: ChangePointer;
      end;
var
   TAB: char;
{  argString : packed array[1..100] of char;                        }(*BRKPAS*)
   debug,inprog,outprog,incref,outcref: text;
   ListStart: array[1..MAXVARLEN,'a'..'z'] of ChangePointer;
   sentinel: ChangePointer;
   debugging: boolean;           { TRUE if debugging }


function SizeOf(var v: VariableName): integer;

var
   i : integer;

begin
i := MAXVARLEN;
while v[i] = BLANK do
   i := i - 1;
SizeOf := i;
end;

function AlreadyUsed(var v: VariableName): boolean;

{tells if a name is already used as a result}

var
   i,nch: 0..MAXVARLEN;
   look: ChangePointer;
   found: boolean;


function SizeMatch(n1, n2 : integer) : boolean;
var ret : boolean;
begin
    if (n1 = n2) then begin
        ret := TRUE;
    end else if (n1 >= MAXCH) and (n2 >= MAXCH) then begin
        ret := TRUE;
    end else begin
        ret := FALSE;
    end;
    SizeMatch := ret;
end;

function MatchInFirstMAXCH(var v1, v2 : VariableName) : boolean;
var
    endIndex, i : integer;
    saveCh1, saveCh2 : char;
begin
    if debugging then writeln(output, 'match(', v1, ',', v2, ')');
    endIndex := MAXCH + 1;
    saveCh1 := v1[endIndex];
    saveCh2 := v2[endIndex];
    v1[endIndex] := 'X';
    v2[endIndex] := 'Y';
    i := 1;
    while v1[i] = v2[i] do begin
        i := i + 1;
    end;
    v1[endIndex] := saveCh1;
    v2[endIndex] := saveCh2;

    MatchInFirstMAXCH := (i = endIndex);
    if debugging then writeln(output, 'match ends; i = ', i, 
			       ' endIndex = ', endIndex);
end;


begin
    found := FALSE;
    nch := SizeOf(v);
    sentinel^.newName := v;
    sentinel^.newNameSize := nch;
    i := 0;
    while not found and (i < MAXVARLEN) do begin
        i := i + 1;
        look := ListStart[i,v[1]];
        while not SizeMatch(look^.newNameSize,nch) do begin
            look := look^.next;
        end;
        while not MatchInFirstMAXCH(look^.newName, v) do begin
            look := look^.next;
            while not SizeMatch(look^.newNameSize,nch) do begin
                look := look^.next;
            end;
        end;
        found := (look <> sentinel);
    end;
            
    if found and debugging then writeln(output,'this ID is already used: ',v);
    AlreadyUsed := found;
end;


procedure MakeNewName(var old,result: VariableName; nch: integer);

var
    i,j,k: integer;
    finished : boolean;
begin
    i := 0;                   {step 1; remove the underscores}
    for j := 1 to nch do begin
        if {(old[j] <>  '_') and} (old[j] <> '$') then begin
            i := i + 1;
            result[i] := old[j];
        end;
    end;

    (* clean out rest of result *)
    for j := i+1 to MAXVARLEN do begin
        result[j] := BLANK;
    end;

    (* finished := ONLYDISAMBIGUATE and then not AlreadyUsed(result); *)
    if ONLYDISAMBIGUATE then begin
        if AlreadyUsed(result) then begin
            writeln(' ** collision **',result,' already used');
            finished := FALSE;
        end else begin
            finished := TRUE;
        end;
    end else begin
        finished := FALSE;
    end;

    if finished then begin
        (* do nothing *)
    end else begin
        if (i > MAXCH) then begin  {step 2; if still too long remove the vowels}
            nch := i;              {but don't change first letter to prevent}
                                   {E100xyxyxy from going to 100XYXYXY}
            i := 1;
            for j := 2 to nch do begin
                if not (result[j] in ['a','e','i','o','u']) then begin
                    i := i + 1;
                    result[i] := result[j];
                end;
            end;
        end;

        for j := i+1 to MAXCH do begin
            result[j] := BLANK;
        end;

        if i > MAXCH then begin   {step 3; if still too long, truncate}
            k := MAXCH div 2;      {get first K characters}
            result[k+1] := result[i div 2 + 1];   {then one from the middle}
            for j := 1 to k-1 do begin                  {then k-1 from the end}
                result[j+k+1] := result[i-k+j+1];
            end;
        end;

        for i := MAXCH+1 to MAXVARLEN do begin   {blank fill}
            result[i] := BLANK;
        end;

        while AlreadyUsed(result) do begin   {step 4; dis-ambiguate if necessary}
            writeln(' ** collision **',result,' already used');
            k := MAXCH;                      {find a spot that can be incremented}
            while (result[k] in [BLANK,'z','9']) and (k>1) do begin
                k := k - 1;
            end;
            if k = 1 then begin         {no incrementable characters}
                write('***DRASTIC*** ',result:SizeOf(result));
                for j := 2 to MAXCH do begin
                    result[j] := 'a';
                end;
                writeln(' changed to ',result);
            end else begin
                result[k] := succ(result[k]);
            end;
        end;
    end;
end;

function LookUp(var v: VariableName; nch: integer) : ChangePointer;
var
    look: ChangePointer;
begin
    if debugging then begin
      writeln(debug); writeln(debug,'look up [',v,']');
    end;
    sentinel^.oldName := v;
    look := ListStart[nch,v[1]];
    if debugging then writeln(debug, 'try [', look^.oldName, ']');
    while (look^.oldName <> v) do begin
        look := look^.next;
        if debugging then writeln(debug, 'try [', look^.oldName, ']');
    end;
    LookUp := look;
end;


procedure ProcessVariable(var v: VariableName; var nch: integer);

{eats a variable name, puts it on the list, and returns a legal short ID}
var
   look: ChangePointer;
   found: boolean;
   result: VariableName;

begin
look := LookUp(v,nch);
found := (look <> sentinel);
if not found then begin
   if debugging then writeln(debug,'not found');
   new(look);
   look^.next := ListStart[nch,v[1]];
   ListStart[nch,v[1]] := look;
   look^.oldName := v;
   MakeNewName(v,result,nch);
   look^.newName := result;
   nch := SizeOf(result);
   look^.newNameSize := nch;
   v := result;
   end
else begin
   if debugging then writeln(debug,'found');
   v := look^.newName;
   nch := look^.newNameSize;
   end;
end;

procedure DumpTable;

var
   i: integer;
   c: char;
   look: ChangePointer;
begin
{   argv(4,argString); rewrite(outcref, argString);                 }(*BRKPAS*)
    rewrite(outcref, argv[5]^, buffered);                    {UNIX}
{   rewrite(outcref);                                             }(*VAX*)
    for i := MAXVARLEN downto 1 do begin
        for c := 'a' to 'z' do begin
            look := ListStart[i,c];
            while look <> sentinel do begin
                writeln(outcref,look^.oldName:i,'->',
                    look^.newName:look^.newNameSize);
                look := look^.next;
            end;
        end;
    end;
end;  

procedure GetNew(var ch: char);

{gets another character.  called after las one has been output}

begin
repeat                        {to get blank lines}
   if eoln(inprog) then begin
      writeln(outprog);
      read(inprog,ch);      {read the blank}
      end;
until (not eoln(inprog)) or eof(inprog);
if not eof(inprog) then
   read(inprog,ch);
end;


procedure GoTill(var ch:char; LookFor: char);

{eats input, copying it to output, till it sees a Lookfor.  It copies tha,
 then reads one more}
var col : integer;  { for debugging }

begin
    write(outprog,ch);
    if debugging then begin
      writeln(output, 'GoTill(', ch, ',', LookFor, ')');
      col := 1;
      write(output,ch);
      col := col + 1;
    end;
    while ch <> LookFor do begin
        GetNew(ch);
        write(outprog,ch);
	if debugging then begin
	  write(output,ch);
	  col := col + 1;
	  if (col mod 50 = 0) then writeln(output);
	end;
    end;
    GetNew(ch);
    if debugging then begin
      writeln(output);
      writeln(output, 'GoTill ends, returning <', ch, '>');
    end;
end;

procedure DoID(var ch: char);

{reads an identifier, plus one extra char.  treats it if necessary}

var
   i,j: integer;
   v: VariableName;
   OK: boolean;

begin
i := 1;
v[i] := ch;
OK := TRUE;
while OK and (not eoln(inprog)) do begin
   GetNew(ch);
   OK := ch in ['a'..'z','A'..'Z','_','$','0'..'9'];
   if OK then begin
      i := i + 1;
      if i <= MAXVARLEN then
         v[i] := ch;          {don't blow up on real long names}
      end;
   end;

if i > MAXVARLEN then
   i := MAXVARLEN;

for j := 1 to i do              {downshift identifier}
   if v[j] in ['A'..'Z'] then
      v[j] := chr(ord(v[j]) + ord('a') - ord('A'));

for j := i+1 to MAXVARLEN do           {blank fill}
   v[j] := BLANK;


ProcessVariable(v,i);
write(outprog,v:i);

if eoln(inprog) and OK then {we stopped since hit end of line, so get another}
   GetNew(ch);
end;

procedure ChangeProgram;

{eats a program and changes all the identifiers}

var
   ch: char; quoteCh : char;

begin
read(inprog,ch);
while not eof(inprog) do begin
   if ch = '{' then begin
      GoTill(ch,'}')
   end else if (ch = '"') or (ch = '''') then begin
      quoteCh := ch;
      write(outprog,quoteCh);
      if debugging then write(output,quoteCh);
      GetNew(ch);
      GoTill(ch,quoteCh);
   end else if ch = '(' then begin    {might be comment}
      write(outprog,ch);
      GetNew(ch);
      if ch = '*' then begin      {it is a comment} {bug.. (<CR>* counts}
         repeat
            GoTill(ch,'*')
         until ch = ')';
         write(outprog,ch);       {write the trailing paren}
         GetNew(ch);
         end;
      end
   else if ch in ['a'..'z','A'..'Z'] then
      DoID(ch)
   else begin
      write(outprog,ch);
      GetNew(ch);
      end;
   end;
end;

(* These words are now read from a table -- this
procedure is obsolete. *)
procedure DontChange(v: VariableName);

{tells what not to change.   These are in general long keywords}

var
   look: ChangePointer;
   nch : integer;

begin
    new(look);
    nch := SizeOf(v);
    look^.next := ListStart[nch,v[1]];
    ListStart[nch,v[1]] := look;
    look^.oldName := v;
    look^.newName := v;
    look^.newNameSize := nch;
end;

procedure ReadTable;
var
    v: VariableName;
    nch: integer;
    look: ChangePointer;
    found, trash : boolean;

function ReadCRFvar(var v:VariableName;var nch:integer):boolean;
var
    res : boolean;
    i, j : integer;
begin
    res := false;
    while not eof(incref) and
        ((incref^ in [BLANK,TAB]) or eoln(incref)) do begin
        get(incref);
    end;
    if not eof(incref) then begin
        if (incref^ in ['a'..'z','0'..'9','_','$']) then begin
            res := true;
            i := 1;
            while (incref^ in ['a'..'z','0'..'9','_','$']) do begin
                v[i] := incref^;
                get(incref);
                i := i + 1;
            end;
            nch := i - 1;
            for j := i to MAXVARLEN do begin
                v[j] := BLANK;
            end;
            if debugging then writeln(debug, 'ReadCRFvar[', v, ']');
        end else begin
            writeln(output,
                'bad incref char [',incref^,'] -- EOF assumed');
        end;
    end;
    ReadCRFvar := res;
end;

procedure SkipArrow;
begin
    if incref^ <> '-' then begin
        writeln(output, 'warning: [-] expected, [',
            incref^, '] found instead');
    end;
    get(incref);
    if incref^ <> '>' then begin
        writeln(output, 'warning: [>] expected, [',
            incref^, '] found instead');
    end;
    get(incref);
end;

begin                                                { ReadTable }
    { Add each name in file to the table.  We assume the table has
    been initialized already. }
    while ReadCRFvar(v,nch) do begin
        look := LookUp(v,nch);
        found := (look <> sentinel);
        if found then begin
            if debugging then writeln(debug,'found');
        end else begin
            if debugging then writeln(debug,'not found');
            new(look);
            look^.next := ListStart[nch,v[1]];
            ListStart[nch,v[1]] := look;
            look^.oldName := v;
        end;
        SkipArrow;
        trash := ReadCRFvar(v,nch);
        look^.newName := v;
        look^.newNameSize := nch;
    end;
{   close(incref);                                             }(*VAX*)
end;                                                { ReadTable }

procedure init;

{initializes table. PASCAL keywords are entered so something doesn't map
into a keyword}

var
   i: 0..MAXVARLEN;
   c: char;

begin
    TAB := chr(ORD_TAB);
{   if (argc <> 5) and (argc <> 6) then begin                }(*BRKPAS*)
{        writeln(output, 'wrong number of args -- quit');    }(*BRKPAS*)
{        halt;                                                    }(*BRKPAS*)
{   end;                                                    }(*BRKPAS*)
    if (argc <> 5) and (argc <> 6) then begin                   {UNIX}
         writeln(output, 'wrong number of args -- quit');       {UNIX}
         halt;                                                       {UNIX}
    end;                                                       {UNIX}
{   argv(1,argString); reset(inprog, argString);            }(*BRKPAS*)
    reset(inprog, argv[2]^, buffered);                           {UNIX}
{   reset(inprog);                                             }(*VAX*)
{   argv(2,argString); rewrite(outprog, argString);                }(*BRKPAS*)
    rewrite(outprog, argv[3]^, buffered);                           {UNIX}
{   rewrite(outprog);                                             }(*VAX*)
{   argv(3,argString); reset(incref, argString);                }(*BRKPAS*)
    reset(incref, argv[4]^, buffered);                              {UNIX}
{   reset(incref);                                             }(*VAX*)

    new(sentinel); sentinel^.next := nil;  { This should be the only nil
    in this structure!}
for i := 1 to MAXVARLEN do
   for c := 'a' to 'z' do
      ListStart[i,c] := sentinel;    {no ids of any length}
end;


procedure sccslcshorten;                 {UNIX}
  {sets up sccs string for release}      {UNIX}
  const                                  {UNIX}
    SCCSTR = '%W% %Y% %Q% %G%';          {UNIX}
  var                                    {UNIX}
    sccsid: string[100]; {for release}   {UNIX}
begin                                    {UNIX}
  sccsid := SCCSTR;                      {UNIX}
end { sccslcshorten };                   {UNIX}


begin {main routine}
    debugging := FALSE;                 
  { if argc = 6 then begin             }(*BRKPAS*)
  {   debugging := TRUE;               }(*BRKPAS*)
  {   argv(5,argString);               }(*BRKPAS*)
  {   rewrite(debug, argString);       }(*BRKPAS*)
  { end;                               }(*BRKPAS*)
    if argc = 6 then begin                  {UNIX}
      debugging := TRUE;                    {UNIX}
      rewrite(debug, argv[6]^, unbuffered); {UNIX}
    end;                                    {UNIX}
    init;
    ReadTable;
    ChangeProgram;
    DumpTable;
    if debugging then close(debug);
    close(outcref);  close(outprog);
end.
