program Disasm;
{ ------------------------------------------------------------------- }
{ Disasm v. 0.2.0  Copyright (c) 1999 M. Gedminas <mgedmin@delfi.lt>  }
{ This program is Free Software.  See file COPYING for details.       }
{ ------------------------------------------------------------------- }

{ $Id: disasm.pas,v 1.2 2002/10/20 17:50:30 mg Exp $ }

{ ------------------------------------------------------------------- }

procedure AbortError(const Msg: string);
begin
  WriteLn('Disasm: ', Msg);
  Halt(1);
end;

type
  str8 = string[8];

const
  HexChars: array[0..$F] of Char = '0123456789ABCDEF';

function HexByte(N: Byte): Str8;
begin
  HexByte[0] := Char(2);
  HexByte[1] := HexChars[(N shr  4)       ];
  HexByte[2] := HexChars[(N       ) and $F];
end;

function HexWord(N: Word): Str8;
begin
  HexWord[0] := Char(4);
  HexWord[1] := HexChars[(N shr 12)       ];
  HexWord[2] := HexChars[(N shr  8) and $F];
  HexWord[3] := HexChars[(N shr  4) and $F];
  HexWord[4] := HexChars[(N       ) and $F];
end;

function HexLong(N: Longint): Str8;
begin
  HexLong[0] := Char(8);
  HexLong[1] := HexChars[(N shr 28)       ];
  HexLong[2] := HexChars[(N shr 24) and $F];
  HexLong[3] := HexChars[(N shr 20) and $F];
  HexLong[4] := HexChars[(N shr 16) and $F];
  HexLong[5] := HexChars[(N shr 12) and $F];
  HexLong[6] := HexChars[(N shr  8) and $F];
  HexLong[7] := HexChars[(N shr  4) and $F];
  HexLong[8] := HexChars[(N       ) and $F];
end;

{ ------------------------------------------------------------------- }

type
  TOpcodeArg = (
    argNone,
    argRegMem8,
    argRegMem16,
    argReg8,
    argReg16,
    argSegReg,
    argRegAL, argRegBL, argRegCL, argRegDL,
    argRegAH, argRegBH, argRegCH, argRegDH,
    argRegAX, argRegBX, argRegCX, argRegDX,
    argRegSP, argRegBP, argRegSI, argRegDI,
    argRegCS, argRegDS, argRegSS, argRegES,
    argMem8,
    argMem16,
    argMem32,
    argMemOffs8,
    argMemOffs16,
    argNear,
    argFar,
    argShort,
    argImm8,
    argImm16,
    argSImm8,
    argConst1,
    argConst3
  );

  TOpcodeData = record
    Mnemonic: string[7];
    Arg1, Arg2: TOpcodeArg;
    Prefix: Boolean;
  end;

const
  ArgsThatNeedAddrModeByte: set of TOpcodeArg = [
    argRegMem8,
    argRegMem16,
    argReg8,
    argReg16,
    argSegReg,
    argMem8,
    argMem16,
    argMem32
  ];

  OpRegs: array[argRegAL..argRegES] of string[2] = (
    'al', 'bl', 'cl', 'dl',
    'ah', 'bh', 'ch', 'dh',
    'ax', 'bx', 'cx', 'dx',
    'sp', 'bp', 'si', 'di',
    'cs', 'ds', 'ss', 'es'
  );

  Addr: array[0..7] of string[5] = (
    'bx+si', 'bx+di', 'bp+si', 'bp+di',
    'si', 'di', 'bp', 'bx');

  Reg8: array[0..7] of string[2] = (
    'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh');

  Reg16: array[0..7] of string[2] = (
    'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di');

  SegReg: array[0..3] of string[2] = (
    'es', 'cs', 'ss', 'ds');

{$I opcodes.inc}

type
  PByteArr = ^TByteArr;
  TByteArr = array[0..$FFF7] of Byte;
  PInteger = ^Integer;

procedure DecodeInsn(Buf: PByteArr; Size: Word; Ofs: Word;
            var szPrefix, szMode, szOffset, szData, OpSize: Byte;
	    var ofOpcode, ofMode, ofOffset, ofData: Word;
	    var Mnemonic: str8;
            var SegPrefix: Integer; var Arg1, Arg2: TOpcodeArg);
begin
  szPrefix := 0; szMode := 0; szOffset := 0; szData := 0;
  
  SegPrefix := -1;
  ofOpcode := Ofs;
  while (ofOpcode < Size) and (Opcodes[Buf^[ofOpcode]].Prefix) do
    begin
      if (Buf^[ofOpcode] in [$26, $2E, $36, $3E]) then  { segXX }
	SegPrefix := (Buf^[ofOpcode] shr 3) and 3;
      Inc(ofOpcode);
    end;
  szPrefix := ofOpcode - Ofs;
  
  Arg1 := Opcodes[Buf^[ofOpcode]].Arg1;
  Arg2 := Opcodes[Buf^[ofOpcode]].Arg2;
  
  ofMode := ofOpcode + 1; ofOffset := ofMode;
  if (Arg1 in ArgsThatNeedAddrModeByte) or
     (Arg2 in ArgsThatNeedAddrModeByte) then
    begin
      szMode := 1;
      ofOffset := ofMode + 1;
      if (Buf^[ofMode] shr 6) = 1 then
        szOffset := 1	{ 8-bit offset }
      else if ((Buf^[ofMode] shr 6) = 2) or
              (((Buf^[ofMode] shr 6) = 0) and (Buf^[ofMode] and 7 = 6)) then
        szOffset := 2;	{ 16-bit offset }
    end
  else if (Arg1 = argShort) or (Arg2 = argShort) then
    szOffset := 1	{ 8-bit address }
  else if (Arg1 in [argNear, argMemOffs8, argMemOffs16]) or
          (Arg2 in [argNear, argMemOffs8, argMemOffs16]) then
    szOffset := 2	{ 16-bit address }
  else if (Arg1 = argFar) or (Arg2 = argFar) then
    szOffset := 4;	{ 32-bit address }
   
  ofData := ofOffset + szOffset;
  if (Arg1 in [argSImm8, argImm8]) or (Arg2 in [argSImm8, argImm8]) then
    szData := 1		{ 8-bit data }
  else if (Arg1 = argImm16) or (Arg2 = argImm16) then
    szData := 2;	{ 16-bit data }
    
  { Special cases }
  if (Buf^[ofOpcode] = $F6) and ((Buf^[ofMode] shr 3) and 7 = 0) then
    begin
      szData := 1;	{ test r/m8, imm8 }
      Arg2 := argImm8;
    end
  else
  if (Buf^[ofOpcode] = $F7) and ((Buf^[ofMode] shr 3) and 7 = 0) then
    begin
      szData := 2;	{ test r/m16, imm16 }
      Arg2 := argImm16;
    end;
  if (Buf^[ofOpcode] = $FF) and ((Buf^[ofMode] shr 3) and 7 in [3, 5]) then
    Arg1 := argMem32;	{ call/jmp dword ptr mem32 }
    
  Mnemonic := Opcodes[Buf^[ofOpcode]].Mnemonic;
  if Mnemonic = '<ArOp>' then
    Mnemonic := ArOp[(Buf^[ofMode] shr 3) and 7]
  else if Mnemonic = '<ShfOp>' then
    Mnemonic := ShfOp[(Buf^[ofMode] shr 3) and 7]
  else if Mnemonic = '<Grp1>' then
    Mnemonic := Grp1[(Buf^[ofMode] shr 3) and 7]
  else if Mnemonic = '<Grp2>' then
    Mnemonic := Grp2[(Buf^[ofMode] shr 3) and 7]
  else if Mnemonic = '<Grp3>' then
    Mnemonic := Grp3[(Buf^[ofMode] shr 3) and 7];
    
  OpSize := szPrefix + 1 + szMode + szOffset + szData;
end;

procedure DisasmFile(const FName, OName: string);
const
  { Flags }
  flCode  	= $01; { This location is code (start of insn) }
  flLabel 	= $02; { This location is referred to }
  flCodeEnd 	= $04; { This is a jmp/ret }
  flPartOfCode	= $08; { This is in the middle of instruction }
  flBrokenCode  = $10; { This is start of insn that shouldn't be disasmed }
var
  F: file;
  O: Text;
  Size: Longint;
  Buf, Flags: PByteArr;
  Ofs, I: Word;
  szPrefix, szMode, szOfs, szData, OpSize: Byte;
  ofOpcode, ofMode, ofOfs, ofData: Word;
  Mnemonic: str8; 
  SegPrefix: Integer; 
  SegPrefixStr: string[3];
  Arg1, Arg2: TOpcodeArg;
  PrevWasCode, PrevWasJump: Boolean;
  FinalLabel: Boolean;

  function CheckLabel(Addr: Word): Boolean;
  begin
    CheckLabel := (((Addr >= $100) 
                    and (Addr - $100 < Size) 
                    and (Flags^[Addr - $100] and (flLabel + flPartOfCode) = flLabel))
		   or ((Addr = $100 + Size) and FinalLabel));
  end;

  function AddrStr(Addr: Word): str8;
  begin
    if (SegPrefix < 0) and CheckLabel(Addr) then
      AddrStr := 'l' + HexWord(Addr)
    else
      AddrStr := '0' + HexWord(Addr) + 'h';
  end;

  procedure PrintArg(Arg: TOpcodeArg);
  begin
    case Arg of
      argMem8,
      argRegMem8,
      argMem16,
      argRegMem16,
      argMem32:
	begin
	  if Buf^[ofMode] shr 6 <> 3 then
	    if Arg in [argMem8, argRegMem8] then
	      Write(O, 'byte ptr ')
	    else if Arg = argMem32 then
	      Write(O, 'dword ptr ')
	    else
	      Write(O, 'word ptr ');
	  case Buf^[ofMode] shr 6 of
	    0:
	       if Buf^[ofMode] and 7 = 6 then
	         { Special case: [disp16] instead of [BP] }
		 begin
		   if (SegPrefix < 0) and not CheckLabel(PInteger(@Buf^[ofOfs])^) then
		     Write(O, 'ds:');
		   Write(O, SegPrefixStr, '[', AddrStr(PInteger(@Buf^[ofOfs])^),']');
		 end
	       else
	         Write(O, SegPrefixStr, '[', Addr[Buf^[ofMode] and 7], ']');
	    1:
	       if Shortint(Buf^[ofOfs]) >= 0 then
	         Write(O, SegPrefixStr, '[', Addr[Buf^[ofMode] and 7], '+0',
		    	       HexByte(Shortint(Buf^[ofOfs])), 'h]')
	       else
	         Write(O, SegPrefixStr, '[', Addr[Buf^[ofMode] and 7], '-0',
		    	       HexByte(-Shortint(Buf^[ofOfs])), 'h]');
	    2: Write(O, SegPrefixStr, '[', Addr[Buf^[ofMode] and 7], '+0',
			     HexWord(PInteger(@Buf^[ofOfs])^),'h]');
	    3: if Arg in [argMem8, argRegMem8] then
		 Write(O, Reg8[Buf^[ofMode] and 7])
	       else
		 Write(O, Reg16[Buf^[ofMode] and 7]);
	  end;
	end;
      argReg8:   Write(O, Reg8[(Buf^[ofMode] shr 3) and 7]);
      argReg16:  Write(O, Reg16[(Buf^[ofMode] shr 3) and 7]);
      argSegReg: Write(O, SegReg[(Buf^[ofMode] shr 3) and 3]);
      argRegAL, argRegBL, argRegCL, argRegDL,
      argRegAH, argRegBH, argRegCH, argRegDH,
      argRegAX, argRegBX, argRegCX, argRegDX,
      argRegSP, argRegBP, argRegSI, argRegDI,
      argRegCS, argRegDS, argRegSS, argRegES:
	Write(O, OpRegs[Arg]);
      argMemOffs8:   
        begin
	  if (SegPrefix < 0) and not CheckLabel(PInteger(@Buf^[ofOfs])^) then
	    Write(O, 'ds:');
	  Write(O, 'byte ptr ', SegPrefixStr, '[', AddrStr(PInteger(@Buf^[ofOfs])^), ']');
	end;
      argMemOffs16:
        begin
	  if (SegPrefix < 0) and not CheckLabel(PInteger(@Buf^[ofOfs])^) then
	    Write(O, 'ds:');
	  Write(O, 'word ptr ', SegPrefixStr, '[', AddrStr(PInteger(@Buf^[ofOfs])^), ']');
	end;
      argNear:   Write(O, 'near ptr l', HexWord($100+Ofs+OpSize+PInteger(@Buf^[ofOfs])^));
      argFar:    Write(O, 'far ptr ', HexWord(PInteger(@Buf^[ofOfs+2])^), ':', HexWord(PInteger(@Buf^[ofOfs])^));  { won't assemble }
      argShort:  Write(O, 'short l', HexWord($100+Ofs+OpSize+Shortint(Buf^[ofOfs])));
      argImm8:   Write(O, '0', HexByte(Buf^[ofData]), 'h');
      argSImm8:
                 if ShortInt(Buf^[ofData]) >= 0 then
		   Write(O, '0', HexByte(Buf^[ofData]), 'h')
		 else
		   Write(O, '-0', HexByte(-ShortInt(Buf^[ofData])), 'h');
      argImm16:  Write(O, '0', HexWord(PInteger(@Buf^[ofData])^), 'h');
      argConst1: Write(O, '1');
      argConst3: Write(O, '3');
    end;
  end;

  procedure Trace(Ofs: Word);
  var
    Dest: Word;
    OpSize: Byte;
    szPrefix, szMode, szOfs, szData: Byte;
    ofOpcode, ofMode, ofOfs, ofData: Word;
    Mnemonic: str8; SegPrefix: Integer; Arg1, Arg2: TOpcodeArg;
    I: Word;

    procedure BreakInsn(Dest: Word);
    begin
      while (Dest > 0) and (Flags^[Dest] and flCode = 0) do
        Dec(Dest);
      if Flags^[Dest] and flCode <> 0 then
        Flags^[Dest] := Flags^[Dest] and not (flCode + flCodeEnd) or flBrokenCode;
    end;

    procedure SetCodeLabel(Dest: Word);
    begin
      if (Dest < Size) then
	begin
	  if Flags^[Dest] and (flCode + flPartOfCode) = flPartOfCode then
	    BreakInsn(Dest); { Jump to middle of instruction }
	  Flags^[Dest] := Flags^[Dest] or flLabel;
	  Trace(Dest);
	end
      else
        if Dest = Size then
	  FinalLabel := True
	else
	  BreakInsn(Ofs); { No label defined -- can't disassemble }
    end;
    
    procedure SetDataLabel(Dest: Word);
    begin
      if (Dest < Size) then
	begin
	  if Flags^[Dest] and flPartOfCode = 0 then
	    { Be conservative: no code self-modifications }
	    Flags^[Dest] := Flags^[Dest] or flLabel;
	end
      else
        if Dest = Size then
	  FinalLabel := True;
    end;
    
  begin
    while (Ofs < Size) and (Flags^[Ofs] and flPartOfCode = 0) do
      begin
	DecodeInsn(Buf, Size, Ofs,
	           szPrefix, szMode, szOfs, szData, OpSize,
		   ofOpcode, ofMode, ofOfs, ofData,
		   Mnemonic, SegPrefix, Arg1, Arg2);
        
	if (Ofs + OpSize > Size) then { Not full opcode }
	  Break;

	if (Mnemonic = '') then 
	  begin { Invalid opcode }
	    Inc(Ofs, szPrefix+1);
	    Continue;
	  end;
	
	if ((Arg1 in [argMem8, argMem16, argMem32]) or
	    (Arg2 in [argMem8, argMem16, argMem32])) and
	   (Buf^[ofMode] shr 6 = 3) then
	  begin { Invalid arguments }
	    Inc(Ofs, szPrefix+1);
	    Continue;
	  end;
	
	Flags^[Ofs] := Flags^[Ofs] or (flCode + flPartOfCode);
	for I := Ofs + 1 to Ofs + OpSize - 1 do
	  begin
	    if Flags^[I] and flLabel <> 0 then
	      Flags^[Ofs] := Flags^[Ofs] and not flCode or flBrokenCode;
  	    Flags^[I] := Flags^[I] or flPartOfCode;
	  end;

	{ Check: jmp, call, jXX, ret }
	if (Buf^[ofOpcode] in [$70..$7F, $E0..$E3, $EB]) then { jXX, loop, jmp }
	  begin
	    Dest := ofOpcode + OpSize + Shortint(Buf^[ofOfs]);
	    SetCodeLabel(Dest);
	    if Buf^[ofOpcode] = $EB then { jmp short }
	      begin
		Flags^[Ofs] := Flags^[Ofs] or flCodeEnd;
	   	Break;
	      end;
	  end
	else if (Buf^[ofOpcode] in [$E8, $E9]) then { call near, jmp near }
	  begin
	    Dest := ofOpcode + OpSize + PInteger(@Buf^[ofOfs])^;
	    SetCodeLabel(Dest);
	    if Buf^[ofOpcode] = $E9 then { jmp near }
	      begin
		Flags^[Ofs] := Flags^[Ofs] or flCodeEnd;
	   	Break;
	      end;
	  end
	else if (Buf^[ofOpcode] in [$C2, $C3, $CA, $CB, $CF, $EA]) or { ret, iret, jmp far }
	        ((Buf^[ofOpcode] = $FF) and
	         ((Buf^[ofMode] shr 3) and 7 in [4, 5])) then { jmp [...] }
	  begin
	    if Buf^[ofOpcode] = $EA then { jmp far }
	      BreakInsn(Ofs) { don't disassemble }
	    else
    	      Flags^[Ofs] := Flags^[Ofs] or flCodeEnd;
	    Break;
	  end
	else if (Buf^[ofOpcode] = $9A) then { call far }
	  begin
	    { Don't trace: .COM files have no relocations }
	    BreakInsn(Ofs); { don't disassemble }
	  end;
	{ Ignored: FF 2/3 (call near/far [...]): too clever to trace for me }
	{ Also ignored: push addr + ret ; setting interrupt handlers, etc }
	if (SegPrefix < 0) and
	   ((Arg1 in [argMemOffs8, argMemOffs16]) or
	    (Arg2 in [argMemOffs8, argMemOffs16]) or 
	    (((Arg1 in ArgsThatNeedAddrModeByte) or
	      (Arg2 in ArgsThatNeedAddrModeByte)) and
	     (Buf^[ofMode] shr 6 = 0) and (Buf^[ofMode] and 7 = 6))) then
	  begin
	    SetDataLabel(PInteger(@Buf^[ofOfs])^ - $100);
	  end;
	
	Inc(Ofs, OpSize);
      end;
  end;

begin
  FileMode := 2; { Open for reading only }
  Assign(F, FName);
  {$I-}
  Reset(F, 1);
  {$I+}
  if IOResult <> 0 then
    AbortError('Cannot open '+FName);
  Size := FileSize(F);
  if Size > SizeOf(TByteArr) then
    AbortError('File is too big');
  GetMem(Buf, Size); GetMem(Flags, Size);
  BlockRead(F, Buf^, Size);
  FillChar(Flags^, Size, 0);
  Close(F);

  if (Size >= 2) and
     (((PChar(Buf)[0] = 'M') and (PChar(Buf)[1] = 'Z'))
      or ((PChar(Buf)[0] = 'Z') and (PChar(Buf)[1] = 'M'))) then
    AbortError('.EXE files are not supported');

  Assign(O, OName);
  {$I-}
  Rewrite(O);
  {$I+}
  if IOResult <> 0 then
    AbortError('Cannot create '+OName);
  WriteLn(O, #9'.model'#9'tiny');
  WriteLn(O, #9'.code');
  WriteLn(O, #9'org'#9'100h');
  WriteLn(O, 'start:');
  Trace(0);

  Ofs := 0; PrevWasCode := True; PrevWasJump := False;
  while (Ofs < Size) do
    begin
      if PrevWasJump or
         (PrevWasCode xor (Flags^[Ofs] and flCode <> 0)) then
	begin
	  WriteLn(O);
	  PrevWasCode := Flags^[Ofs] and flCode <> 0;
	end;
      if Flags^[Ofs] and flLabel <> 0 then
        Write(O, 'l', HexWord($100+Ofs), ':');

      if (Flags^[Ofs] and (flCode + flBrokenCode) <> 0) then
        begin
	  if (Flags^[Ofs] and flBrokenCode <> 0) then
	    Write(O, ';*');
	  Write(O, #9);
	  
	  DecodeInsn(Buf, Size, Ofs,
		     szPrefix, szMode, szOfs, szData, OpSize,
		     ofOpcode, ofMode, ofOfs, ofData,
		     Mnemonic, SegPrefix, Arg1, Arg2);
          if SegPrefix >= 0 then
	    SegPrefixStr := SegReg[SegPrefix]+':'
	  else
	    SegPrefixStr := '';
	 
	  for I := 1 to szPrefix do
	    begin
	      if (Buf^[Ofs+I-1] in [$26, $2e, $36, $3e]) then
	        if (SegPrefix = (Buf^[Ofs+I-1] shr 3) and 3) then
		  Continue;
	      Write(O, Opcodes[Buf^[Ofs+I-1]].Mnemonic, ' ');
	    end;
	
	  Write(O, Mnemonic);
	  if Arg1 <> argNone then
	    begin
	      Write(O, #9);
	      PrintArg(Arg1);
	      if Arg2 <> argNone then
		begin
		  Write(O, ', ');
		  PrintArg(Arg2);
		end;
	    end;
	end;

      if (Flags^[Ofs] and flCode = 0) then
	begin
	  if (Flags^[Ofs] and flBrokenCode <> 0) then
	    WriteLn(O);
	  OpSize := 1;
	  if (Flags^[Ofs] and flPartOfCode = 0) and
	     (Buf^[Ofs] in [32..126]) then
	    begin
	      Write(O, #9'db'#9'''');
	      I := 0;
	      while (I < 16) and (Ofs+I < Size) and
	            (Flags^[Ofs+I] and flPartOfCode = 0) and
		    (Buf^[Ofs+I] in [32..126]) do
		begin
		  if Buf^[Ofs+I] = Ord('''') then
   		    Write(O, '''''')
		  else
   		    Write(O, Chr(Buf^[Ofs+I]));
		  Inc(I);
		end;
	      OpSize := I;
	      Write(O, '''');
	    end
	  else
  	    Write(O, #9'db'#9'0', HexByte(Buf^[Ofs]), 'h');
	end;

      Write(O, #9#9#9'; ', HexWord($100+Ofs), ' ');
      for I := 0 to OpSize-1 do
        Write(O, ' ', HexByte(Buf^[Ofs+I]));
      WriteLn(O);
      PrevWasJump := Flags^[Ofs] and flCodeEnd <> 0;
      Inc(Ofs, OpSize);
    end;
  { Assert(Ofs = Size); }
  if FinalLabel then
    WriteLn(O, 'l', HexWord($100+Size), ':');
  WriteLn(O, 'end'#9'start');

  Close(O);

  FreeMem(Buf, Size);
end;

{ ------------------------------------------------------------------- }

begin
  if (ParamCount = 0) or (ParamStr(1) = '-h') or
     (ParamStr(1) = '--help') then
    begin
      WriteLn('Disasm v. 0.2.0.  Copyright (c) 1999 Marius Gedminas <mgedmin@delfi.lt>');
      WriteLn('This program is Free Software.  See file COPYING for details.');
      WriteLn('Usage: disasm <in-file> <out-file>');
      WriteLn('Disasm accepts only .COM files for 8086/8088 CPUs');
      Halt;
    end;
  DisasmFile(ParamStr(1), ParamStr(2));
end.
