/* $Id: kernfix.c,v 1.1.2.1 2002/10/21 00:11:39 root Exp $ */

/* Kernel fixup to allow using NPX registers in FSDs.
 *
 * Background: the kernel traps with MMX-based drivers if a user-mode program:
 *
 * 1. Executes FP code (including the LIBCS.DLL setup routines),
 *    AND
 * 2. Operates with files on RAMFS.
 *
 * The trap occurs during program termination in SaveNPX (14.091b_W4):
 *
 * (ebx is zero)
 * 0178:fff19dbe 8b9b38020000   mov       ebx,dword ptr [ebx+00000238]
 * 0178:fff19dc4 66833dccf6dbff02 cmp       word ptr [ffdbf6cc],+02
 * 0178:fff19dcc 7505           jnz       fff19dd3
 * 0178:fff19dce 0fae03         fxsave    dword ptr [ebx]
 * 0178:fff19dd1 eb02           jmp       fff19dd5
 * 0178:fff19dd3 dd33           fsave     byte ptr [ebx]
 * 0178:fff19dd5 32c0           xor       al,al
 * 0178:fff19dd7 e6f0           out       f0,al
 * 0178:fff19dd9 66c70562addbff0000 mov       word ptr [NPX_Owner (ffdbad62)],0000
 * 0178:fff19de2 07             pop       es
 * 0178:fff19de3 5e             pop       esi
 * 0178:fff19de4 59             pop       ecx
 *
 * Seems like the kernel is trying to save the NPX context for a thread that is
 * being cleaned up. And yes, the FSD _does_ perform the necessary fsave/frstor
 * around the memory transfers.
 *
 * Tampering with the CR0 TS/EM flags would slow the system down to a freeze,
 * so hiding the FSD's NPX activity from the kernel is not the way to go.
 *
 * The correct workaround was to cut in the kernel code above the failing
 * instruction:
 *
 * 0178:fff19dbb 8B1C98         mov       ebx,[eax][ebx]*4
 * 0178:fff19dbe 8B9B38020000   mov       ebx,[ebx][000000238]
 *
 * And replace it with a far call to our routine (to fit in 9 bytes):
 *
 * 0178:fff19dbb 9Axxxxxxxxxxxx call      far32 ptr _savenpx_override
 * 0178:fff19dc2 7215           jc        fff19dd9
 *
 * See the ASM file for the routine body.
 *
 * AAB 15/10/2002 */

#include <stddef.h>

#include "std32.h"

#include "ldrtypes.h"
#include "ldrmte.h"

#include "patchram.h"

/* Imported stuff */

/* Yields the module table entry that fix_kernel() scans for SaveNPX;
   fix_kernel() treats a NULL result as failure. */
struct ldrmte_s * locate_krnl_mte();
/* Receives the 32-bit displacement read from the kernel's
   "mov ebx, dword ptr [ebx+...]" instruction just before that instruction is
   patched out. NOTE(review): presumably consumed by savenpx_override -
   confirm in the ASM file. */
extern unsigned long npx_tcb_base;
/* Replacement routine reached by the far call that fix_kernel() writes into
   the kernel; its body lives in the ASM file. */
void savenpx_override();
/* Supplies the 16-bit selector half of the far call target.
   NOTE(review): presumably the current CS value - confirm. */
unsigned short get_cs();

/* SaveNPX */

/* Byte signature of the whole SaveNPX routine: entry sequence, the
   instructions that get patched out, the landing area and the exit sequence.
   fix_kernel() searches for it from offset 0 and uses the match as the
   starting point for the take-off search.
   NOTE(review): each entry looks like {distance from the previous match,
   search window, expected byte} and the list is closed by {-1, -1, 0} -
   confirm against struct locator in patchram.h. */

struct locator os2krnl_savenpx[]=
{
 {0, 1, 0x50},                          /* Entry */
 {1, 1, 0x51},
 {1, 1, 0x56},
 {1, 1, 0x06},
 {1, 1, 0x0F},
 {1, 1, 0x06},
 {4, 12, 0x0F},                         /* movzx ebx, bx */
 {1, 1, 0xB7},
 {1, 1, 0xDB},
 {1, 16, 0xA1},                         /* mov eax, ... _papTCBSlots */
 {3, 16, 0x8B},                         /* mov ebx, dword ptr [eax+ebx*4] */
 {1, 1, 0x1C},
 {1, 1, 0x98},
 {1, 1, 0x8B},                          /* mov ebx,                     */
 {1, 1, 0x9B},                          /*          dword ptr [ebx+...] */
 /* 6 bytes (incl. previous 2 bytes) to cut in here! */
 {3, 1, 0x00},                          /* The structures are unlikely */
 {1, 1, 0x00},                          /* to exceed 65536 bytes! */
 /* Cut-in area ends! Next comes a Katmai branch which is kernel specific
    (older kernels don't care about SSE), so a gap of varying size has to be
    allowed for. */
 {3, 127, 0x66},                        /* Landing area */
 {1, 1, 0xC7},                          /* Store immediate value: */
 {6, 1, 0x00},                          /* 00 */
 {1, 1, 0x00},                          /* 00 */
 {1, 32, 0x07},                         /* Exit sequence */
 {1, 1, 0x5E},
 {1, 1, 0x59},
 {1, 1, 0x58},
 {-1, -1, 0} 
};

/* Patch area */

/* Signature of the two instructions (3 + 6 = 9 bytes) that fix_kernel()
   overwrites. It repeats the corresponding entries of os2krnl_savenpx and is
   searched for starting at the SaveNPX match. The two high-order bytes of the
   32-bit displacement are required to be zero. */

struct locator os2krnl_npx_takeoff[]=
{
 {0, 1, 0x8B},                          /* mov ebx, dword ptr [eax+ebx*4] */
 {1, 1, 0x1C},
 {1, 1, 0x98},
 {1, 1, 0x8B},                          /* mov ebx,                     */
 {1, 1, 0x9B},                          /*          dword ptr [ebx+...] */
 /* 6 bytes (incl. previous 2 bytes) to cut in here! */
 {3, 1, 0x00},                          /* The structures are unlikely */
 {1, 1, 0x00},                          /* to exceed 65536 bytes! */
 {-1, -1, 0} 
};

/* Landing area */

/* Signature of the instruction that the patched-in "jc" jumps to: the 16-bit
   store of zero (66 C7 ... 00 00) followed by the exit pops. It repeats the
   tail of os2krnl_savenpx and is searched for starting at the take-off
   match. */

struct locator os2krnl_npx_landing[]=
{
 {0, 1, 0x66},                          /* Operand-size prefix (386) */
 {1, 1, 0xC7},                          /* Store immediate value: */
 {6, 1, 0x00},                          /* 00 */
 {1, 1, 0x00},                          /* 00 */
 {1, 32, 0x07},                         /* Exit sequence */
 {1, 1, 0x5E},
 {1, 1, 0x59},
 {1, 1, 0x58},
 {-1, -1, 0} 
};

/* Installs the NPX hook.
 *
 * Looks up SaveNPX in the last object of the module returned by
 * locate_krnl_mte(), stores the displacement of the original
 * "mov ebx, dword ptr [ebx+...]" instruction in npx_tcb_base and overwrites
 * the 9-byte take-off area with:
 *
 *   9A oooooooo ssss   call far ssss:oooooooo   (savenpx_override)
 *   72 dd              jc   <landing area>
 *
 * Returns 0 if the patch has been written, 1 if any piece could not be
 * located or the landing area is out of reach of a short forward jump. On
 * failure neither the kernel code nor npx_tcb_base is touched. */

int _far fix_kernel()
{
 enum
 {
  NPX_PATCH_LEN=9,                      /* call far (7 bytes) + jc rel8 (2) */
  NPX_JC_MAX_DISP=127                   /* Farthest forward reach of rel8 */
 };
 struct ldrote_s *objtab;
 struct ldrmte_s *pmte;
 unsigned int i;
 struct area a;
 long o, ot, ol;
 long disp;

 pmte=locate_krnl_mte();
 if(pmte==NULL||pmte->mte_swapmte==NULL)
  return(1);
 objtab=(struct ldrote_s *)pmte->mte_swapmte->smte_objtab;
 /* An empty object table would make the index below wrap around */
 if(objtab==NULL||pmte->mte_swapmte->smte_objcnt==0)
  return(1);
 /* Only the last object of the module is searched */
 i=pmte->mte_swapmte->smte_objcnt-1;
 a.first=(char *)objtab[i].ote_base;
 a.len=objtab[i].ote_size;
 /* Narrow down: whole routine first, then the take-off area inside it, then
    the landing area past the take-off. */
 if((o=locate(os2krnl_savenpx, (struct area *)SSToDS(&a), 0))==-1)
  return(1);
 if((ot=locate(os2krnl_npx_takeoff, (struct area *)SSToDS(&a), o))==-1)
  return(1);
 if((ol=locate(os2krnl_npx_landing, (struct area *)SSToDS(&a), ot))==-1)
  return(1);
 /* The jc is relative to the end of the patch. It must point forward and fit
    into a signed byte, otherwise the cast below would silently produce a
    jump to the wrong place. */
 disp=ol-(ot+NPX_PATCH_LEN);
 if(disp<0||disp>NPX_JC_MAX_DISP)
  return(1);
 /* Save the displacement (5 bytes into the take-off area) before it gets
    overwritten. */
 npx_tcb_base=*(unsigned long *)&a.first[ot+5];
 /* Compose the code to drive away from IBM's procedure.
    NOTE(review): ac/ad/aw come from patchram.h and presumably store a
    byte/dword/word at a.pos and advance it - confirm there. */
 a.pos=ot;
 ac(0x9A);                              /* call far ... */
 ad((unsigned long)savenpx_override);
 aw(get_cs());
 ac(0x72);                              /* jc */
 ac((unsigned char)disp);
 /* Report that the patch found its way into the kernel. */
 return(0);
}
