;*DDK*************************************************************************/
;
; COPYRIGHT    Copyright (C) 1995 IBM Corporation
;
;    The following IBM OS/2 WARP source code is provided to you solely for
;    the purpose of assisting you in your development of OS/2 WARP device
;    drivers. You may use this code in accordance with the IBM License
;    Agreement provided in the IBM Device Driver Source Kit for OS/2. This
;    Copyright statement may not be removed.;
;*****************************************************************************/
;***********************************************************************
;
;   Module          = HWACCESS.ASM
;
;   Description     = Functions to write to the xga hardware
;
;
;***********************************************************************

INCL_GPIBITMAPS EQU     1
include os2.inc
include eddinclt.inc

include eddhcone.inc
include eddhmacr.inc
include eddhtype.inc
include eddftype.inc
include hwaccess.inc
ifdef   _8514
include 8514.inc
include cursor.inc
ifdef   BPP24
include ffmacros.inc
endif
endif

?DF     equ     1       ; don't define _TEXT segment
include cmacros.inc

ifndef  _8514
extrn   _ShadowXGARegs          :dword
extrn   _pRealXGARegs           :dword
else
ifdef   BPP24
extrn   _pVram                  :dword
endif   ;BPP24
extrn   _DVRAM                  :proc
extrn   _p8514Regs              :dword
extrn   _Shadow8514Regs         :dword
extrn   _Screen8514VisWidth     :dword
extrn   _DDT                    :byte
endif
extrn   _pPhunkPhys             :dword
extrn   _pPhunkVirt             :dword
extrn   _softDrawInUse          :word
extrn   _pbHWPollRegister       :dword
extrn   mixTrashSourceAllDD     : FAR

; Aliases so that the code below can refer to the underscore-prefixed
; externals declared above without the leading underscore.
pbHWPollRegister  equ _pbHWPollRegister
ifndef _8514
; XGA build: shadow register block and pointer to the real registers.
ShadowXGARegs     equ _ShadowXGARegs
pRealXGARegs      equ _pRealXGARegs
else
; 8514 build: shadow register block, register pointer and DVRAM routine.
DVRAM             equ _DVRAM
Shadow8514Regs    equ _Shadow8514Regs
p8514Regs         equ _p8514Regs
endif
; Physical and virtual addresses of the PHUNK buffer, and the flag that
; is tested to see whether software drawing is in use.
pPhunkPhys        equ _pPhunkPhys
pPhunkVirt        equ _pPhunkVirt
softDrawInUse     equ _softDrawInUse

; This module places nothing in the data segment; it is declared empty.
_DATA           segment dword use32 public 'DATA'
_DATA           ends

; All code below is assembled into the 32-bit code segment with every
; segment register assumed to be FLAT.
_TEXT           segment dword use32 public 'CODE'
                assume  cs:FLAT, ds:FLAT, es:FLAT


; UpdateShadowRegisters
;
; XGA build (_8514 not defined): copies the readable xga memory mapped
; registers into the corresponding shadow registers. The registers read are
;      - Bresenham Error Term
;      - Source Map X and Y
;      - Destination Map X and Y
;      - Pattern Map X and Y
;
; 8514 build (_8514 defined): only the Bresenham error term is handled, and
; the shadow value is handed to outwQ ERR_TERM rather than being read back.
; NOTE(review): that is the opposite direction to the XGA branch and to the
; name of this routine - confirm that it is intended.
;
; The routine takes no parameters and does nothing when softDrawInUse is
; TRUE. esi and edi are named in the cProc register save list.

        align   4
cProc   UpdateShadowRegisters, <PUBLIC>, <esi,edi>

cBegin

        ; If software drawing is in use then exit without doing anything
        ; (only the low byte of the softDrawInUse word is compared).
        cmp     byte ptr softDrawInUse, TRUE
        jz      short usr_exit

        ifndef   _8514
        pushxga                 ; save the segment register used for XGA
        movxga  pRealXGARegs    ; address the real XGA registers

        ; wait until the hardware is free
        waitshort

        ; Read each readable register and store it in its shadow copy.
        ; The error term is 16 bits wide; the three map coordinate
        ; registers are read as 32 bits each.
        memregread      ax, bres_error
        mov             ShadowXGARegs.bres_error, ax
        memregread      eax, src_map
        mov             ShadowXGARegs.src_map, eax
        memregread      eax, dest_map
        mov             ShadowXGARegs.dest_map, eax
        memregread      eax, patt_map
        mov             ShadowXGARegs.patt_map, eax

        popxga                  ; restore the segment register used for XGA

        else
        ; 8514: edi -> shadow register block
        lea     edi, Shadow8514Regs

        ; one queue entry is needed for the single register access below
        WaitQ   1

        ; pass the shadowed error term to the ERR_TERM register
        mov     ax, word ptr [edi].Err_Term
        outwQ   ERR_TERM,ax

        endif

usr_exit:

cEnd

ifndef _8514

; TransferShadowRegisters copies fields from the xga shadow registers to
; the xga hardware (memory mapped) registers. The subset of registers to
; be copied is specified by the ulFlags parameters as follows.
; The TSR_LINES flag indicates to transfer the following registers:
;     - Bresenham Error Term
;     - Bresenham K1
;     - Bresenham K2
;     - Direction Steps
;
; The TSR_COLOUR_MIX flag indicates to transfer the following registers:
;     - Fgd Mix
;     - Bgd Mix
;     - Foreground Colour
;     - Background Colour
;     - Colour Compare Condition
;     - Colour Compare Colour
;
; The TSR_COORDINATES flag indicates to transfer the following registers:
;     - Operation Dimension 1
;     - Operation Dimension 2
;     - Source Map X Address
;     - Source Map Y Address
;     - Destination Map X Address
;     - Destination Map Y Address
;     - Pattern Map X Address
;     - Pattern Map Y Address
;
; The TSR_MAP_A, TSR_MAP_B, and TSR_MAP_C flags indicate to transfer the pixel
; map specific information for the three maps A, B, and C respectively.
;
; These are:
;     - Pixel Map Base Pointer
;     - Pixel Map Height
;     - Pixel Map Width
;     - Pixel Map Format
;
; The TSR_PIXELOP flag indicates that the pixel operation register is to be
; copied.

        align   4
cProc   TransferShadowRegisters, <PUBLIC>, <edi, esi>
        parmD   ulFlags

cBegin
        ; XGA version: writes the groups of shadow registers selected by the
        ; TSR_* bits in ulFlags to the hardware, in the fixed order
        ; map A, map B, map C, lines, colour/mix, coordinates, pixel op.
        ; The pixel op register is written last.

        ; If software drawing is in use then exit without doing anything
        ; (only the low byte of the softDrawInUse word is compared).
        cmp     byte ptr softDrawInUse, TRUE
        jz      tsr_exit

        pushxga                 ; save the segment register used for XGA

        ; get a pointer to the shadow registers (edi is the base for every
        ; [edi].field reference below).
        lea     edi, ShadowXGARegs

        ; get a pointer to the real registers.
        movxga  pRealXGARegs

        ; wait until the hardware is free
        waitshort

        ; check if we're updating pixel map A registers
        test    ulFlags, TSR_MAP_A
        jz      short tsr_no_map_a

        ; select pixel map A through the map index register, then transfer
        ; its base pointer, width, height and format from the shadow copy
        memregwrite      pi_map_index_A, SEL_PIX_MAP_A

        mov     eax, [edi].sw_pi_map_base_ptr_A
        memregwrite      pi_map_base_ptr_A, eax

        mov     ax, [edi].sw_pi_map_width_A
        memregwrite      pi_map_width_A, ax

        mov     ax, [edi].sw_pi_map_height_A
        memregwrite      pi_map_height_A, ax

        mov     al, [edi].sw_pi_map_format_A
        memregwrite      pi_map_format_A, al


tsr_no_map_a:
        ; check if we're updating pixel map B registers
        test    ulFlags, TSR_MAP_B
        jz      short tsr_no_map_b

        ; select pixel map B through the map index register, then transfer
        ; its base pointer, width, height and format from the shadow copy
        memregwrite      pi_map_index_B, SEL_PIX_MAP_B

        mov     eax, [edi].sw_pi_map_base_ptr_B
        memregwrite      pi_map_base_ptr_B, eax

        mov     ax, [edi].sw_pi_map_width_B
        memregwrite      pi_map_width_B, ax

        mov     ax, [edi].sw_pi_map_height_B
        memregwrite      pi_map_height_B, ax

        mov     al, [edi].sw_pi_map_format_B
        memregwrite      pi_map_format_B, al

tsr_no_map_b:
        ; check if we're updating pixel map C registers
        test    ulFlags, TSR_MAP_C
        jz      short tsr_no_map_c

        ; select pixel map C through the map index register, then transfer
        ; its base pointer, width, height and format from the shadow copy
        memregwrite      pi_map_index_C, SEL_PIX_MAP_C

        mov     eax, [edi].sw_pi_map_base_ptr_C
        memregwrite      pi_map_base_ptr_C, eax

        mov     ax, [edi].sw_pi_map_width_C
        memregwrite      pi_map_width_C, ax

        mov     ax, [edi].sw_pi_map_height_C
        memregwrite      pi_map_height_C, ax

        mov     al, [edi].sw_pi_map_format_C
        memregwrite      pi_map_format_C, al

tsr_no_map_c:
        ; check if line registers are to be transferred
        test    ulFlags, TSR_LINES
        jz      short tsr_no_lines

        ; copy the lines related registers from the shadow to the hardware:
        ; error term, K1 and K2 as 16 bits, direction steps as 32 bits
        mov     ax, word ptr [edi].bres_error
        memregwrite      bres_error, ax
        mov     ax, [edi].bres_k1
        memregwrite      bres_k1, ax
        mov     ax, [edi].bres_k2
        memregwrite      bres_k2, ax
        mov     eax, [edi].dir_steps
        memregwrite      dir_steps, eax

tsr_no_lines:
        ; check if the colour and mix registers are to be transferred
        test    ulFlags, TSR_COLOUR_MIX
        jz      short tsr_no_colour_mix

        ; copy the colour and mix registers: mixes as bytes, colours as words
        mov     al, [edi].fg_mix
        memregwrite      fg_mix, al
        mov     al, [edi].bg_mix
        memregwrite      bg_mix, al
        mov     ax, [edi].fg_colour
        memregwrite      fg_colour, ax
        mov     ax, [edi].bg_colour
        memregwrite      bg_colour, ax
        mov     al, [edi].colour_comp_fun
IFDEF FIREWALLS
        ; The COLCOMP_SRC_NOT_EQUAL is not supported by the XGA hardware.
        ; This value is only recognised by the software drawing code (MESS)
        ; in order to implement the BM_SRCTRANSPARENT background mix.
        ; For further details see explanation in eddnbblt.c
        ; Debug builds trap here; the value is still written afterwards.
        cmp     al, COLCOMP_SRC_NOT_EQUAL
        jne     short @F
        int     3
@@:
ENDIF
        memregwrite      colour_comp_fun, al
        mov    ax, [edi].colour_comp_colour
        memregwrite      colour_comp_colour, ax

tsr_no_colour_mix:
        ; check if the pixmap coordinates are to be transferred
        test    ulFlags, TSR_COORDINATES
        jz      short tsr_no_coordinates

        ; copy operating dimension registers...
        mov     ax, [edi].dim1
        memregwrite      dim1, ax
        mov     ax, [edi].dim2
        memregwrite      dim2, ax

        ; source map coordinates (X and Y written together as one dword)...
        mov     eax, [edi].src_map
        memregwrite      src_map, eax

        ; destination map coordinates...
        mov     eax, [edi].dest_map
        memregwrite      dest_map, eax

        ; and pattern map coordinates
        mov     eax, [edi].patt_map
        memregwrite      patt_map, eax

tsr_no_coordinates:
        ; check if we need to start off a pixel operation
        test    ulFlags, TSR_PIXELOP
        jz      short tsr_no_pixelop

        ; do the pixel op: this is deliberately the last register written,
        ; after every other selected group has been transferred
        mov     eax, [edi].pixel_op
        memregwrite      pixel_op, eax

tsr_no_pixelop:

        popxga                  ; restore the segment register used for XGA

tsr_exit:

cEnd

else ;_8514
;*********************************************************************
; TransferShadowRegisters copies fields from the 8514 shadow registers to
; the 8514 hardware (memory mapped) registers. The subset of registers to
; be copied is specified by the ulFlags parameters as follows:
;
; The TSR_LINES flag indicates to transfer the following registers:
;     - Bresenham Error Term
;     - Bresenham K1
;     - Bresenham K2
;     - Bresenham LX
;
; The TSR_COLOUR_MIX flag indicates to transfer the following registers:
;     - Foreground Mix
;     - Background Mix
;     - Foreground Color
;     - Background Color
;     - Pixel Mode
;     - Color Compare Color
;     - Pattern0 (Low Nibble)
;     - Pattern1 (High Nibble)
;
; The TSR_COORDINATES flag indicates to transfer the following registers:
;     - Operation Dimension 1
;     - Operation Dimension 2
;     - Source X Address
;     - Source Y Address
;     - Destination X Address
;     - Destination Y Address
;
; The TSR_CLIPPING flag indicates to transfer the clipping registers:
;     - Top Scissors    - YMIN
;     - Left Scissors   - XMIN
;     - Bottom Scissors - YMAX
;     - Right Scissors  - XMAX
;
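; The TSR_PIXELOP flag is meant to indicate that the command register is to
; be copied. The write itself is currently commented out: the flag raises an
; INT 3 in FIREWALLS builds and is otherwise ignored.
;     - Command (not currently written)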
;
;*********************************************************************
        align   4
cProc   TransferShadowRegisters, <PUBLIC>, <edi, esi>
        parmD   ulFlags

cBegin
        ;*************************************************************
        ; 8514 version: outputs the groups of shadow registers selected
        ; by the TSR_* bits in ulFlags, in the fixed order lines,
        ; colour/mix, coordinates, clipping. Each group is preceded by
        ; a WaitQ for at least as many entries as it writes.
        ;*************************************************************

        ;*************************************************************
        ; If software drawing is in use then exit without doing anything
        ; (only the low byte of the softDrawInUse word is compared).
        ;*************************************************************
        cmp     byte ptr softDrawInUse, TRUE
        jz      tsr_exit

        WaitQIdle

        ;*************************************************************
        ; Get a pointer to the shadow registers (edi is the base for
        ; every [edi].field reference below).
        ;*************************************************************
        lea     edi, Shadow8514Regs

        ;*************************************************************
        ; Check if line registers are to be transferred, if not skip
        ; ahead to the next check...
        ;*************************************************************
        test    ulFlags, TSR_LINES
        jz      short tsr_no_lines

        ;*************************************************************
        ; copy the lines related registers from the shadow to the
        ; hardware (four writes; five queue entries are requested).
        ;*************************************************************
        WaitQ   5

        mov     ax, word ptr [edi].Err_Term
        outwQ   ERR_TERM,ax

        mov     ax, [edi].sr_K1
        outwQ   K1,ax

        mov     ax, [edi].sr_K2
        outwQ   K2,ax

        mov     ax, [edi].sr_LX
        outwQ   LX,ax

tsr_no_lines:

        ;*************************************************************
        ; Check if the colour and mix registers are to be transferred,
        ; if not skip ahead to the next check....
        ;*************************************************************
        test    ulFlags, TSR_COLOUR_MIX
        jz      short tsr_no_colour_mix

        ;*************************************************************
        ; copy the colour and mix registers (eight writes)
        ;*************************************************************
        WaitQ   8

        ; The two function values are loaded as dwords but only the low
        ; word (ax) is output.
        mov     eax, [edi].Function_0
        outwQ   FUNCTION_0,ax

        mov     eax, [edi].Function_1
        outwQ   FUNCTION_1,ax

        ; @DMS 24bpp this is ineffective for our 24bit driver. !!!
        mov     ax, [edi].Color_0
        outwQ   COLOR_0,ax

        mov     ax, [edi].Color_1
        outwQ   COLOR_1,ax

        ;@DMS look at color compare more carefully !!!
        mov     ax, [edi].Color_Comp

IFDEF FIREWALLS
        ;*************************************************************
        ; The COLCOMP_SRC_NOT_EQUAL is not supported by the XGA hardware.
        ; This value is only recognised by the software drawing code (MESS)
        ; in order to implement the BM_SRCTRANSPARENT background mix.
        ; For further details see explanation in eddnbblt.c
        ; Debug builds trap here; the value is still output afterwards.
        ;*************************************************************
        cmp     al, COLCOMP_SRC_NOT_EQUAL
        jne     short @F
        int     3
@@:
ENDIF
        outwQ   COLOR_COMP,ax

        ; The next three values have an index ORed into their top nibble
        ; (8, 9 and A) before being output.
        mov     ax, [edi].Pattern_0
        or      ax,08000h             ;Index 8 - Multifunction Ctrl Reg.
        outwQ   PATTERN_0,ax

        mov     ax, [edi].Pattern_1
        or      ax,09000h             ;Index 9 - Multifunction Ctrl Reg.
        outwQ   PATTERN_1,ax

        mov     ax, [edi].Mode
        or      ax,0A000h             ;Index A - Multifunction Ctrl Reg.
        outwQ   MODE,ax

tsr_no_colour_mix:

        ;*************************************************************
        ; Check if the pixmap coordinates are to be transferred, if
        ; not, skip ahead to the next check....
        ;*************************************************************
        test    ulFlags, TSR_COORDINATES
        jz      short tsr_no_coordinates

        ;*************************************************************
        ; copy operating dimension registers (LX, LY)...
        ;*************************************************************
        WaitQ   6

        mov     ax, [edi].sr_LX
        outwQ   LX,ax

        ; No index bits are ORed in for LY (index 0).
        mov     ax, [edi].sr_LY       ;Index 0 - Multifunction Ctrl Reg.
        outwQ   LY,ax

        ;*************************************************************
        ; source and destination coordinates, written in the order
        ; Y0, Y1, X0, X1...
        ;*************************************************************
        mov     ax, [edi].sr_Y0
        outwQ   Y0,ax

        mov     ax, [edi].sr_Y1
        outwQ   Y1,ax

        mov     ax, [edi].sr_X0
        outwQ   X0,ax

        mov     ax, [edi].sr_X1
        outwQ   X1,ax

tsr_no_coordinates:

        ;*************************************************************
        ; Check if we need to update the scissors, if not, skip ahead
        ; to the next check...
        ;*************************************************************
        test    ulFlags, TSR_CLIPPING
        jz      short tsr_no_clipping

        ;*************************************************************
        ; copy clipping coordinates to hardware; each value has its
        ; index (1 to 4) ORed into the top nibble first
        ;*************************************************************
        WaitQ   4

        mov     ax,[edi].YMin
        or      ax,01000h             ;Index 1 - Multifunction Ctrl Reg.
        outwQ   YMIN,ax

        mov     ax,[edi].XMin
        or      ax,02000h             ;Index 2 - Multifunction Ctrl Reg.
        outwQ   XMIN,ax

        mov     ax,[edi].YMax
        or      ax,03000h             ;Index 3 - Multifunction Ctrl Reg.
        outwQ   YMAX,ax

        mov     ax,[edi].XMax
        or      ax,04000h             ;Index 4 - Multifunction Ctrl Reg.
        outwQ   XMAX,ax

tsr_no_clipping:
        ;*************************************************************
        ; Check if we need to start off a pixel operation, if not,
        ; we're done.
        ;*************************************************************
        test    ulFlags, TSR_PIXELOP
        jz      short tsr_exit

        ;*************************************************************
        ; Do the pixel op.  Remember, this action usually kicks off
        ; some kind of drawing operation, so make sure all of the
        ; data registers needed for this op are set up first.
        ;
        ; @RCW - We're not sure that we want to write to the command
        ;        register via this function.  For now, however, we'll
        ;        put up an INT 3 to make us look at this if we try.
        ;
        ; As the code stands nothing is written for TSR_PIXELOP: the
        ; command register write below is commented out, so FIREWALLS
        ; builds trap and other builds fall straight through.
        ;*************************************************************
        ifdef   FIREWALLS
        int     3
        ;WaitQ   1
        ;mov     eax, [edi].Cmd_Flags
        ;outwQ   CMD_FLAGS,ax
        endif


tsr_exit:

cEnd
endif;_8514


ifndef  _8514
; CopyMemoryToVRAM copies a block of system memory to VRAM (XGA build).
;
; The data is first copied by the CPU into the PHUNK (through pPhunkVirt).
; The hardware is then programmed to blt it from the PHUNK (pPhunkPhys,
; pixel map A, Motorola format) to the destination (pixel map B, Intel
; format).
;
; pSystemMemory is the virtual address of the source of the copy.
; pVRAMMemory is the physical address of the destination of the copy.
; ulWidth is the width of the block of memory to copy in pels.
; ulHeight is the height of the block of memory to copy in pels.
; ulBPPFormat is the bits per pel format of the memory to be copied.
;
; Notes:
;     - ulHeight and ulWidth are zero based (ie 0 corresponds to 1 pel).
;     - The format of the source is motorola, and the VRAM is intel.
;     - Any format that is not 8, 1 or 4 bits per pel is sized as 16 bits
;       per pel.
;     - No check is made here that the block fits in the PHUNK.
;     - The direction flag is cleared before the string copy so that the
;       copy always runs upwards whatever state the caller left it in
;       (the 8514 version of this routine does the same).
;     - The routine returns as soon as the blt has been started; it does
;       not wait for it to complete.

; DO NOT be tempted to copy 1bpp data as say 8bpp data because the
; motorola to intel conversion means that this is not what we want.

        align   4
cProc   CopyMemoryToVRAM, <PUBLIC>, <esi, edi>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   ulWidth
        parmD   ulHeight
        parmD   ulBPPFormat

cBegin
        pushxga                 ; save the segment register used for XGA

        ; First copy the system memory to the PHUNK.

        ; Get the parameters we are about to use into registers.
        mov     eax, ulWidth
        mov     edx, ulHeight
        mov     ecx, ulBPPFormat

        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     ecx, BPP_MASK

        ; Adjust the width so that it is the number of bytes wide minus one
        ; instead of the number of pels wide minus one.

        ; If 8 bits per pel then do nothing.
        cmp     ecx, EIGHT_BPP
        jz      short got_width

        ; If 1 bit per pel then divide by 8.
        cmp     ecx, ONE_BPP
        jnz     short @F
        shr     eax, 3
        jmp     short got_width

        ; If 4 bits per pel then divide by 2.
@@:     cmp     ecx, FOUR_BPP
        jnz     short @F
        shr     eax, 1
        jmp     short got_width

        ; At 16 bits per pel then multiply by 2 and add 1.
@@:     shl     eax, 1
        inc     eax

got_width:

        ; Adjust width and height because we currently have 0 -> n-1
        ; but we actually need 1 -> n
        inc     eax
        inc     edx

        ; Calculate the total size of the bitmap in bytes.
        ; (multiply the width and the height; only the low 32 bits of the
        ; product, in eax, are used below)
        mul     edx

        ; Since the hardware may be using the phunk data, we must not
        ; write to it until the hardware is free

        ; Get a pointer to the hardware registers.
        movxga  pRealXGARegs

        ; Wait for the hardware to be free before we start writing to
        ; the registers.
        waitshort

        push    esi             ; save the HW register pointer

        ; Set up edi to point to the PHUNK and esi to point to the source
        ; of the copy.
        mov     edi, pPhunkVirt
        mov     esi, pSystemMemory

        ; Make sure the string moves below count upwards.
        cld

        ; Transfer the data: do it in 32-bit blocks then finish off any odd
        ; bytes. ecx is zero after the first rep, so loading cl alone gives
        ; the 0..3 byte remainder.
        mov     ecx, eax
        shr     ecx, 2
        rep     movs dword ptr [edi], dword ptr [esi]
        mov     cl, al
        and     cl, 3
        rep     movs byte ptr [edi], byte ptr [esi]

        ; The source is now in the PHUNK. Next we must set up a blt
        ; using the hardware to copy from the PHUNK to the VRAM.

        pop     esi             ; restore the HW register pointer

        ; Set up map A to use as the source (ie. the PHUNK).
        memregwrite     pi_map_index_A, SEL_PIX_MAP_A

        mov             eax, pPhunkPhys
        memregwrite     pi_map_base_ptr_A, eax

        ; get width and height into registers (still zero based, in pels)
        mov             ecx, ulWidth
        mov             edx, ulHeight
        memregwrite     pi_map_width_A,  cx
        memregwrite     pi_map_height_A, dx

        ; Write mapA format (Motorola) and bits per pel
        mov             al, byte ptr ulBPPFormat
        or              al, MOTOROLA
        memregwrite     pi_map_format_A, al

        ; Set up map B to use as the destination (ie. VRAM).
        memregwrite     pi_map_index_B, SEL_PIX_MAP_B

        mov             eax, pVRAMMemory
        memregwrite     pi_map_base_ptr_B, eax

        ; use the same width and height as the source
        memregwrite     pi_map_width_B,  cx
        memregwrite     pi_map_height_B, dx

        ; Write mapB format (Intel) and bits per pel
        mov             al, byte ptr ulBPPFormat
        and             al, not MOTOROLA
        memregwrite     pi_map_format_B, al

        ; Write the blt dimensions
        memregwrite     dim1, cx        ; width
        memregwrite     dim2, dx        ; height

        ; Write the blt origins (0,0 in both maps)
        xor             eax,eax
        memregwrite     src_map, eax
        memregwrite     dest_map, eax

        ; Set the foreground mix to 'COPY SOURCE'. The background mix is
        ; not written here; the pixel op below selects PAT_PIX_MAP_FORE.

        memregwrite     fg_mix, HWMIX_SOURCE

        ; Now write the pixel op to kick off the blt
        mov             eax,  BACK_SRC_SRC_PIX_MAP or \
                              FORE_SRC_SRC_PIX_MAP or \
                              STEP_PXBLT or           \
                              SRC_PIX_MAP_A or        \
                              DST_PIX_MAP_B or        \
                              PAT_PIX_MAP_FORE or     \
                              MASK_PIX_MAP_OFF or     \
                              DRAW_MODE_DONTCARE or   \
                              DIR_OCTANT_LRTB
        memregwrite     pixel_op, eax

        popxga                  ; restore the segment register used for XGA
cEnd

else ;_8514
.386    ;put us in 386 mode
;**************************************************************************
;*
;* FUNCTION NAME = CopyMemoryToVRAM   (8514 / S3 version)
;*
;* DESCRIPTION   = Downloads a rectangular block of pel data from system
;*                 memory to the adapter through the COLOR_0_WAIT data
;*                 port. The destination rectangle, mixes and command are
;*                 programmed first, then the data is streamed out by the
;*                 CPU. Separate paths handle 1bpp, 8bpp, 16bpp (S3 only)
;*                 and 24bpp (S3 and BPP24 only) source data.
;*
;* INPUT         = pSystemMemory  address of the source data. A zero
;*                                pointer ends at copy_error (int 3) on
;*                                the 1bpp, 8bpp and 16bpp paths.
;*                 pVRAMMemory    packed destination position: bits 0-15
;*                                are the y coordinate, bits 16-27 the x
;*                                coordinate (the top four bits are
;*                                masked off).
;*                 ulWidth        width in pels, zero based
;*                 ulHeight       height in pels, zero based
;*                 ulBPPFormat    format; only the BPP_MASK bits are used
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;* NOTES         = The one based height is always zero extended into ebx
;*                 and the total pel count is always formed with a 32-bit
;*                 multiply. The 16-bit "mul bx" used previously left the
;*                 product in dx:ax, so the count taken from eax was
;*                 truncated for blocks of 65536 pels or more (a block of
;*                 exactly 65536 pels gave a count of zero). The ENDIVE
;*                 symbol therefore no longer alters this routine.
;*
;*                 NOTE(review): copy_24bpp is only assembled when both S3
;*                 and BPP24 are defined, while the branch to it needs
;*                 only BPP24.
;*
;**************************************************************************

        align   4
cProc   CopyMemoryToVRAM, <PUBLIC>, <esi, edi, ebx>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   ulWidth
        parmD   ulHeight
        parmD   ulBPPFormat

        localW  x_cache
        localW  y_cache
cBegin

        ; Open the vertical scissors (and on S3 the right scissor too).
        WaitQIdle
        WaitQ   3
        outwQ   YMIN,(YMIN_2DECODE+0)
        ifndef S3
        outwQ   YMAX,(YMAX_2DECODE+SCR_8514_HEIGHT-1)   ; set max y extent
        else
        outwQ   YMAX,(YMAX_2DECODE+SCR_S3_HEIGHT-1)     ; set max y extent
        outwQ   XMAX,(XMAX_2DECODE+SCR_S3_WIDTH-1)      ; set max x extent
        endif

        cld                                              ; work up in address
        ;Derive X0 and Y0 from pVRAM: y is the low word, x the next 12 bits
        mov     eax,pVRAMMemory
        and     eax,0FFFFFFFh
        mov     y_cache,ax
        ror     eax,16
        mov     x_cache,ax

        ; Dispatch on the source format.
        mov     eax,ulBPPFormat
        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     eax, BPP_MASK
        cmp     eax,ONE_BPP
        jz      copy_1BPP
        ifdef   BPP24
        cmp     eax,TWENTYFOUR_BPP
        jz      copy_24bpp
        endif

        ; Colour source (8bpp, or 16bpp on S3): program mixes, write mask
        ; and destination rectangle.
        WaitQ   8
        outwQ   MODE,(MODE_2DECODE+MD_PS_ONES)   ; 
        outwQ   FUNCTION_1,(FUNC_2OP_VAR+FUNC_S)
        outwQ   FUNCTION_0,(FUNC_2OP_VAR+FUNC_S)
        outwQ   WRITE_ENABLE,WRITE_ALL_PLANES
        outwQ   LX,ulWidth                       ; set zero based width
        outwQ   LY,ulHeight                      ; set zero based height
        ; ax is read here as still holding ulHeight from the outwQ above
        ; (as the original code did - confirm against the outwQ macro).
        ; Make it one based and zero extend it so that ebx can be used in
        ; the 32-bit multiplies below.
        inc     ax
        movzx   ebx,ax                           ; ebx = one based height
        outwQ   X0,x_cache                       ; set the destination x
        outwQ   Y0,y_cache                       ; set the destination y

        WaitQ   1
        outwQ   CMD_FLAGS,CMDPATCOLOR            ; do entire bitmap

; The 8514/A is now patiently waiting for us to download 8bpp Pixel data.
; We do this for each byte in each row of the source bitmap.

        ; NOTE(review): the null check comes after the command has been
        ; issued.
        mov     esi, pSystemMemory
        or      esi,esi
        jz      copy_error

ifdef  S3
        mov     eax,ulBPPFormat
        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     eax, BPP_MASK
        cmp     eax,EIGHT_BPP
        jz      copy_8bpp
ifdef   BPP24
        jmp      copy_16bpp

;;
;; SOURCE IS COLOR - 24BPP
;;
;; The rectangle is programmed in bytes: x and the one based width are
;; both multiplied by 3.
;; NOTE(review): unlike the other paths, pSystemMemory is not checked for
;; zero here.
;;
copy_24bpp:

        WaitQ   8
        outwQ   MODE,(MODE_2DECODE+MD_PS_ONES)   ; 
        outwQ   FUNCTION_1,(FUNC_2OP_VAR+FUNC_S)
        outwQ   FUNCTION_0,(FUNC_2OP_VAR+FUNC_S)
        outwQ   WRITE_ENABLE,WRITE_ALL_PLANES
        mov     ax,x_cache              ; one byte / pel
        mov     dx,3
        mul     dx                      ; 3 bytes / pel
        outwQ   X0,ax                   ; set the destination x
        outwQ   Y0,y_cache              ; set the destination y
        outwQ   LY,ulHeight             ; set zero based height
        ; ax is read as still holding ulHeight (see the note on the
        ; colour path above).
        inc     ax
        movzx   ebx,ax                  ; ebx = one based height
        mov     ax,ulWidth              ; one byte / pel
        inc     ax
        mov     dx,3
        mul     dx                      ; now ax = 3 bytes / pel
        dec     ax
        outwQ   LX,ax                   ; set zero based width
        mov     cx,ax                   ; save the width

        WaitQ   1
        outwQ   CMD_FLAGS,CMDPATCOLOR   ; do entire bitmap
        mov     esi, pSystemMemory

        mov     dx,COLOR_0_WAIT         ; variable data port
        mov     ax,cx                   ; restore the width
        inc     ax
        movzx   ecx,bx                  ; ecx = scan line count
        movzx   ebx,ax                  ; ebx = bytes per scan line

        ; One pass per scan line: whole words first, then an odd byte.
color_24_loop:
        push    ecx
        mov     ecx,ebx
        shr     ecx,1                   ; set up to do words
   rep  outsw                           ; Load data for remaining words
        mov     ecx,ebx
        shr     ecx,1
        jnc     short @f                ; is count odd
        lodsb
        out     dx,ax                   ; Load odd byte.
@@:     pop     ecx
        loop    color_24_loop

        jmp     copy_exit


copy_16bpp:
endif
;;
;; SOURCE IS COLOR - 16BPP
;;
;; The whole bitmap is sent as one stream of words, with the two bytes
;; of every word swapped on the way out.
;;

        mov     eax,ulWidth             ; one word / pel
        inc     eax                     ; make one based
        mul     ebx                     ; eax = total # of pels (32 bit)
        mov     ecx,eax
        mov     dx,COLOR_0_WAIT         ; variable data port

   ; Load data for words
@@:     lodsw
        ror     ax,8
        out     dx,ax
        loop    @b
        jmp     copy_exit

copy_8bpp:
endif

;;
;; SOURCE IS COLOR - 8BPP
;;

        ifndef  S3
        ; Non-S3: the whole bitmap is sent as one stream.
color_mp_loop:
        mov     eax,ulWidth             ; one byte / pel
        inc     eax                     ; make one based
        mul     ebx                     ; eax = total # of pels (32 bit)
        mov     ecx,eax
        mov     dx,COLOR_0_WAIT         ; variable data port
        shr     ecx,1                   ; set up to do words

ColorMem2V_Loop:
   rep  outsw                           ; Load data for remaining words
        shr     eax,1
        jnc     copy_exit               ; is count odd
        ; NOTE(review): lodsw reads one byte beyond the last source pel
        ; here; the S3 branch below uses lodsb for the odd byte.
        lodsw
        out     dx,ax                   ; Load data for 1 pel.

        else
        ; S3: the data is sent one scan line at a time.
        mov     dx,COLOR_0_WAIT         ; variable data port
        movzx   ecx,bx                  ; save scan line count
color_mp_loop:
        push    ecx
        mov     eax,ulWidth             ; one byte / pel
        inc     eax                     ; make one based
        mov     ecx,eax
        shr     ecx,1                   ; set up to do words
   rep  outsw                           ; Load data for remaining words
        shr     eax,1
        jnc     short @f                ; is count odd
        lodsb
        out     dx,ax                   ; Load data for 1 pel.
@@:     pop     ecx
        loop    color_mp_loop
        endif

        jmp     copy_exit

;;
;; SOURCE IS MONOCHROME - 1BPP
;;

copy_1BPP:
        ifdef   BPP24
        ; check the destination for 24bpp; if so hand the whole job to
        ; Copy24MonoToVRAM. 26 bytes of arguments are pushed (one word
        ; and six dwords) and removed again after the call.
        test    [_DDT],USE_24BPP
        jz      short @f
        xor     ax,ax
        push    ax                      ; start bits
        push    ulBPPFormat
        push    ulHeight
        push    ulWidth
        push    ulWidth
        movzx   eax,x_cache
        ror     eax,16
        mov     ax,y_cache
        push    eax                     ; x in the high word, y in the low
        push    pSystemMemory
        call    Copy24MonoToVRAM
        add     esp,26
        jmp     copy_exit
@@:
        endif

        WaitQ   3                       ; need room in the queue
        mov     bx,x_cache              ; set up scissor rectangle
        mov     ax,bx
        or      ax,XMIN_2DECODE
        outwQ   XMIN,ax

        mov     eax,ulWidth             ; width of the mono bitmap
        outwQ   LX,ax                   ; 0-based width of the pointer buffer
        inc     eax
        add     bx,ax                   ; Calculate the right edge of the scissor rect.
                                        ; (x + one based width)

        mov     ax,bx                   ; Grab the right edge value.
        or      ax,XMAX_2DECODE
        outwQ   XMAX,ax


        WaitQ   7
        outwQ   LY,ulHeight
        outwQ   X0,x_cache              ; data x coordinate
        outwQ   Y0,y_cache              ; data y coordinate
        outbQ   FUNCTION_0,(FUNC_2OP_COL0+FUNC_S); write 0s to bg
        outbQ   FUNCTION_1,(FUNC_2OP_COL1+FUNC_S); write 1s to fg
        outwQ   MODE,(MODE_2DECODE+MD_PS_VAR+MD_UP_FALSE)
        outwQ   CMD_FLAGS,CMD_WCUR      ; command to output cursor masks

; The 8514/A is now patiently waiting for us to download patterns of the
; AND and XOR masks. We do this in the format expected by the adapter.

        mov     ecx,ulHeight            ; for # of scanlines in a pointer
        inc     ecx                     ; ecx = ulHeight+1 ( make one based one )
        mov     esi,pSystemMemory
        or      esi,esi
        jz      short copy_error
        mov     dx,COLOR_0_WAIT         ; variable data port

outer_mono_mp_loop:

        ifndef  S3
        push    ecx                     ; Save the outer loop counter.
        mov     ecx,ulWidth
        inc     ecx                     ; make one based
        shr     ecx,4                   ; convert to word count
        jnc     inner_mono_mp_loop
        inc     ecx                     ; for single byte or odd count
                                        ; any extra we do is clipped
        ; NOTE(review): the carry tested above is bit 3 of the one based
        ; width, so widths that are not a multiple of 8 pels are rounded
        ; down, and a width below 8 pels enters the loop with ecx = 0.
        ; Confirm that callers only pass multiples of 8.
inner_mono_mp_loop:
                                        ; Load up a byte of monochrome data
        twistbits16nomap                ; Massage into planar format and send it
                                        ; to the board.
        loop inner_mono_mp_loop

        pop     ecx                     ; Restore outer loop counter

        loop outer_mono_mp_loop

        else    ; S3
        push    ecx                     ; Save the outer loop counter.
        mov     ecx,ulWidth
        inc     ecx                     ; make one based
        add     ecx,7                   ; adjust for odd pels
        mov     eax,ecx
        shr     ecx,4                   ; round off and get word count
        jcxz    @f
        rep     outsw                   ; to the board.
@@:     shr     eax,4                   ; get the odd word
        jnc     short @f                ; carry = odd byte left over
        lodsb

        out     dx,ax
@@:     pop     ecx                     ; Restore outer loop counter

        loop outer_mono_mp_loop
        endif   ; S3

        WaitQ   2
        outwQ   XMIN,(XMIN_2DECODE+0)   ;; set scissor to full screen
        ifndef  S3
        outwQ   XMAX,(XMAX_2DECODE+SCR_8514_WIDTH-1)   ; set max x extent
        else
        ; we should be okay here for all resolutions
        outwQ   XMAX,(XMAX_2DECODE+SCR_S3_WIDTH-1)   ; set max x extent
        endif

        jmp     short copy_exit
copy_error:
        ; zero source pointer: trap into the debugger
        int     3
copy_exit:
        ;call    DVRAM
cEnd

;**************************************************************************
;*
;* FUNCTION NAME = MemoryPelToVRAMPel      CMVC 76949
;*
;* DESCRIPTION   = This function is responsible for displaying a single pel
;*                 using the user defined foreground & background mixes and
;*                 source colors.
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************

        align   4
;---------------------------------------------------------------------
; MemoryPelToVRAMPel
;
; Sends exactly one pel from system memory to the adapter, using the
; caller's mixes.
;
;   pSystemMemory - address of the source pel data.  A null pointer
;                   hits the int 3 at copy1_error on the 8/16bpp and
;                   mono paths.
;   pVRAMMemory   - packed destination: low word = y, bits 16-27 = x
;                   (the top four bits are masked off).
;   FgMix         - value written to FUNCTION_1.
;   Mix           - value written to FUNCTION_0.
;   ulBPPFormat   - source format; only the BPP_MASK bits are used to
;                   pick the 1bpp / 8bpp / 16bpp / 24bpp path.
;
; Width and height are fixed at one pel, so the zero-based extents
; written to LX/LY are 0 (2 for LX in the 24bpp path: 3 bytes per pel).
;---------------------------------------------------------------------
cProc   MemoryPelToVRAMPel, <PUBLIC>, <esi, edi, ebx>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   FgMix
        parmD   Mix
        parmD   ulBPPFormat

        localW  x_cache
        localW  y_cache
        localD  bpp_Format
cBegin

        WaitQIdle
        WaitQ   3
        outwQ   YMIN,(YMIN_2DECODE+0)
        ifndef S3
        outwQ   YMAX,(YMAX_2DECODE+SCR_8514_HEIGHT-1)   ; set max y extent
        else
        outwQ   YMAX,(YMAX_2DECODE+SCR_S3_HEIGHT-1)     ; set max y extent
        outwQ   XMAX,(XMAX_2DECODE+SCR_S3_WIDTH-1)      ; set max x extent
        endif

        cld                                              ; work up in address
        ; Derive X0 and Y0 from pVRAMMemory (y in the low word, x above it)
        mov     eax,pVRAMMemory
        and     eax,0FFFFFFFh
        mov     y_cache,ax
        ror     eax,16
        mov     x_cache,ax

        mov     eax,ulBPPFormat
        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     eax, BPP_MASK
        mov     bpp_Format,eax      ; Keep current bits per pel format.

        cmp     eax,ONE_BPP
        jz      copy1_1BPP
        ifdef   BPP24
        cmp     eax,TWENTYFOUR_BPP
        jz      copy1_24bpp
        endif

        WaitQ   8
        outwQ   MODE,(MODE_2DECODE+MD_PS_ONES)   ; pattern always 1s
        outwQ   FUNCTION_1,(FgMix)               ; caller's foreground mix
        outwQ   FUNCTION_0,(Mix)                 ; caller's background mix
        outwQ   WRITE_ENABLE,WRITE_ALL_PLANES
        outwQ   LX,0                             ; zero based width of one pel
        outwQ   LY,0                             ; zero based height of one pel
        ; NOTE(review): presumably outwQ leaves the value just written
        ; (0) in ax, so bx becomes the one based height 1 - confirm
        ; against the macro definition.
        mov     bx,ax
        inc     bx                               ; make one based height
        outwQ   X0,x_cache                       ; set the destination x
        outwQ   Y0,y_cache                       ; set the destination y

        WaitQ   1
        outwQ   CMD_FLAGS,CMDPATCOLOR            ; start the transfer

; The adapter is now waiting for us to download the pixel data for the
; single pel through the COLOR_0_WAIT port.

        mov     esi, pSystemMemory
        or      esi,esi
        jz      copy1_error

ifdef  S3
        cmp     bpp_Format, EIGHT_BPP
        jz      copy1_8bpp
ifdef   BPP24
        jmp      copy1_16bpp

;;
;; SOURCE IS COLOR - 24BPP
;;
copy1_24bpp:

        WaitQ   8
        outwQ   MODE,(MODE_2DECODE+MD_PS_ONES)   ; pattern always 1s
        outwQ   FUNCTION_1,(FgMix)
        outwQ   FUNCTION_0,(Mix)
        outwQ   WRITE_ENABLE,WRITE_ALL_PLANES
        mov     ax,x_cache              ; one byte / pel
        mov     dx,3
        mul     dx                      ; 3 bytes / pel
        outwQ   X0,ax                   ; set the destination x
        outwQ   Y0,y_cache              ; set the destination y
        outwQ   LY,0                    ; zero based height of one pel
        mov     bx,ax                   ; see NOTE(review) above: ax assumed 0
        inc     bx                      ; make one based height
        mov     ax,2                    ; zero base width for a single pel at 3 bytes / pel
        outwQ   LX,ax                   ; set zero based width
        mov     cx,ax                   ; save the width

        WaitQ   1
        outwQ   CMD_FLAGS,CMDPATCOLOR   ; start the transfer
        mov     esi, pSystemMemory

        mov     dx,COLOR_0_WAIT         ; variable data port
        mov     ax,cx                   ; restore the width
        inc     ax
        movzx   ecx,bx                  ; height (always 1; no scan line loop)
        movzx   ebx,ax                  ; one based width in bytes (3)

        ; Only one scan line is ever sent, so there is no outer loop and
        ; the loop counter is NOT pushed.  The earlier code pushed ecx
        ; here with the matching pop commented out, which left the stack
        ; unbalanced on the way to cEnd.
        mov     ecx,ebx
        shr     ecx,1                   ; set up to do words
   rep  outsw                           ; Load data for the whole words
        mov     ecx,ebx
        shr     ecx,1
        jnc     short @f                ; is count odd
        lodsb
        out     dx,ax                   ; Load odd byte.
@@:
        jmp     copy1_exit


copy1_16bpp:
endif
;;
;; SOURCE IS COLOR - 16BPP
;;

        mov     eax,0                   ; ulWidth  one word / pel
        inc     eax                     ; make one based
        mul     bx                      ; Determine # of pels
        mov     ecx,eax
        mov     dx,COLOR_0_WAIT         ; variable data port

   ; Load data for words; each word is byte swapped before it is sent
@@:     lodsw
        ror     ax,8
        out     dx,ax
        loop    @b
        jmp     copy1_exit

copy1_8bpp:
endif

;;
;; SOURCE IS COLOR - 8BPP
;;

        ifndef  S3
color1_mp_loop:
        mov     eax,0                   ; ulWidth  one byte / pel
        inc     eax                     ; make one based
        mul     bx                      ; Determine # of pels
        mov     ecx,eax
        mov     dx,COLOR_0_WAIT         ; variable data port
        shr     ecx,1                   ; set up to do words

   rep  outsw                           ; Load data for whole words
        shr     eax,1
        jnc     copy1_exit               ; is count odd
        lodsw
        out     dx,ax                   ; Load data for 1 pel.

        else
        mov     dx,COLOR_0_WAIT         ; variable data port
        movzx   ecx,bx                  ; save scan line count
c_mp_loop:
        push    ecx
        mov     eax,0                   ; ulWidth one byte / pel
        inc     eax                     ; make one based
        mov     ecx,eax
        shr     ecx,1                   ; set up to do words
   rep  outsw                           ; Load data for whole words
        shr     eax,1
        jnc     short @f                ; is count odd
        lodsb
        out     dx,ax                   ; Load data for 1 pel.
@@:     pop     ecx
        loop    c_mp_loop
        endif

        jmp     copy1_exit

;;
;; SOURCE IS MONOCHROME - 1BPP
;;

copy1_1BPP:
        ifdef   BPP24
        ; check the destination for 24bpp; if so hand the pel to
        ; Copy24MonoToVRAM instead of driving the hardware here
        test    [_DDT],USE_24BPP
        jz      short @f
        xor     ax,ax
        push    ax                      ; start bits
        push    ulBPPFormat
        push    0                       ; ulHeight
        push    0                       ; ulWidth
        push    0                       ; ulWidth
        movzx   eax,x_cache
        ror     eax,16
        mov     ax,y_cache
        push    eax                     ; x in high word, y in low word
        push    pSystemMemory
        call    Copy24MonoToVRAM
        add     esp,26                  ; drop the arguments pushed above
        jmp     copy1_exit
@@:
        endif

        WaitQ   3                       ; need room in the queue
        mov     bx,x_cache              ; set up scissor rectangle
        mov     ax,bx
        or      ax,XMIN_2DECODE
        outwQ   XMIN,ax

        mov     eax,0                   ; ulWidth width of the mono bitmap
        outwQ   LX,ax                   ; 0-based width of the pointer buffer
        inc     eax
        add     bx,ax                   ; Calculate the right edge of the scissor rect.

        mov     ax,bx                   ; Grab the right edge value.
        or      ax,XMAX_2DECODE
        outwQ   XMAX,ax


        WaitQ   7
        outwQ   LY,0                    ; ulHeight
        outwQ   X0,x_cache              ; data x coordinate
        outwQ   Y0,y_cache              ; data y coordinate
        outbQ   FUNCTION_0,(Mix); mix applied to 0 bits
        outbQ   FUNCTION_1,(FgMix); mix applied to 1 bits
        outwQ   MODE,(MODE_2DECODE+MD_PS_VAR+MD_UP_FALSE)
        outwQ   CMD_FLAGS,CMD_WCUR      ; command to output the mono data

; The adapter is now waiting for us to download the monochrome data in
; the format it expects.

        mov     ecx,0                   ; ulHeight for # of scanlines
        inc     ecx                     ; ecx = ulHeight+1 ( make one based )
        mov     esi,pSystemMemory
        or      esi,esi
        jz      short copy1_error
        mov     dx,COLOR_0_WAIT         ; variable data port

outer_mono_loop:

        ifndef  S3
        push    ecx                     ; Save the outer loop counter.
        mov     ecx,0                   ; ulWidth
        inc     ecx                     ; make one based
        ; Round UP to whole words so a single pel still sends one word.
        ; The earlier shr/jnc/inc sequence produced a count of 0 here
        ; (which makes LOOP wrap through 2^32 iterations) and its jnc
        ; targeted inner_mono_mp_loop, a label in a different procedure.
        add     ecx,15
        shr     ecx,4                   ; convert to word count (>= 1)
                                        ; any extra we do is clipped
inner_mono_loop:
                                        ; Load up a word of monochrome data
        twistbits16nomap                ; Massage into planar format and send it
                                        ; to the board.
        loop inner_mono_loop

        pop     ecx                     ; Restore outer loop counter

        loop outer_mono_loop

        else    ; S3
        push    ecx                     ; Save the outer loop counter.
        mov     ecx,0                   ; ulWidth
        inc     ecx                     ; make one based
        add     ecx,7                   ; adjust for odd pels
        mov     eax,ecx
        shr     ecx,4                   ; round off and get word count
        jcxz    @f
        rep     outsw                   ; to the board.
@@:     shr     eax,4                   ; get the odd byte
        jnc     short @f
        lodsb

        out     dx,ax
@@:     pop     ecx                     ; Restore outer loop counter

        loop outer_mono_loop
        endif   ; S3

        WaitQ   2
        outwQ   XMIN,(XMIN_2DECODE+0)   ;; set scissor to full screen
        ifndef  S3
        outwQ   XMAX,(XMAX_2DECODE+SCR_8514_WIDTH-1)   ; set max x extent
        else
        ; we should be okay here for all resolutions
        outwQ   XMAX,(XMAX_2DECODE+SCR_S3_WIDTH-1)   ; set max x extent
        endif

        jmp     short copy1_exit
copy1_error:
        int     3                       ; null source pointer: break to debugger
copy1_exit:
        ;call    DVRAM
cEnd
;********** END OF CMVC 76949 NEW CODE ***********

;**************************************************************************
;*
;* FUNCTION NAME = CopyVRAMToMemory
;*
;* DESCRIPTION   =
;*
;* INPUT         = NONE
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************

        align   4
;---------------------------------------------------------------------
; CopyVRAMToMemory
;
; Reads a rectangle back from the adapter into system memory through
; the COLOR_0_WAIT data port.
;
;   pSystemMemory - destination buffer (loaded into edi).
;   pVRAMMemory   - packed source position: low word = y, high word = x.
;   ulWidth       - zero based width  (incremented before use as a count).
;   ulHeight      - zero based height (incremented before use as a count).
;   ulBPPFormat   - only the BPP_MASK bits are examined to choose the
;                   8bpp / 16bpp / 24bpp read loop.
;
; NOTE(review): several places read ax straight after an outwQ and
; treat it as the value just written; this presumably relies on how
; outwQ is implemented - confirm against the macro.
;---------------------------------------------------------------------
cProc   CopyVRAMToMemory, <PUBLIC>, <esi, edi, ebx, ecx , edx>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   ulWidth
        parmD   ulHeight
        parmD   ulBPPFormat

        localW  x_cache
        localW  y_cache
cBegin

        ;Derive X0 and Y0 from pVRAM (y in the low word, x in the high word)
        mov     eax,pVRAMMemory
        mov     y_cache,ax
        ror     eax,16
        mov     x_cache,ax

        WaitQIdle
        WaitQ   8
        outwQ   MODE,(MODE_2DECODE+MD_PS_ONES)   ; pattern always 1s
        outwQ   FUNCTION_1,(FUNC_2OP_VAR+FUNC_S)
        outwQ   FUNCTION_0,(FUNC_2OP_VAR+FUNC_S)
        outwQ   READ_ENABLE,READ_ALL_PLANES

        ifdef   BPP24
        mov     eax,ulBPPFormat
        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     eax, BPP_MASK
        cmp     eax,TWENTYFOUR_BPP
        jz      short cvram_24bpp
        endif

        outwQ   LX,ulWidth              ; zero based width
        outwQ   LY,ulHeight             ; zero based height
        movzx   ebx,ax                  ; ebx = height (ax assumed left by outwQ)
        outwQ   X0,x_cache              ; starting x of the source rectangle
        outwQ   Y0,y_cache              ; starting y of the source rectangle
        WaitQ   1
        outwQ   CMD_FLAGS,CMDPATCOLOR_R ; start the read of the rectangle

; The adapter is now waiting for us to upload the pixel data.
; We read it from the data port into the destination buffer.

        cld
        mov     edi, pSystemMemory      ; Load EDI with target pointer
        inc     ebx                     ; Make height one-based

ifdef  S3
        mov     eax,ulBPPFormat
        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     eax, BPP_MASK
        cmp     eax,EIGHT_BPP
        jz      cvram_8BPP

ifdef   BPP24
        jmp     cvram_16bpp

;;
;; SOURCE IS COLOR - 24BPP
;;
cvram_24bpp:

        WaitQ   5
        mov     ax,x_cache              ; one byte / pel
        mov     dx,3
        mul     dx                      ; 3 bytes / pel
        outwQ   X0,ax                   ; set the source x (in bytes)
        outwQ   Y0,y_cache              ; set the source y
        outwQ   LY,ulHeight             ; set zero based height
        mov     bx,ax                   ; height (ax assumed left by outwQ)
        inc     bx                      ; make one based height
        mov     ax,ulWidth              ; one byte / pel
        inc     ax
        mov     dx,3
        mul     dx                      ; now ax = one based width at 3 bytes / pel
        dec     ax
        outwQ   LX,ax                   ; set zero based width
        mov     cx,ax                   ; save the width

        outwQ   CMD_FLAGS,CMDPATCOLOR_R ; start the read of the rectangle
        mov     edi, pSystemMemory

        mov     dx,COLOR_0_WAIT         ; variable data port
        mov     ax,cx                   ; restore the width
        inc     ax
        movzx   ecx,bx                  ; ecx = scan line count
        movzx   ebx,ax                  ; ebx = bytes per scan line

        ; One pass of vtm24 per scan line: whole words first, then the
        ; odd trailing byte if the line length in bytes is odd.
vtm24:
        push    ecx
        mov     ecx,ebx
        shr     ecx,1                   ; set up to do words
@@:     insw                            ; Copy one word from VRAM to memory
        nop
        nop
        loop    @b
        mov     ecx,ebx
        shr     ecx,1
        jnc     short @f                ; is count odd
        in      ax,dx                   ; read a word, keep only the low byte
        stosb
@@:     pop     ecx
        loop    vtm24

        jmp     short cvram_exit


cvram_16bpp:
endif

        ; 16bpp: one word per pel, byte swapped before it is stored
        mov     eax,ulWidth             ; zero based width
        inc     eax                     ; Make width one-based.
        mul     ebx                     ; Calc. # of pels in rectangle
        mov     dx,COLOR_0_WAIT         ; variable data port
        mov     ecx,eax                 ; ECX now contains # of words
    ; Copy one word from VRAM to memory
@@:     in      ax,dx
        ror     ax,8
        stosw
        loop    @b

        jmp     short cvram_exit

cvram_8BPP:
endif

ifndef  S3
        ; 8bpp on the 8514: the whole rectangle is read as one stream
        mov     eax,ulWidth             ; zero based width
        inc     eax                     ; Make width one-based.
        mul     ebx                     ; Calc. # of pels in rectangle
        mov     dx,COLOR_0_WAIT         ; variable data port
        mov     ecx,eax                 ; ECX = # of bytes
        shr     ecx,1                   ; get words and odd byte in carry
    rep insw                            ; Copy the whole words from VRAM to memory
        shr     eax,1                   ; odd total byte count?
        jnc     short @F
        in      ax,dx                   ; read a word, keep only the low byte
        stosb
@@:

else    ;S3
        ; 8bpp on the S3: read scan line by scan line so an odd line
        ; length is finished with a single stored byte per line
        mov     ecx,ebx                 ; ecx = scan line count
vtm1:   push    ecx
        mov     ecx,ulWidth             ; zero based width
        inc     ecx                     ; Make width one-based.
        mov     dx,COLOR_0_WAIT         ; variable data port
        mov     eax,ecx                 ; keep the byte count for the odd test
        shr     ecx,1                   ; get words and odd byte in carry
        jcxz    vtm2                    ; no whole words: go read the single byte
@@:     insw                            ; Copy one word from VRAM to memory
        nop
        nop
        loop    @b
        shr     eax,1                   ; odd byte count on this line?
        jnc     short @F
vtm2:
        in      ax,dx                   ; read a word, keep only the low byte
        stosb
@@:     pop     ecx
        loop    vtm1
endif   ;S3

cvram_exit:

cEnd

;**************************************************************************
;*
;* FUNCTION NAME = CopyDestToMemory
;*
;* DESCRIPTION   =
;*
;* INPUT         = NONE
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************

        align   4
;---------------------------------------------------------------------
; CopyDestToMemory
;
; Reads a rectangle back from the adapter and merges it into an
; existing system memory buffer: a buffer byte is replaced by the byte
; read from the adapter only when the buffer byte currently equals
; Shadow8514Regs.Color_Comp; all other buffer bytes are left alone.
;
;   pSystemMemory - buffer that is examined and selectively overwritten.
;   pVRAMMemory   - packed source position: low word = y, high word = x.
;   ulWidth       - zero based width.
;   ulHeight      - zero based height.
;   ulBPPFormat   - only the BPP_MASK bits are examined to choose the
;                   8bpp / 16bpp / 24bpp loop.
;
; NOTE(review): several places read ax straight after an outwQ and
; treat it as the value just written; this presumably relies on how
; outwQ is implemented - confirm against the macro.
;---------------------------------------------------------------------
cProc   CopyDestToMemory, <PUBLIC>, <esi, edi, ebx, edx>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   ulWidth
        parmD   ulHeight
        parmD   ulBPPFormat

        localW  x_cache
        localW  y_cache
cBegin

        ;Derive X0 and Y0 from pVRAM (y in the low word, x in the high word)
        mov     eax,pVRAMMemory
        mov     y_cache,ax
        ror     eax,16
        mov     x_cache,ax

        WaitQIdle
        WaitQ   8
        outwQ   MODE,(MODE_2DECODE+MD_PS_ONES)   ; pattern always 1s
        outwQ   FUNCTION_1,(FUNC_2OP_VAR+FUNC_S)
        outwQ   FUNCTION_0,(FUNC_2OP_VAR+FUNC_S)
        outwQ   READ_ENABLE,READ_ALL_PLANES

        ifdef   BPP24
        mov     eax,ulBPPFormat
        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     eax, BPP_MASK
        cmp     eax,TWENTYFOUR_BPP
        jz      short D2MEM_24bpp
        endif

        mov     eax,ulWidth
        outwQ   LX,ax                   ; zero based width
        mov     ebx,ulHeight
        mov     eax,ebx
        outwQ   LY,ax                   ; zero based height
        outwQ   X0,x_cache              ; starting x of the source rectangle
        outwQ   Y0,y_cache              ; starting y of the source rectangle

        WaitQ   1
        outwQ   CMD_FLAGS,CMDPATCOLOR_R ; start the read of the rectangle

        cld
        mov     edi, pSystemMemory      ; Load EDI with target pointer
        inc     ebx                     ; Make height one-based
        mov     eax,ulWidth             ; zero based width
        inc     eax                     ; Make width one-based.
        mul     ebx                     ; Calc. # of pels in rectangle
        mov     dx,COLOR_0_WAIT         ; variable data port (after mul, which clobbers edx)

ifdef  S3
        push    eax                     ; Def. 74355: keep the pel count
        mov     eax,ulBPPFormat
        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     eax, BPP_MASK
        cmp     eax,EIGHT_BPP
        pop     eax                     ; Def. 74355 (pop leaves the flags intact)
        jz      D2MEM_8bpp

ifdef   BPP24
        jmp     D2MEM_16bpp

;;
;; SOURCE IS COLOR - 24BPP
;;
D2MEM_24bpp:

        WaitQ   5
        mov     ax,x_cache              ; one byte / pel
        mov     dx,3
        mul     dx                      ; 3 bytes / pel
        outwQ   X0,ax                   ; set the source x (in bytes)
        outwQ   Y0,y_cache              ; set the source y
        outwQ   LY,ulHeight             ; set zero based height
        mov     bx,ax                   ; height (ax assumed left by outwQ)
        inc     bx                      ; make one based height
        mov     ax,ulWidth              ; one byte / pel
        inc     ax
        mov     dx,3
        mul     dx                      ; now ax = one based width at 3 bytes / pel
        dec     ax
        outwQ   LX,ax                   ; set zero based width
        mov     cx,ax                   ; save the width

        outwQ   CMD_FLAGS,CMDPATCOLOR_R ; start the read of the rectangle
        mov     edi, pSystemMemory

        ; Total byte count = bytes per line * lines.  Done as a 32-bit
        ; multiply so large rectangles are not truncated to 16 bits, and
        ; the data port is loaded into dx only AFTER the multiply: MUL
        ; writes its high half to (e)dx, so loading the port first (as
        ; the earlier code did) made every "in ax,dx" below read from
        ; the wrong port.
        movzx   eax,cx                  ; restore the zero based width
        inc     eax                     ; one based bytes per scan line
        movzx   ebx,bx                  ; one based height
        mul     ebx                     ; edx:eax = byte count
        mov     dx,COLOR_0_WAIT         ; variable data port

        mov     ecx,eax                 ; ECX = # of bytes
        mov     esi,eax                 ; ESI = # of bytes (for the odd test)
        shr     ecx,1                   ; get words and odd byte in carry
        mov     bl,Shadow8514Regs.Color_Comp

        ; NOTE(review): unlike the S3 8bpp loop below, this loop treats
        ; the rectangle as one continuous byte stream and does not
        ; account for a dummy byte at the end of odd-length lines -
        ; confirm whether that matters for 24bpp reads.
D2MEM_24loop:

        in      ax,dx                    ; Copy one word from VRAM to ax
        mov     bh,byte ptr [edi]
        cmp     bh,bl                    ; buffer byte == compare colour?
        jnz     short @f
        mov     byte ptr [edi],al        ; yes: take the adapter's byte
@@:
        inc     edi
        mov     bh,byte ptr [edi]
        cmp     bh,bl
        jnz     short @f
        mov     byte ptr [edi],ah
@@:
        inc     edi
        loop    D2MEM_24loop

        shr     esi,1                    ; one odd byte left?
        jnc     short @F
        in      ax,dx
        mov     bh,byte ptr [edi]
        cmp     bh,bl
        jnz     short @f
        stosb
@@:
        jmp     short dest_exit


D2MEM_16bpp:
endif

;;
;; SOURCE IS COLOR - 16BPP
;;


; The adapter is now waiting for us to upload 16bpp pixel data.
; Each word is byte swapped, then merged a byte at a time.

        mov     ecx,eax                 ; ECX now contains # of words
        mov     bl,Shadow8514Regs.Color_Comp

D2MEM_16loop:

        in      ax,dx                    ; Copy one word from VRAM to ax
        ror     ax,8
        mov     bh,byte ptr [edi]
        cmp     bh,bl                    ; buffer byte == compare colour?
        jnz     short  @f
        mov     byte ptr [edi],al        ; yes: take the adapter's byte
@@:
        inc     edi
        mov     bh,byte ptr [edi]
        cmp     bh,bl
        jnz     short @f
        mov     byte ptr [edi],ah
@@:
        inc     edi
        loop    D2MEM_16loop

        jmp     short dest_exit

D2MEM_8bpp:
endif

;---------------------------------------------------------------------
; Defect 75128.  We are reading words; if we have an odd number of
; bytes on each line, the S3 will read a dummy byte instead of
; wrapping to the next line. So, we'll use esi to keep track of where
; we are in the scan.--EKF
;---------------------------------------------------------------------
ifdef   S3
        mov     eax,ulWidth             ; zero based width
        inc     eax                     ; Make width one-based.
        mov     esi,eax                 ; esi = bytes left on this line
        inc     eax                     ; need even word lines
        and     eax, not 1              ; round the line length up to even
        mul     ebx                     ; padded bytes per line * lines
        mov     ecx,eax
        mov     dx,COLOR_0_WAIT         ; variable data port (mul clobbered edx)
else    ; 8514
        mov     ecx,eax                 ; ECX now contains # of bytes
        mov     esi,eax                 ; ESI now contains # of bytes
endif

        ; NOTE(review): on the 8514 path a one byte rectangle gives
        ; ecx = 0 here, and LOOP with ecx = 0 does not fall through -
        ; confirm callers never request a 1x1 read.
        shr     ecx,1                   ; get words and odd byte in carry
        mov     bl,Shadow8514Regs.Color_Comp

D2MEM_loop:

        in      ax,dx                   ; Copy one word from VRAM to ax
        mov     bh,byte ptr [edi]
        cmp     bh,bl                   ; buffer byte == compare colour?
        jnz     short @f
        mov     byte ptr [edi],al       ; yes: take the adapter's byte
@@:
        inc     edi

ifdef   S3
        dec     esi                     ; end of line?
        jnz     @f                      ; no, go ahead
        mov     esi,ulWidth             ; yes, reset esi
        inc     esi                     ; make one based
        loop    D2MEM_loop              ; blow off last byte
        jmp     dest_exit               ; all through
@@:
endif   ;S3

        mov     bh,byte ptr [edi]
        cmp     bh,bl
        jnz     short @f
        mov     byte ptr [edi],ah
@@:
        inc     edi

ifdef   S3
        dec     esi                     ; end of line?
        jnz     @f                      ; no, go ahead
        mov     esi,ulWidth             ; yes, reset esi
        inc     esi                     ; make one based
@@:
endif   ;S3

        loop    D2MEM_loop

ifndef  S3
        shr     esi,1                   ; one odd byte?
        jnc     short @F                ; no
        in      ax,dx                   ; yes
        mov     bh,byte ptr [edi]
        cmp     bh,bl
        jnz     short @f
        stosb
@@:
endif   ;~S3

dest_exit:

cEnd

;**************************************************************************
;*
;* FUNCTION NAME = CopyMaskToVRAM
;*
;* DESCRIPTION   =
;*
;* INPUT         = NONE
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************

        align   4
;---------------------------------------------------------------------
; CopyMaskToVRAM
;
; Downloads a monochrome mask from system memory to the adapter.  The
; mixes are fixed: FUNCTION_0 gets FUNC_2OP_COL0+FUNC_ZEROS and
; FUNCTION_1 gets FUNC_2OP_COL1+FUNC_ONES.
;
;   pSystemMemory - source mask data (loaded into esi; not null checked).
;   pVRAMMemory   - packed destination: low word = y, high word = x.
;   ulWidth       - zero based width in pels.
;   ulHeight      - zero based height in scan lines.
;   ulBPPFormat   - read and masked on entry; see the note below.
;
; The x scissor is narrowed around the mask for the transfer and put
; back to the full screen width before returning.
;---------------------------------------------------------------------
cProc   CopyMaskToVRAM, <PUBLIC>, <esi, edi, ebx>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   ulWidth
        parmD   ulHeight
        parmD   ulBPPFormat

        localW  x_cache
        localW  y_cache
cBegin

        ; NOTE(review): this masked format value appears to be unused -
        ; eax is reloaded from pVRAMMemory below before anything in this
        ; procedure reads it.
        mov     eax,ulBPPFormat
        ; mask to keep just the BPP information (ie ignore intel/motorola bit)
        and     eax, BPP_MASK

        WaitQIdle
        WaitQ   2
        outwQ   YMIN,(YMIN_2DECODE+0)
        ifndef  S3
        outwQ   YMAX,(YMAX_2DECODE+SCR_8514_HEIGHT-1)   ; set max y extent
        else
        outwQ   YMAX,(YMAX_2DECODE+SCR_S3_HEIGHT-1)   ; set max y extent
        endif

        cld                             ; work up in address
        ;Derive X0 and Y0 from pVRAM (y in the low word, x in the high word)
        mov     eax,pVRAMMemory
        mov     y_cache,ax
        ror     eax,16
        mov     x_cache,ax

        WaitQ   3                       ; need room in the queue
        mov     bx,x_cache              ; set up scissor rectangle
        mov     ax,bx
        or      ax,XMIN_2DECODE
        outwQ   XMIN,ax                 ; left scissor edge = destination x

        mov     eax,ulWidth             ; zero based width of the mask
        outwQ   LX,ax                   ; 0-based width of the transfer
        inc     eax
        add     bx,ax                   ; Calculate the right edge of the scissor rect.

        mov     ax,bx                   ; Grab the right edge value.
        or      ax,XMAX_2DECODE
        outwQ   XMAX,ax


        WaitQ   8
        outwQ   LY,ulHeight
        outwQ   X0,x_cache              ; data x coordinate
        outwQ   Y0,y_cache              ; data y coordinate
        outbQ   FUNCTION_0,(FUNC_2OP_COL0+FUNC_ZEROS); write 0s to bg
        outbQ   FUNCTION_1,(FUNC_2OP_COL1+FUNC_ONES); write 1s to fg
        outwQ   MODE,(MODE_2DECODE+MD_PS_VAR+MD_UP_FALSE)
        outwQ   CMD_FLAGS,CMD_WCUR      ; command to output the mask data

; The adapter is now waiting for us to download the mask pattern.
; We do this in the format expected by the adapter.

        mov     ecx,ulHeight            ; for # of scanlines in the mask
        inc     ecx                     ; ecx = ulHeight+1 ( make one based )
        mov     esi,pSystemMemory
        mov     dx,COLOR_0_WAIT         ; variable data port

outer_mask_mp_loop:

        ifndef  S3
        push    ecx                     ; Save the outer loop counter.
        mov     ecx,ulWidth
        inc     ecx                     ; make one based
        shr     ecx,4                   ; convert to word count
        jnc     inner_mask_mp_loop      ; carry = bit 3 of the pel count
        inc     ecx                     ; one more word for the trailing byte
                                        ; any extra we do is clipped
inner_mask_mp_loop:
                                        ; Load up a word of monochrome data
        twistbits16nomap                ; Massage into planar format and send it
                                        ; to the board.
        loop inner_mask_mp_loop

        pop     ecx                     ; Restore outer loop counter

        loop outer_mask_mp_loop

        else    ; S3
        ; NOTE(review): the other S3 mono download loops in this file add
        ; 7 to the pel count before shifting so a partial trailing byte
        ; is still sent; this loop does not, so it presumably expects
        ; the mask width to be a multiple of 8 pels - confirm.
        push    ecx                     ; Save the outer loop counter.
        mov     ecx,ulWidth
        inc     ecx                     ; make one based
        mov     eax,ecx
        shr     ecx,4                   ; round off and get word count
        jcxz    @f
        rep     outsw                   ; to the board.
@@:     shr     eax,4                   ; carry = bit 3: one odd byte to send
        jnc     short @f
        lodsb
        out     dx,ax
@@:     pop     ecx                     ; Restore outer loop counter

        loop outer_mask_mp_loop
        endif   ; S3

        WaitQ   2
        outwQ   XMIN,(XMIN_2DECODE+0)   ;; set scissor to full screen
        ifndef  S3
        outwQ   XMAX,(XMAX_2DECODE+SCR_8514_WIDTH-1)   ; set max x extent
        else
        ; we should be okay here for all resolutions
        outwQ   XMAX,(XMAX_2DECODE+SCR_S3_WIDTH-1)   ; set max x extent
        endif

cEnd

;**************************************************************************
;*
;* FUNCTION NAME = KlugeReset
;*
;* DESCRIPTION   =
;*
;* INPUT         = NONE
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************
        public  _KlugeReset
;---------------------------------------------------------------------
; _KlugeReset
;
; Opens the scissor rectangle to the full screen and issues a filled
; rectangle command covering the whole screen with COLOR_1 = 0.
; Takes no arguments.  edx is saved and restored; ax is modified.
;
; NOTE(review): ax is reused after each outwQ (e.g. LX then XMAX), so
; this presumably relies on outwQ leaving ax unchanged - confirm
; against the macro definition.
;---------------------------------------------------------------------
_KlugeReset     Proc    near

; clear the screen
        push    edx

        WaitQIdle
        WaitQ   8
        outwQ   XMIN,(XMIN_2DECODE+0)   ;; set scissor to full screen
        outwQ   YMIN,(YMIN_2DECODE+0)
        Mov     Ax,SCR_8514_WIDTH-1     ; redundant: ax is reloaded just below
        ifndef  S3
        Mov     Ax,SCR_8514_WIDTH-1
        else
        ; we should be okay here for all resolutions
        Mov     Ax,SCR_S3_WIDTH-1
        endif
        outwQ   LX,Ax                   ; set width of rect
        Or      Ax,XMAX_2DECODE
        outwQ   XMAX,Ax                 ; set max scissor x extent
        ifndef  S3
        Mov     Ax,SCR_8514_HEIGHT-1    ; get max y
        else
        Mov     Ax,SCR_S3_HEIGHT-1    ; get max y
        endif
        outwQ   LY,Ax                   ; set height of rect
        Or      Ax,YMAX_2DECODE
        outwQ   YMAX,Ax                 ; set max scissor y extent
        Sub     Ax,Ax                   ; rectangle ul is at ul of screen
        outwQ   X0,Ax
        outwQ   Y0,Ax

        WaitQ   5
        outwQ   COLOR_1,0               ; clear to black
        outwQ   WRITE_ENABLE,0ffffh     ; all planes writable
        outwQ   FUNCTION_1,(FUNC_2OP_COL1+FUNC_S)
        outwQ   MODE,(MODE_2DECODE+MD_PS_ONES+MD_UP_FALSE)
        outwQ   CMD_FLAGS,(CMD_C_HRECT+CMD_FV_FIX+CMD_PA_FOUR+CMD_RW_W+CMD_MA_ACCESS+CMD_DY+CMD_DX)

; Disabled: fill the screen with a different color (index 6) afterwards

;       WaitQ   7
;       Sub     Ax,Ax                   ; rectangle ul is at ul of screen
;       outwQ   X0,Ax
;       outwQ   Y0,Ax
;       outwQ   COLOR_1,6               ; clear to index 6
;       outwQ   WRITE_ENABLE,0ffffh
;       outwQ   FUNCTION_1,(FUNC_2OP_COL1+FUNC_S)
;       outwQ   MODE,(MODE_2DECODE+MD_PS_ONES+MD_UP_FALSE)
;       outwQ   CMD_FLAGS,(CMD_C_HRECT+CMD_FV_FIX+CMD_PA_FOUR+CMD_RW_W+CMD_MA_ACCESS+CMD_DY+CMD_DX)

exit:                                   ; not referenced within this procedure
        pop     edx
        Ret
_KlugeReset     Endp

endif ;_8514

        align   4
;---------------------------------------------------------------------
; WaitForRealHWFunction
;
; XGA builds (_8514 not defined): points the XGA register segment at
; pRealXGARegs and waits, via the waitshort macro, for the hardware to
; become free; the segment register is saved and restored around it.
; 8514 builds: the body is empty and the procedure returns at once.
; Takes no arguments.
;---------------------------------------------------------------------
cProc   WaitForRealHWFunction, <PUBLIC>, <esi>
cBegin
        ifndef  _8514
        pushxga                 ; save the segment register used for XGA
        ; Get a pointer to the hardware registers.
        movxga  pRealXGARegs

        ; Wait for the hardware to be free.
        waitshort
        popxga                  ; restore the segment register used for XGA
        endif
cEnd

; * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
;
; CheckChipState
;
; This call has been added for protection of the ioinvalid
; flag being set by an invalid operation somewhere in the
; driver.  If this flag has been set, then the screen will
; never get updated, so we will have to reset this flag
; to keep the driver from seemingly hanging.  This is not a
; FIX to any problem, and could cause screen corruption, but
; will keep the driver working.
;
; This will be called by EnterDriver
;
; Michael R. Cooper
; * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

        align   4
;---------------------------------------------------------------------
; CheckChipState
;
; If bit 0 of CS is clear, reads the status byte at port 42E8h and,
; when bit 2 (the ioinvalid flag) is set, writes the word 0004h back
; to the same port to reset it.  Does nothing when bit 0 of CS is set.
; Takes no arguments; ax and dx are modified.
;---------------------------------------------------------------------
cProc   CheckChipState, <PUBLIC>, <esi>
cBegin

        mov    ax,cs               ; privilege check on the code selector
        test   al,1                ; bit 0 set: leave the chip alone
        jnz    short ccs_done

        mov    dx,42e8h            ; status (read) / control (write) port
        in     al,dx               ; fetch the status byte
        test   al,4h               ; ioinvalid flag raised?
        jz     short ccs_done      ; no, nothing to reset

        mov    ax,4                ; reset rinvalidio; dx still holds 42e8h
        out    dx,ax

ccs_done:

cEnd

ifdef BPP24
;/***************************************************************************
;*
;* FUNCTION NAME = Copy24MonoToVRAM
;*
;* DESCRIPTION   = This proc will cache a mono bitmap or pattern to
;*                 a dword aligned cache location.
;*
;*                  Registers Preserved:
;*
;*                  Registers Destroyed:
;*
;*
;* INPUT         =
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

;
; mono_mix_blt_inner_loop
;
; Expands ecx source bits (taken MSB first from al) into 3-byte pels at
; [edi], combining each colour with the pel already at the destination
; through do_mix_routine.
;
;   IN   al  = source bits, next bit to use in bit 7
;        ecx = number of pels to produce (loop counter, must be non-zero)
;        edx = colour used when the source bit is 1 (mixed with fg_mix)
;        ebx = colour used when the source bit is 0 (mixed with bg_mix)
;        edi = destination address of the first pel
;   OUT  edi advanced by 3 bytes per pel, al rotated left once per pel,
;        ecx = 0.  edx and ebx are unchanged; the upper bytes of eax are
;        restored for every pel.
;
; The macro-local label next_dest was declared but never used and has been
; dropped; the generated instructions are unchanged.
;
mono_mix_blt_inner_loop macro
local   leave_alone
local   top
top:
        push    edx                     ; keep the bit=1 colour for the next pel
        push    ecx                     ; keep the pel counter (ecx carries the mix)
        movzx   ecx, byte ptr Shadow8514Regs.fg_mix ; assume a 1 bit
        rol     al, 1                   ; rotate a bit of source into carry
        jc      short leave_alone       ; bit = 1: edx/ecx are already right
        mov     edx,ebx                 ; get background color
        movzx   ecx, byte ptr Shadow8514Regs.bg_mix
leave_alone:
        push    eax                     ; keep the remaining source bits
        mov     eax,[edi]               ; get the current dst pel (dword read;
                                        ;  only 3 bytes are written back)

        call    do_mix_routine          ; mix it: eax = mix(dst eax, src edx)
        mov     byte ptr [edi],al       ; store it, low byte first
        ror     eax,8
        mov     byte ptr [edi+1],al
        ror     eax,8
        mov     byte ptr [edi+2],al
        add     edi,3                   ; update dst addr
        pop     eax                     ; source bits back
        pop     ecx                     ; pel counter back
        pop     edx                     ; bit=1 colour back
        loop    top                     ; next pel
endm


;
; mono_blt_inner_loop
;
; Expands ecx source bits (taken MSB first from al) into 3-byte pels at
; [edi] with no mixing: a 1 bit writes the low three bytes of edx, a 0 bit
; writes the low three bytes of ebx.  The colour registers are rotated a
; full 32 bits while their bytes are stored, so they come out unchanged.
;
;   IN   al  = source bits, next bit to use in bit 7
;        ecx = number of pels to produce (loop counter, must be non-zero)
;        edx = colour for 1 bits, ebx = colour for 0 bits
;        edi = destination address of the first pel
;   OUT  edi advanced by 3 bytes per pel, al rotated left once per pel,
;        ecx = 0.
;
mono_blt_inner_loop macro
local   put_zero_colour
local   pel_done
local   next_pel
next_pel:
        rol     al, 1                   ; next source bit -> carry
        jnc     short put_zero_colour   ; bit clear: use ebx instead

        ; bit set: store edx one byte at a time (a dword store would
        ; also overwrite the first byte of the following pel)
        mov     byte ptr [edi],dl
        ror     edx,8
        mov     byte ptr [edi+1],dl
        ror     edx,8
        mov     byte ptr [edi+2],dl
        ror     edx,16                  ; 8+8+16 = 32: edx is as it was
        jmp     short pel_done

put_zero_colour:
        ; bit clear: store ebx the same way
        mov     byte ptr [edi],bl
        ror     ebx,8
        mov     byte ptr [edi+1],bl
        ror     ebx,8
        mov     byte ptr [edi+2],bl
        ror     ebx,16                  ; 8+8+16 = 32: ebx is as it was

pel_done:
        add     edi,3                   ; step to the next 3-byte pel
        loop    next_pel                ; until ecx pels are written
endm

SCANS_PER_BANK  EQU     32
BANK_SIZE       EQU     10000H

        align   4
;/***************************************************************************
;*
;* FUNCTION NAME = Copy24MonoMixToVRAM
;*
;* DESCRIPTION   = Expands a 1 bit-per-pel source bitmap into 3-byte pels
;*                 in VRAM, scan by scan.  For every source bit the colour
;*                 Shadow8514Regs.Color_1 (bit = 1, mix fg_mix) or
;*                 Shadow8514Regs.Color_0 (bit = 0, mix bg_mix) is combined
;*                 with the pel already in VRAM by do_mix_routine (see the
;*                 mono_mix_blt_inner_loop macro).  The destination bank is
;*                 advanced every SCANS_PER_BANK scans.
;*
;*                  Registers Preserved: esi, edi, ebx, ecx (cProc save list)
;*
;*                  Registers Destroyed: eax, edx, flags
;*
;* INPUT         = pSystemMemory  address of the source bits
;*                 pVRAMMemory    low word   = destination Y (scan line)
;*                                bits 16-27 = destination X in pels
;*                 ulPels         pels to write per scan, minus one
;*                 ulWidth        source width in pels, minus one; only used
;*                                to derive the source bytes per scan
;*                 ulHeight       scans to write, minus one (the stack copy
;*                                of this parameter is modified)
;*                 ulBPPFormat    not referenced by this procedure
;*                 StartBits      bits to skip in the first source byte of
;*                                every scan
;* OUTPUT        = NONE
;*
;* NOTES         = Interrupts are disabled around the CRTC register updates
;*                 and unconditionally re-enabled (sti) afterwards.
;*                 The direction flag is cleared.
;*
;**************************************************************************/
cProc   Copy24MonoMixToVRAM, <PUBLIC>, <esi, edi, ebx, ecx>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   ulPels
        parmD   ulWidth
        parmD   ulHeight
        parmD   ulBPPFormat
        parmW   StartBits       ; Number of bits into the first byte
                                ; of the mono bitmap  #74143
        localW  x_width         ; pels still to write on the current scan
        localW  x_cache         ; destination X as a byte offset (3 * X)
        localW  y_cache         ; destination Y
        localW  d_bank          ; bank currently selected
        localW  y_scans         ; scans left before the next bank switch
cBegin

    ;Turn on Linear Addressing.

    WaitQIdle                       ; NOTE(review): presumably waits for the
                                    ;  drawing engine to go idle - confirm
    cli                             ; keep the index/data pair atomic
    mov dx,3d4h                     ; CRTC index port
    mov al,058h                     ; select register 58h
    out dx,al
    inc dx                          ; CRTC data port (3d5h)
    in  al,dx
    or  al,10h                      ; set bit 4
    out dx,al

    ; Turn the fast write buffer on (register 40h, bit 3).
    mov dx,3d4h
    mov al,040h
    out dx,al
    inc dx
    in  al,dx
    or  al,08h
    out dx,al
    sti

    ; get a pointer to vram

    cld                             ; work up in address

    ;Derive X0 and Y0 from pVRAMMemory
    mov     eax,pVRAMMemory
    and     eax,0FFFFFFFh           ; drop the top 4 bits
    mov     y_cache,ax              ; low word is Y
    ror     eax,16                  ; bring X down into ax
    mov     dx,ax
    shl     ax,1
    add     ax,dx                   ; ax = 3 * X (3 bytes per pel)
    mov     x_cache,ax
    ror     eax,16                  ; Y back in ax
    mov     edx,CB_SCAN24
    mul     edx                     ; low word of result = (Y * CB_SCAN24) mod 64K
    movzx   eax,ax                  ; keep only the offset inside the bank
    add     ax,x_cache              ; plus the X byte offset (16-bit add)
    add     eax,_pVram              ; plus the VRAM base address
    mov     edi,eax                 ; edi -> first destination pel
    mov     esi,pSystemMemory       ; esi -> first source byte

    ; select a bank and determine switching parms
    ; we could have gotten the bank from above but not the scans remaining
    mov     ebx,SCANS_PER_BANK      ; divisor
    mov     cx,bx                   ; cx = scans in a full bank
    xor     edx,edx                 ; clear the high half of the dividend
    movzx   eax,y_cache             ;Get current Y destination.
    div     ebx                     ;Calculate current bank.
    mov     d_bank,al               ;Save current bank.
    sub     cx,dx                   ;Calculate scanlines remaining in bank.
    mov     y_scans,cx              ;Save scanlines left in bank.
    mov     dx,ax                   ;get bank in dx
    call    set_bank

    ;set the colors to expand to and put them in the right format:
    ;the rotate sequence below swaps bytes 0 and 2 of each colour and
    ;leaves bytes 1 and 3 where they were

    mov     edx,Shadow8514Regs.Color_1      ; colour for 1 bits
    mov     ebx,Shadow8514Regs.Color_0      ; colour for 0 bits
    ror     edx,8
    ror     dx,8
    ror     edx,16
    ror     dx,8
    ror     edx,16
    ror     ebx,8
    ror     bx,8
    ror     ebx,16
    ror     bx,8
    ror     ebx,16

    ;Starting a new row
    inc     ulHeight                ; the dec/jnz loop below then runs ulHeight+1 times
    mov     ecx,ulWidth             ; get pel width
    inc     ecx                     ; make one based
    add     ecx,7                   ; get any partial bytes
    shr     ecx,3                   ; get byte width

    ; Per-scan loop.  ecx = source bytes per scan, esi/edi = start of the
    ; current source/destination scan, edx/ebx = the two colours.
mono_mix_inner_loop_top:
    mov     eax,ulPels
    inc     eax                     ; make it one based
    mov     x_width,ax              ; pels to write on this scan

    push    edi                     ; Start of scan of destination
    push    esi                     ; Start of scan of source
    push    ecx                     ; source bytes per scan

;do partial first byte
    push    ecx
    movzx   ecx,StartBits
    jcxz    @f                      ; StartBits = 0: no partial first byte
    lodsb                           ; read the first byte
    shl     al,cl                   ; move the bits we need to the left edge
    mov     ch,8                    ; set cl to the number of pels in this
    sub     ch,cl                   ;  byte
    shr     cx,8                    ; cx = 8 - StartBits
    cmp     cx,x_width
    jb      short sbm1
    mov     cx,x_width              ; never more than the pels that are left
sbm1:
    sub     x_width,cx
    mono_mix_blt_inner_loop         ; In line expansion macro for partial byte
@@:
    pop     ecx                     ; byte counter for the loop below

;do the middle bytes
mono_mix_byte_lp:                   ; mono byte loop
    push    ecx                     ; save byte counter
    mov     cx,x_width              ; get pel count ; top of ecx is zeroed
    jcxz    no_mono_mix_pels        ; see if we are out of pels
    cmp     cx,8                    ; try and do a byte
    jb      short @f                ; do a partial byte
    mov     cx,8                    ; do a byte
@@:
    sub     x_width,cx              ; get remaining width
    lodsb                           ; load the byte
    mono_mix_blt_inner_loop         ; In line expansion macro for a byte

no_mono_mix_pels:
    pop     ecx                     ; get byte counter
    loop    mono_mix_byte_lp        ; loop through all the bytes

    pop     ecx                     ; source bytes per scan
    pop     esi                     ; restore the left edges of the blits
    pop     edi

    add     esi,ecx                 ; Look at next source scan line
    add     edi,CB_SCAN24           ; Look at next screen scan line

    dec     y_scans                 ; Update lines remaining in current bank.
    jnz     short cdsc_mono_mix_300 ; Not zero: still inside the same bank.
    mov     y_scans,SCANS_PER_BANK  ; Get total scan lines in one bank.

        ; Switch banking detected in destination
cdsc_mono_mix_400:                  ; (label is not referenced in this procedure)
    push    edx                     ; Save dx we used in bank switching.
    mov     dl,d_bank               ; Get current bank.
    inc     dx                      ; Prepare to switch to next bank.
    mov     d_bank,dl               ; Remember the new bank.
    call    set_bank                ; Set new bank.
    sub     edi,BANK_SIZE           ; Reset current bank address.
    pop     edx                     ; Recover dx.

cdsc_mono_mix_300:                  ; end of scan
    dec     ulHeight                ; Another line done.
    jnz     mono_mix_inner_loop_top     ; Are we done yet ?

    ;Turn off Linear Addressing.
    cli
    mov dx,3d4h                     ; CRTC index port
    mov al,058h                     ; select register 58h
    out dx,al
    inc dx
    in  al,dx
    and al,0efh                     ; clear bit 4
    out dx,al

    ;Turn fast write buffer off (register 40h, bit 3).
    mov dx,3d4h
    mov al,040h
    out dx,al
    inc dx
    in  al,dx
    and al,0F7h
    out dx,al
    sti

    ; debug call to dump vram
    ;call    DVRAM

cEnd

        align   4
;/***************************************************************************
;*
;* FUNCTION NAME = Copy24ScanMixToVRAM
;*
;* DESCRIPTION   = Writes one scan of 3-byte pels to VRAM by repeatedly
;*                 expanding the single source byte at pSystemMemory.
;*                 Each pass re-reads that byte (esi is restored) and
;*                 produces at most ulWidth+1 pels, until ulPels+1 pels
;*                 have been written.  Every pel is combined with the
;*                 destination through do_mix_routine, using Color_1/fg_mix
;*                 for 1 bits and Color_0/bg_mix for 0 bits (see the
;*                 mono_mix_blt_inner_loop macro).
;*
;*                  Registers Preserved: esi, edi, ebx, ecx (cProc save list)
;*
;*                  Registers Destroyed: eax, edx, flags
;*
;* INPUT         = pSystemMemory  address of the source byte
;*                 pVRAMMemory    low word   = destination Y (scan line)
;*                                bits 16-27 = destination X in pels
;*                 ulPels         pels to write, minus one
;*                 ulWidth        pels produced per pass over the source
;*                                byte, minus one
;* OUTPUT        = NONE
;*
;* NOTES         = Only the starting bank is selected; no bank switch is
;*                 done while the scan is written.
;*                 Interrupts are disabled around the CRTC register updates
;*                 and unconditionally re-enabled (sti) afterwards.
;*
;**************************************************************************/
cProc   Copy24ScanMixToVRAM, <PUBLIC>, <esi, edi, ebx, ecx>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   ulPels
        parmD   ulWidth

        localW  x_cache         ; destination X as a byte offset (3 * X)
        localW  y_cache         ; destination Y
        localW  d_bank          ; bank selected for the destination
        localW  x_width         ; pels still to write
cBegin

    ;Turn on Linear Addressing.

    WaitQIdle                       ; NOTE(review): presumably waits for the
                                    ;  drawing engine to go idle - confirm
    cli                             ; keep the index/data pair atomic
    mov dx,3d4h                     ; CRTC index port
    mov al,058h                     ; select register 58h
    out dx,al
    inc dx                          ; CRTC data port (3d5h)
    in  al,dx
    or  al,10h                      ; set bit 4
    out dx,al

    ; Turn the fast write buffer on (register 40h, bit 3).
    mov dx,3d4h
    mov al,040h
    out dx,al
    inc dx
    in  al,dx
    or  al,08h
    out dx,al
    sti

    ; get a pointer to vram

    cld                             ; work up in address

    ;Derive X0 and Y0 from pVRAMMemory
    mov     eax,pVRAMMemory
    and     eax,0FFFFFFFh           ; drop the top 4 bits
    mov     y_cache,ax              ; low word is Y
    ror     eax,16                  ; bring X down into ax
    mov     dx,ax
    shl     ax,1
    add     ax,dx                   ; ax = 3 * X (3 bytes per pel)
    mov     x_cache,ax
    ror     eax,16                  ; Y back in ax
    mov     edx,CB_SCAN24
    mul     edx                     ; low word of result = (Y * CB_SCAN24) mod 64K
    movzx   eax,ax                  ; keep only the offset inside the bank
    add     ax,x_cache              ; plus the X byte offset (16-bit add)
    add     eax,_pVram              ; plus the VRAM base address
    mov     edi,eax                 ; edi -> first destination pel
    mov     esi,pSystemMemory       ; esi -> source byte

    ; select a bank and determine switching parms
    ; we could have gotten the bank from above but not the scans remaining
    mov     ebx,SCANS_PER_BANK      ; divisor
    mov     cx,bx                   ; (cx is not used before ecx is cleared below)
    xor     edx,edx                 ; clear the high half of the dividend
    movzx   eax,y_cache             ;Get current Y destination.
    div     ebx                     ;Calculate current bank.
    mov     d_bank,al               ;Save current bank.
    mov     dx,ax                   ;get bank in dx
    call    set_bank

    ;set the colors to expand to and put them in the right format:
    ;the rotate sequence below swaps bytes 0 and 2 of each colour and
    ;leaves bytes 1 and 3 where they were

    mov     edx,Shadow8514Regs.Color_1      ; colour for 1 bits
    mov     ebx,Shadow8514Regs.Color_0      ; colour for 0 bits
    ror     edx,8
    ror     dx,8
    ror     edx,16
    ror     dx,8
    ror     edx,16
    ror     ebx,8
    ror     bx,8
    ror     ebx,16
    ror     bx,8
    ror     ebx,16

    mov     eax,ulPels
    inc     eax                     ; make it one based
    mov     x_width,ax              ; save the width
    xor     ecx,ecx                 ; clear ecx

;repeat the source byte until the scan is full
scan_mix_byte_lp:                   ; one pass per repetition of the source byte
    mov     cx,x_width              ; get pel count ; top of ecx is zeroed
    jcxz    no_scan_mix_pels        ; see if we are out of pels

    push    esi                     ; Start of scan of source
    mov     eax,ulWidth             ; get pel width
    inc     ax                      ; make one based
    cmp     cx,ax                   ; see which is least
    jb      @f                      ; fewer pels left than one full pass
    mov     cx,ax                   ; otherwise do one full pass of ulWidth+1 pels
@@:
    sub     x_width,cx              ; get remaining width
    lodsb                           ; load the byte
    mono_mix_blt_inner_loop         ; In line expansion macro for a byte

    pop     esi                     ; restore the left edges of the src patt
    jmp     scan_mix_byte_lp        ; go round again until x_width is zero
no_scan_mix_pels:

    ;Turn off Linear Addressing.
    cli
    mov dx,3d4h                     ; CRTC index port
    mov al,058h                     ; select register 58h
    out dx,al
    inc dx
    in  al,dx
    and al,0efh                     ; clear bit 4
    out dx,al

    ;Turn fast write buffer off (register 40h, bit 3).
    mov dx,3d4h
    mov al,040h
    out dx,al
    inc dx
    in  al,dx
    and al,0F7h
    out dx,al
    sti

    ; debug call to dump vram
    ;call    DVRAM

cEnd

        align   4
;/***************************************************************************
;*
;* FUNCTION NAME = Copy24MonoToVRAM
;*
;* DESCRIPTION   = Expands a 1 bit-per-pel source bitmap into 3-byte pels
;*                 in VRAM, scan by scan, overwriting the destination:
;*                 a 1 bit writes Shadow8514Regs.Color_1, a 0 bit writes
;*                 Shadow8514Regs.Color_0 (see the mono_blt_inner_loop
;*                 macro).  The destination bank is advanced every
;*                 SCANS_PER_BANK scans.
;*
;*                  Registers Preserved: esi, edi, ebx, ecx (cProc save list)
;*
;*                  Registers Destroyed: eax, edx, flags
;*
;* INPUT         = pSystemMemory  address of the source bits
;*                 pVRAMMemory    low word   = destination Y (scan line)
;*                                bits 16-27 = destination X in pels
;*                 ulPels         pels to write per scan, minus one
;*                 ulWidth        source width in pels, minus one; only used
;*                                to derive the source bytes per scan
;*                 ulHeight       scans to write, minus one (the stack copy
;*                                of this parameter is modified)
;*                 ulBPPFormat    not referenced by this procedure
;*                 StartBits      bits to skip in the first source byte of
;*                                every scan
;* OUTPUT        = NONE
;*
;* NOTES         = Interrupts are disabled around the CRTC register updates
;*                 and unconditionally re-enabled (sti) afterwards.
;*                 The direction flag is cleared.
;*
;**************************************************************************/
cProc   Copy24MonoToVRAM, <PUBLIC>, <esi, edi, ebx, ecx>
        parmD   pSystemMemory
        parmD   pVRAMMemory
        parmD   ulPels
        parmD   ulWidth
        parmD   ulHeight
        parmD   ulBPPFormat
        parmW   StartBits       ; Number of bits into the first byte

        localW  x_width         ; pels still to write on the current scan
        localW  x_cache         ; destination X as a byte offset (3 * X)
        localW  y_cache         ; destination Y
        localW  d_bank          ; bank currently selected
        localW  y_scans         ; scans left before the next bank switch
cBegin

    ;Turn on Linear Addressing.

    WaitQIdle                       ; NOTE(review): presumably waits for the
                                    ;  drawing engine to go idle - confirm
    cli                             ; keep the index/data pair atomic
    mov dx,3d4h                     ; CRTC index port
    mov al,058h                     ; select register 58h
    out dx,al
    inc dx                          ; CRTC data port (3d5h)
    in  al,dx
    or  al,10h                      ; set bit 4
    out dx,al

    ; Turn the fast write buffer on (register 40h, bit 3).
    mov dx,3d4h
    mov al,040h
    out dx,al
    inc dx
    in  al,dx
    or  al,08h
    out dx,al
    sti

    ; get a pointer to vram

    cld                             ; work up in address

    ;Derive X0 and Y0 from pVRAMMemory
    mov     eax,pVRAMMemory
    and     eax,0FFFFFFFh           ; drop the top 4 bits
    mov     y_cache,ax              ; low word is Y
    ror     eax,16                  ; bring X down into ax
    mov     dx,ax
    shl     ax,1
    add     ax,dx                   ; ax = 3 * X (3 bytes per pel)
    mov     x_cache,ax
    ror     eax,16                  ; Y back in ax
    mov     edx,CB_SCAN24
    mul     edx                     ; low word of result = (Y * CB_SCAN24) mod 64K
    movzx   eax,ax                  ; keep only the offset inside the bank
    add     ax,x_cache              ; plus the X byte offset (16-bit add)
    add     eax,_pVram              ; plus the VRAM base address
    mov     edi,eax                 ; edi -> first destination pel
    mov     esi,pSystemMemory       ; esi -> first source byte

    ; select a bank and determine switching parms
    ; we could have gotten the bank from above but not the scans remaining
    mov     ebx,SCANS_PER_BANK      ; divisor
    mov     cx,bx                   ; cx = scans in a full bank
    xor     edx,edx                 ; clear the high half of the dividend
    movzx   eax,y_cache             ;Get current Y destination.
    div     ebx                     ;Calculate current bank.
    mov     d_bank,al               ;Save current bank.
    sub     cx,dx                   ;Calculate scanlines remaining in bank.
    mov     y_scans,cx              ;Save scanlines left in bank.
    mov     dx,ax                   ;get bank in dx
    call    set_bank

    ;set the colors to expand to and put them in the right format:
    ;the rotate sequence below swaps bytes 0 and 2 of each colour and
    ;leaves bytes 1 and 3 where they were

    mov     ebx,Shadow8514Regs.Color_0      ; colour for 0 bits
    mov     edx,Shadow8514Regs.Color_1      ; colour for 1 bits
    ror     edx,8
    ror     dx,8
    ror     edx,16
    ror     dx,8
    ror     edx,16
    ror     ebx,8
    ror     bx,8
    ror     ebx,16
    ror     bx,8
    ror     ebx,16

    ;Starting a new row
    inc     ulHeight                ; the dec/jnz loop below then runs ulHeight+1 times
    mov     ecx,ulWidth             ; get pel width
    inc     ecx                     ; make one based
    add     ecx,7                   ; get any partial end bytes
    shr     ecx,3                   ; get full byte width

    ; Per-scan loop.  ecx = source bytes per scan, esi/edi = start of the
    ; current source/destination scan, edx/ebx = the two colours.
mono_inner_loop_top:
    mov     eax,ulPels
    inc     eax                     ; make it one based
    mov     x_width,ax              ; pels to write on this scan

    push    edi                     ; Start of scan of destination
    push    esi                     ; Start of scan of source
    push    ecx                     ; source bytes per scan

;do partial first byte
    push    ecx
    movzx   ecx,StartBits
    jcxz    @f                      ; StartBits = 0: no partial first byte
    lodsb                           ; read the first byte
    shl     al,cl                   ; move the bits we need to the left edge
    mov     ch,8                    ; set cl to the number of pels in this
    sub     ch,cl                   ;  byte
    shr     cx,8                    ; cx = 8 - StartBits
    cmp     cx,x_width
    jb      short sb1
    mov     cx,x_width              ; never more than the pels that are left
sb1:
    sub     x_width,cx
    mono_blt_inner_loop             ; In line expansion macro for partial byte
@@:
    pop     ecx                     ; byte counter for the loop below


;do the middle bytes
mono_byte_lp:                       ; mono byte loop
    push    ecx                     ; save byte counter
    mov     cx,x_width              ; get pel count ; top of ecx is zeroed
    jcxz    no_mono_pels            ; see if we are out of pels
    cmp     cx,8                    ; try and do a byte
    jb      short @f                ; do a partial byte
    mov     cx,8                    ; do a byte
@@:
    sub     x_width,cx              ; get remaining width
    lodsb                           ; load the byte
    mono_blt_inner_loop             ; In line expansion macro for a byte

no_mono_pels:
    pop     ecx                     ; get byte counter
    loop    mono_byte_lp            ; loop through all the bytes

    pop     ecx                     ; source bytes per scan
    pop     esi                     ; restore the left edges of the blits
    pop     edi

    add     esi,ecx                 ; Look at next source scan line
    add     edi,CB_SCAN24           ; Look at next screen scan line

    dec     y_scans                 ; Update lines remaining in current bank.
    jnz     short cdsc_mono_300     ; Not zero: still inside the same bank.
    mov     y_scans,SCANS_PER_BANK  ; Get total scan lines in one bank.

        ; Switch banking detected in destination
cdsc_mono_400:                          ; (label is not referenced in this procedure)
    push    edx                     ; Save dx we used in bank switching.
    mov     dl,d_bank               ; Get current bank.
    inc     dx                      ; Prepare to switch to next bank.
    mov     d_bank,dl               ; Remember the new bank.
    call    set_bank                ; Set new bank.
    sub     edi,BANK_SIZE           ; Reset current bank address.
    pop     edx                     ; Recover dx.

cdsc_mono_300:                          ; end of scan
    dec     ulHeight                ; Another line done.
    jnz     mono_inner_loop_top     ; Are we done yet ?

    ;Turn off Linear Addressing.
    cli
    mov dx,3d4h                     ; CRTC index port
    mov al,058h                     ; select register 58h
    out dx,al
    inc dx
    in  al,dx
    and al,0efh                     ; clear bit 4
    out dx,al

    ;Turn fast write buffer off.
    mov dx,3d4h
    mov al,040h
    out dx,al
    inc dx
    in  al,dx
    and al,0F7h                     ; clear bit 3
    out dx,al
    sti

    ; debug call to dump vram
    ;call    DVRAM

cEnd

;/***************************************************************************
;*
;* FUNCTION NAME = set_bank
;*
;* DESCRIPTION   = Selects the VRAM bank given in dx by read-modify-write
;*                 of two indexed S3 registers:
;*                   S3_CRTR_LOCK  bits 0-3  <- bank bits 0-3
;*                   S3_EX_SCTL_2  bits 2-3  <- bank bits 4-5
;*                 All other bits of both registers are kept as read.
;*
;*                  Registers Preserved: edx, ecx (cProc save list)
;*
;*                  Registers Destroyed: ax, flags
;*
;* INPUT         = dx contains the bank to switch to (only bits 0-5 of dl
;*                 are used)
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;* NOTES         = Interrupts are disabled while the index/data port pairs
;*                 are programmed and are unconditionally re-enabled (sti)
;*                 on the way out.
;*                 The procedure used to declare four word locals (x_cache,
;*                 y_cache, d_bank, y_scans) that it never referenced; they
;*                 have been removed.
;*
;**************************************************************************/

cProc   set_bank, <PUBLIC>, <edx,ecx>
cBegin

  cli                           ; index write + data access must not be split
  mov   cx,dx                   ; cl = requested bank (dx is needed for ports)

  ; bank bits 0-3 -> low nibble of S3_CRTR_LOCK
  mov   dx,S3_NDX_PORT
  mov   ax,S3_CRTR_LOCK
  out   dx,al                   ; select the register
  mov   dx,S3_RW_PORT
  in    al,dx                   ; current value
  and   al,0f0h                 ; keep the upper nibble
  push  ecx                     ; keep the full bank number for the second half
  and   cl,0fh
  or    al,cl
  out   dx,al

  ; bank bits 4-5 -> bits 2-3 of S3_EX_SCTL_2
  mov   dx,S3_NDX_PORT
  mov   ax,S3_EX_SCTL_2
  out   dx,al                   ; select the register
  mov   dx,S3_RW_PORT
  in    al,dx                   ; current value
  and   al,0f3h                 ; clear bits 2-3, keep the rest
  pop   ecx                     ; full bank number again
  and   cl,30h
  shr   cl,2                    ; bits 4-5 moved down to bits 2-3
  or    al,cl
  out   dx,al
  sti

cEnd

;-------------------------------------------------------------------------
;
; Function: do_mix_routine
;
; Dispatches to one of the mix code fragments expanded below by the
; mixTrashSourceAll macro.  The fragment is chosen by masking ecx with
; FUNC_ALU, using the result to index the dword table mixTrashSourceAllDD,
; and adding the table entry to the address of mixCodeBase.
;
; INPUT
; eax = dst
; edx = src
; ecx = mix (only the FUNC_ALU bits are used)
;
; RETURNS
; eax = dst
;
; DESTROYS
; ecx (overwritten with the jump target)
; NOTE(review): the macro name suggests edx (src) may also be trashed by
; the mix fragments - confirm against the mixTrashSourceAll definition.
;
;-------------------------------------------------------------------------

        align 4
cProc   do_mix_routine, <PUBLIC>
cBegin

        ; Work out the address of the mix code to jump to.
        and     ecx, FUNC_ALU                   ; isolate the mix number
        mov     ecx, dword ptr mixTrashSourceAllDD[ecx*4] ; table entry for this mix
        add     ecx, offset FLAT:mixCodeBase    ; entry is relative to mixCodeBase
        jmp     ecx                             ; run the selected fragment

mixCodeBase:
        mixTrashSourceAll       edx, eax        ; expansion of all mix fragments
cEnd

endif ;BPP24


_TEXT           ends

END
