;*DDK*************************************************************************/
;
; COPYRIGHT    Copyright (C) 1995 IBM Corporation
;
;    The following IBM OS/2 WARP source code is provided to you solely for
;    the purpose of assisting you in your development of OS/2 WARP device
;    drivers. You may use this code in accordance with the IBM License
;    Agreement provided in the IBM Device Driver Source Kit for OS/2. This
;    Copyright statement may not be removed.;
;*****************************************************************************/
        PAGE     55,132
        TITLE    CBLT.ASM
        SUBTITLE Header
;/*****************************************************************************
;*
;* SOURCE FILE NAME = CBLT.ASM
;*
;* DESCRIPTIVE NAME = Compile a BLT subroutine onto the stack
;*
;*
;* VERSION      V2.0
;*
;* DATE         
;*
;* DESCRIPTION  This file contains two subroutines which build a small program on 
;*              the stack to accomplish the requested BLT.
;*                                                                                     
;*              This file is part of a set that makes up the BitBLT function   
;*              at driver-level.                                                       
;*
;* FUNCTIONS    CBLT 
;*              phase_align_generate
;*              y_update 
;*
;* NOTES        NONE
;*
;* STRUCTURES   NONE
;*
;* EXTERNAL REFERENCES
;*
;*              NONE
;*
;* EXTERNAL FUNCTIONS
;*
;*              NONE
;*
;* CHANGE ACTIVITY =
;*   DATE      FLAG        APAR   CHANGE DESCRIPTION
;*   --------  ----------  -----  --------------------------------------
;*   mm/dd/yy  @Vr.mpppxx  xxxxx  xxxxxxx
;*   02/22/8?                     Walt Moore [waltm] Wrote it for         in
;*                                distant past.
;*   07/12/86                     Wes Rupel [wesleyr] Made it a subroutine 
;*                                (extracted from an enormous bitblt.asm)
;*   07/20/87                     Wes Rupel [wesleyr] Added 4-plane support.
;*   08/16/87                     Wes Rupel [wesleyr] Bitmap Color Conversion 
;*                                uses image color
;*   03/05/88                     Wes Rupel [wesleyr] Added Gray usMix support.
;*   03/07/88                     Wes Rupel [wesleyr] Added Transparency for 
;*                                ImageData
;*   03/30/88                     Wes Rupel [wesleyr] Made BackColor/ForeColor 
;*                                now 0/1 rather than 1/0 (in mono bitmaps)
;*   10/26/89                     Viroon Touranachun [viroont] Modified the part 
;*                                that generates the "REP" inner loop for 
;*                                "Source Copy" to generate 32-bit data transfer 
;*                                routine using iAPX386's MOVSD and STOSD 
;*                                instructions.
;*   03/15/93              63358  A follow copy MOVSD does not work on a three
;*                                byte sequence.             
;*
;*****************************************************************************/

        .386
        .MODEL FLAT,SYSCALL
        ASSUME  SS:FLAT, DS:FLAT, CS:FLAT, ES:FLAT

        .errnz  BITS_PEL - 4                     ;some uses of BITS_PEL expect it
                                                 ;to be 4  --  should clean up
        .xlist
        include pmgre.inc
DINCL_BB_ROPS   equ     1
DINCL_BITMAP    equ     1
DINCL_ENABLE    equ     1

        include driver.inc
        include extern.inc
        include protos.inc
        include display.inc
        include egafam.inc
        include assert.mac
        include oemblt.inc
        .list


        include constant.blt

;/*
;** !! This constant will be later moved into CONSTANT.BLT
;*/

        I_USE_386   equ      066h                ;x86 operand-size override prefix byte
        I_ROR_EAX_N  equ     0C8C1h              ;Stored as a word = bytes C1h,C8h: ROR EAX,imm8 (count byte must follow)

;/*
;** Don't DWORD align REP MOVSD if the number of bytes to transfer is too
;** small.  Alignment on the average will require 2 MOVSB's at a cost of 14
;** clocks.  Misalignment costs 1? clocks per byte.
;*/

        TOO_SMALL_TO_ALIGN equ 14                ;Threshold in bytes; the comparison itself is at the use site (not shown in this part of the file)


.CODE
        include devdata.blt
DEFINE_ROPCODE = 1
        include roptable.blt                     ; generate code

;/*
;**  jmp_cx_nz   - Code template for near jump if ECX-1 <> 0
;**
;**  jmp_cx_nz will skip the following near jump if ECX-1 is zero.
;**  ECX will be left updated by this code.
;**
;**  jmp_cx_nz is used by both the inner loop code and the outer
;**  loop code if a loop instruction cannot be used.
;*/

jmp_cx_nz:
        dec     ecx                              ;Decrement counter
        jz      $+7                              ;Counter hit zero: skip the near JMP.
                                                 ;  7 = 2 (this short JZ) + 1 (JMP opcode
                                                 ;  byte below) + 4 (displacement, which
                                                 ;  is not part of this template --
                                                 ;  presumably appended by the code that
                                                 ;  copies it; confirm at the use site)
        db      I_JMP_NEAR                       ;JMP opcode
JMP_CX_NZ_LEN   equ     $-jmp_cx_nz              ;Template length in bytes (ends right
                                                 ;  after the JMP opcode byte)


;/*
;**    phase_align - Template for phase alignment code
;**
;**    The following code is the template that performs the phase
;**    alignment masking.  The source has already been aligned to
;**    the destination.
;**
;**    A copy of the aligned source is made.  The phase mask is then
;**    applied to the source and the copy.  The previously unused
;**    bits are ORed into the used bits of the current source, and
;**    the unused bits of the current source then become the unused
;**    bits for the next source.
;**
;**
;**    It assumes:
;**
;**            EBP  =  phase alignment mask
;**            AL   =  current byte to mask
;**            BH   =  old unused bits
;*/

phase_align:
        mov     ah,al                            ;Make a copy of aligned source
        and     eax,ebp                          ;Apply phase mask: low byte of EBP
                                                 ;  masks AL, next byte masks the copy
                                                 ;  in AH (bits 16-31 of EAX are ANDed
                                                 ;  with the upper half of EBP as well)
        or      al,bh                            ;Mask in old unused bits
        mov     bh,ah                            ;Save new unused bits

PHASE_ALIGN_LEN equ     $-phase_align            ;Template length in bytes


;/*
;**  masked_store - Template for storing first and last bytes of BLT
;**
;**  The following code is a template for storing the first and last
;**  bytes of a BLT.  The unaltered bits are saved and the altered
;**  bits set in the byte, then the byte is stored.
;**
;**
;**  It assumes:
;**
;**          AL  =  The byte to be BLTed to the destination bitmap.
;**                 All necessary logic operations have been performed
;**                 on this byte.
;**
;**          AH  =  The destination byte.
;**
;**  The AND immediate will be fixed up.
;*/

masked_store:
        and     eax,0FFFFh                       ;Mask altered/unaltered bits.  The
                                                 ;  0FFFFh immediate is a placeholder
                                                 ;  that gets fixed up (see header)
        or      al,ah                            ;Combine the bits
        stosb                                    ;And store the result

MASKED_STORE_LEN        equ   $-masked_store     ;Length of the template
                                                 ;  (5-byte AND EAX,imm32 + 2-byte OR
                                                 ;  + 1-byte STOSB = 8 bytes)
MASKED_STORE_MASK       equ   -7                 ;Offset to where mask goes.  Negative,
                                                 ;  so presumably relative to the end
                                                 ;  of the copied template: 8 - 7 = 1,
                                                 ;  the first byte of the AND immediate
                                                 ;  (confirm at the fixup site)


;/*
;** Gray usMix logical action template.  This is used instead of one of the
;** roptable rops when the GRAY_ROP MIX requested.
;*/

gray_rop_template:
        mov     ah,[edi]                         ;AH = current destination byte
        mov     al,ah                            ;AL = working copy of destination
        xor     al,dl                            ;AL = bits where destination differs from DL
        and     al,dh                            ;Keep those differences only where DH has 1s
        xor     al,ah                            ;AL = DL bits where DH=1, destination bits where DH=0
LENGTH_GRAY_ROP_TEMPLATE      =       $ - gray_rop_template  ;Template length in bytes

;/*
;** transparency_template is a piece of code that will be appended to the
;** logical action template from the roptable to achieve transparency.
;** The transparency mask is assumed to be in DH.  Where the mask is "1"
;** the result of the logical action is used.  Where the mask is "0"
;** the destination is not altered.
;*/

transparency_template:
        mov     ah,[edi]                         ;AH = current destination byte
        xor     al,ah                            ;AL = bits where the rop result differs from destination
        and     al,dh                            ;Keep those differences only where mask DH has 1s
        xor     al,ah                            ;AL = rop result where DH=1, destination where DH=0
LENGTH_TRANSPARENCY_TEMPLATE   =   $ -    transparency_template  ;Template length in bytes

        page
;/*
;**
;**       Pattern Fetch Code
;**
;**       The pattern fetch code will be created on the fly since
;**       most of the instructions need fixups.
;**
;**       This template is really just a comment to indicate what
;**       the generated code should look like.
;**
;**       Entry:  None
;**
;**       Exit:   DH = pattern
;**
;**       Uses:   AX,BX,CX,DH,flags
;**
;**
;**       The following registers are available to the pattern fetch
;**       logic (as implemented herein):
;**
;**               AX,BX,CX,DX,flags
;**
;**
;**       For monochrome brushes:
;**
;**           mov     ax,1234h                      ;Load segment of the brush
;**           mov     bx,1234h                      ;Load offset of the brush
;**           mov     cx,ds                         ;Save DS
;**           mov     ds,ax                         ;DS:BX --> brush
;**           mov     dh,7[bx]                      ;Get next brush byte
;**           mov     al,ss:[1234h]                ;Get brush index
;**           add     al,iDir                       ;Add displacement to next byte (+1/-1)
;**           and     al,00000111b                 ;Keep it in range
;**           mov     ss:[1234h],al                ;Store displacement to next byte
;**           mov     ds,cx                         ;Restore DS
;**
;**
;**       For color brushes:
;**
;**           mov     ax,1234h                      ;Load segment of the brush
;**           mov     bx,1234h                      ;Load offset of the brush
;**           mov     cx,ds                         ;Save DS
;**           mov     ds,ax                         ;DS:BX --> brush
;**           mov     dh,7[bx]                      ;Get next brush byte
;**           mov     al,ss:[1234h]                ;Get brush index
;**           add     al,SIZE Pattern              ;Add disp. to next plane's bits
;**           and     al,00011111b                 ;Keep it within the brush
;**           mov     ss:[1234h],al                ;Store disp. to next plane's bits
;**           mov     ds,cx                         ;Restore DS
;**
;**
;**       For both templates, SS:[1234] is the address of the 7 in the
;**       "mov dh,7[bx]" instruction.  This is the index to this scan's
;**       bit pattern in the brush.  This value will range from 0 to
;**       (SIZE pattern)-1 for monochrome devices, and from 0 to
;**       ((NumberPlanes)*(SIZE pattern))-1 for color devices.
;**
;**       For color brushes, SS:[1234] must also be fixed up when the next
;**       scan line is selected, else it would index into the monochrome
;**       portion of the brush (e.g. 1,9,17,25, where 25 is not part of the
;**       color brush).
;*/


;/***************************************************************************
;*
;* FUNCTION NAME = CBLT
;*
;* DESCRIPTION   = Compile a BLT onto the stack.  
;*
;* INPUT         = ES:DI --> memory on stack to receive BLT program  
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

;/*
;**   NOTE:   The definition of CBLT below is FAR in order to maintain
;**           the stack frame created for BITBLT, though it is reached
;**           with a NEAR call.
;*/

OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
 
        ALIGN   4

CBLT PROC SYSCALL,
 pddcDst  :DWORD, ;Destination ddc
 xDst     :DWORD, ;Destination x origin
 yDst     :DWORD, ;Destination y origin
 psdSrc   :DWORD, ;Source surface definition
 xSrc     :DWORD, ;Source x origin
 ySrc     :DWORD, ;Source y origin
 cxExt    :DWORD, ;x extent of the BLT
 cyExt    :DWORD, ;y extent of the BLT
 usMix    :DWORD, ;Mix mode
 ipcBkgnd :DWORD, ;Color to mono background match color
 fsBlt    :DWORD

        include frame.blt
                                                 ;Set data seg to CS so we can access
                                                 ;  code without overrides

;/*
;**       Initialize plane indicator.
;*/

        mov     ax,(PLANE_SELECT_FLAG shl 8)+I_MOV_BL_BYTE_I
        stosw

        subttl  Compile - Outer Loop
        page

;/*
;**  Create the outerloop code.  The first part of this code will save
;**  the scan line count register, destination pointer, and the source
;**  pointer (if there is a source).
;**
;**
;**  The generated code should look like:
;**
;**          push    cx                          ;Save scan line count
;**          push    di                          ;Save destination pointer
;**  <       push    si                       >  ;Save source pointer
;*/

        mov     bl,fbF0
        mov     ax,I_PUSH_ECX_PUSH_EDI           ;Save scan line count, destination ptr
        stosw
        test    bl,F0_SRC_PRESENT                ;Is a source needed?
        jz      cblt_2020                        ;  No
        mov     al,I_PUSH_ESI                    ;  Yes, save source pointer
        stosb

cblt_2020:



        subttl  Compile - Plane Selection
        page

;/*
;**       If the destination device is color and the display is involved in
;**       the blt, then the color plane selection logic must be added in.
;**       If the destination is monochrome, then no plane logic is needed.
;**       Two color memory bitmaps will not cause the plane selection logic
;**       to be copied.
;**
;**
;**       The generated code should look like:
;**
;**       <       push    bx      >              ;Save plane index
;**       <       plane selection >              ;Select plane
;*/


        test    bl,F0_DEST_IS_COLOR              ;Is the destination color?
        jz      cblt_pattern_fetch               ;  No
        mov     al,I_PUSH_EBX                    ;Save plane index
        stosb
        test    bl,F0_DEST_IS_DEV+F0_SRC_IS_DEV  ;Is the device involved?
        jz      cblt_pattern_fetch               ;  No


;/*
;**   The device is involved for a color blt.  Copy the logic for selecting
;**   the read/write plane, and perform any fixups that are needed.
;*/

        mov     esi,OFFSET cps                   ;--> plane select logic

if      LENGTH_CPS GE 8
        mov     ecx,LENGTH_CPS/4
        rep     movsd
else
if      LENGTH_CPS AND 4
        movsd
endif
endif
if      LENGTH_CPS AND 2
        movsw
endif
if      LENGTH_CPS AND 1
        movsb
endif

        subttl  Compile - Pattern Fetch
        page

;/*
;**   Set up any pattern fetch code that might be needed.
;**   The pattern code has many fixups, so it isn't taken from a
;**   template.  It is just stuffed as it is created.
;**
;**
;**   Entry:  None
;**
;**   Exit:   DH = pattern
;**
;**   Uses:   AX,BX,CX,DH,flags
;**
;**
;**   For color brushes:
;**
;**       mov     bx,YYYYh                             ;Load offset (immediate) of the brush
;**    *  mov     ax,XXXXh                             ;Load segment (immediate) of the brush
;**    *  mov     cx,ds                                ;Save DS
;**    *  mov     ds,ax                                ;DS:BX --> brush
;**       mov     dh,7[bx]                             ;Get next brush byte
;**       mov     al,ss:[xxxxh]                        ;Get brush index
;**       add     al,SIZE Pattern     ;Add displacement to next plane's bits
;**       and     al,00011111b                         ;Keep it within the brush
;**       mov     ss:[xxxxh],al                        ;Store displacement to next plane's bits
;**    *  mov     ds,cx                                ;Restore DS
;**
;**   For monochrome brushes:
;**
;**       mov     bx,YYYYh                             ;Load offset (immediate) of the brush
;**    *  mov     ax,XXXXh                             ;Load segment (immediate) of the brush
;**    *  mov     cx,ds                                ;Save DS
;**    *  mov     ds,ax                                ;DS:BX --> brush
;**       mov     dh,7[bx]                             ;Get next brush byte
;**       mov     al,ss:[xxxxh]                        ;Get brush index
;**       add     al,iDir                              ;Add displacement to next byte (+1/-1)
;**       and     al,00000111b                         ;Keep it in range
;**       mov     ss:[xxxxh],al                        ;Store displacement to next byte
;**    *  mov     ds,cx                                ;Restore DS
;**
;**
;**   For masks:
;**
;**       The Gray usMix uses the transparency mask on all planes.
;**       So the fetch logic is the same as for a monochrome brush
;**       except that we don't increment to the next brush byte until
;**       we have done all the planes.  So we will cut out the increment
;**       from the monochrome fetch code and leave it to the "next scanline"
;**       code at the very bottom to update the brush index after all the planes
;**       are done.
;**
;**       mov     bx,YYYYh                             ;Load offset (immediate) of the brush
;**    *  mov     ax,XXXXh                             ;Load segment (immediate) of the brush
;**    *  mov     cx,ds                                ;Save DS
;**    *  mov     ds,ax                                ;DS:BX --> brush
;**       mov     dh,7[bx]                             ;Get next brush byte
;**    *  mov     ds,cx                                ;Restore DS
;**
;**
;**       The address of the increment for the brush is saved for
;**       the plane looping logic if the destination is a three plane
;**       color device.  For a four plane color device, the AND
;**       automatically handles the wrap and no fixup is needed at
;**       the end of the plane loop.
;**
;**       Instructions marked with "*" are not present if there is no
;**       source bitmap in the rop.  The bitmap is what DS would otherwise
;**       be used for.
;*/
 
cblt_pattern_fetch:
        test    bl,F0_PAT_PRESENT                ;Is a pattern needed?
        jz      cblt_initial_byte_fetch ;  No, skip pattern code

;/*
;** The special gray rop will erroneously lead to Color Pat Fetch. 
;** It should be mono fetch of the transparency mask.   
;*/


        test    fsBlt,BBF_GRAY_ROP                 
        jz      cblt_not_gray_rop                  
                                                   
color_fetch_template:                              
        and     fbF0,not F0_COLOR_PAT            ; really a mono (1 plane) fetch
        and     bl,not F0_COLOR_PAT              ; really a mono (1 plane) fetch

;/*
;** Take this opportunity to compile the "color fetch" code needed by the
;** Gray usMix.  This will compile code which will expand the background color bit
;** for the current plane into all of DL.  It will look like this for color 03h:
;**
;**   mov   dl,03h   ; grab the background color as an immediate value.
;**   and   dl,bl    ; BL is the plane mask.  This isolates the color bit for
;**   neg   dl       ; the current plane.  The NEG sets the carry if the bit was
;**   sbb   dl,dl    ; one.  SBB sets DL to 0 or FF depending on the carry.
;*/

        mov     eax,(I_AND_DL_BL shl 16)+I_MOV_DL_BYTE_I ; mov dl,PatBackColor
        mov     ah,BYTE PTR ipcBrushBack.ipc_bClr[0]     ; and dl,bl
        stosd
        mov     eax,(I_SBB_DL_DL shl 16)+I_NEG_DL        ; neg dl
        stosd                                            ; sbb dl,dl

cblt_not_gray_rop:                                    



        mov     al,I_MOV_EBX_DWORD_I             ;mov ebx,pBrush
        stosb
        mov     eax,pBrush
        stosd
        mov     ax,I_MOV_DH_EBX_DISP8            ;mov dh,yPatRow[ebx]
        stosw

;/*
;** apparently we don't need to save npbPatRow if it is a normal
;** mono fetch, but the Gray usMix mono fetch requires it.
;*/

        mov     npbPatRow,edi                    ;Save address of the brush index
        mov     edx,edi                          ;Save address of the brush index
        mov     al,yPatRow                       ;Set initial pattern row
        mov     bh,00000111b                     ;Set brush index mask
        and     al,bh                            ;Make sure it's legal at start
        stosb

;/*
;** If we are using the transparency mask (as with Gray usMix) then we 
;** don't want to update the brush index until we've done all 4 planes.
;** This updating will be done at the bottom after we finish the plane 
;** select loop.                                   
;*/

        test    fsBlt,BBF_GRAY_ROP                 
        jnz     cblt_using_mask                    

        mov     al,I_MOV_AL_MEM
        stosb                                    ;mov al,[xxxx]
        mov     eax,edx
        stosd
        mov     al,I_ADD_AL_BYTE_I
        mov     ah,iDir                          ;Set brush index
        .errnz  INCREASE - 1                     ;Must be a 1
        .errnz  DECREASE + 1                     ;Must be a -1

        test    bl,F0_COLOR_PAT                  ;Color pattern required?
        jz      cblt_mono_pat                    ;  No
        mov     ah,SIZE_PATTERN                  ;Set increment to next plane
        mov     bh,00011111b                     ;Set brush index mask

cblt_mono_pat:
        stosw


        mov     al,I_AND_AL_BYTE_I
        mov     ah,bh
        stosw                                    ;and al,BrushIndexMask

        mov     al,I_MOV_MEM_AL
        stosb

        mov     eax,edx
        stosd                                    ;mov [xxxx],al


cblt_using_mask:

cbltpf_no_source_here:

        subttl  Compile - Initial Byte Fetch
        page

;/*
;**       Create the initial byte code.  This may consist of one or two
;**       initial fetches (if there is a source), followed by the required
;**       logic action.  The code should look something like:
;**
;**       BLTouterloop:
;**       <       mov     bp,mPhase   >            ;Load phase mask for entire loop
;**       <       xor     bh,bh                        > ;Clear previous unused bits
;**
;**       ;       Perform first byte fetch
;**
;**       <       lodsb                                > ;Get source byte
;**       <       color<==>mono munge >            ;Color <==> mono conversion
;**       <       phase alignment     >            ;Align bits as needed
;**
;**       ;       If an optional second fetch is needed, perform one
;**
;**       <       lodsb                                > ;Get source byte
;**       <       color to mono munge >            ;Color to mono munging
;**       <       phase alignment     >            ;Align bits as needed
;**
;**               logical action                    ;Perform logical action required
;**
;**               mov     ah,es:[di]               ;Get destination
;**               and     ax,cx                     ;Saved unaltered bits
;**               or      al,ah                     ;  and mask in altered bits
;**               stosb                             ;Save the result
;**
;**
;**       The starting address of the first fetch/logical combination will be
;**       saved so that the code can be copied later instead of recreating it
;**       (if there are two fetches, the first fetch will not be copied)
;**
;**       The length of the code up to the masking for altered/unaltered bits
;**       will be saved so the code can be copied into the inner loop.
;*/
 

cblt_initial_byte_fetch:


        mov     cFetchCode,1                     ; default is 1 == SIZE(lodsb)
        mov     edx,0
        or      dh,iHorzPhase                    ; Is the phase 0? (also get the phase)
        jz      cblt_3020                        ;   Yes, so no phase alignment needed
        mov     al,I_MOV_EBP_DWORD_I             ; Set up the phase mask
        stosb
        mov     eax,mPhase                       ; Place the mask into the instruction
        stosd

;/*
;** Somebody once said that we could remove this XOR.  We cannot!  The
;** current phase alignment code will OR this into the "used" bits.
;** If we don't clear this, we could end up ANDing bits into the currently
;** "used" bits
;*/

        mov     ax,I_XOR_BH_BH                   ; Clear previous unused bits
        stosw

cblt_3020:
        mov     npFetchStart,edi                 ;Save starting address of action
        test    bl,F0_SRC_PRESENT                ;Is there a source?
        jnz     cblt_src_is_present              ;  Yes, generate fetch code
        jmp     cblt_logical_action              ;  No, don't generate fetch code

        ALIGN   4

cblt_src_is_present:

;/*
;**       Generate the required sequence of instructions for a fetch
;**       sequence.  Only the minimum code required is generated.
;**
;**       The code generated will look something like the following:
;**
;**       BLTfetch:
;**       <       lodsb                                  > ;Get the next byte
;**       <       color munging                          > ;Mono <==> color munging
;**
;**       ;       If the phase alignment isn't zero, then generate the minimum
;**       ;       phase alignment needed.  RORs or ROLs will be generated,
;**       ;       depending on the fastest sequence.  If the phase alignment
;**       ;       is zero, then no phase alignment code will be generated.
;**
;**       <       ror     al,1                           > ;Rotate as needed
;**       <       ror     al,1                           > ;Rotate as needed
;**       <       ror     al,1                           > ;Rotate as needed
;**       <       ror     al,1                           > ;Rotate as needed
;**       <       mov     ah,al                          > ;Mask used, unused bits
;**       <       and     ax,bp                          > ;(BP) = phase mask
;**       <       or      al,bh                          > ;Mask in old unused bits
;**       <       mov     bh,ah                          > ;Save new unused bits
;**
;**
;**       The nice thing about the above is it is possible for the fetch to
;**       degenerate into a simple LODSB instruction.
;**
;**       If this were an iAPX80286 implementation, it would be faster to
;**       make three or four rotates into a "ror al,n" instruction.
;**
;**       Currently:      BL = fbF0
;**                       BH = byte ptr fsBlt[1] (loaded by the first instruction below)
;*/
 
        mov     bh,byte ptr fsBlt[1]              ; We will test BH several times
        .errnz  low BBF_TRANS
        .errnz  low BBF_ANTI_TRANS
        .errnz  low BBF_TRANS_NO_CC
        test    bl,F0_GAG_CHOKE                   ; Color conversion?
        jnz     cblt_color_convert                ;   Yes, gag and choke on it

;/*
;**  No Color Conversion.
;**  If it is mono to mono transparent then we want to go filter thru the
;**  first part of the mono_to_color code just to get the
;**  transparency stuff.  Being here means no color conversion and transparent
;**  means mono source, so if it is transparent then it is mono to mono.
;*/


        test    bh,(BBF_TRANS or BBF_ANTI_TRANS) shr 8
        jnz     cblt_gotta_transparency    ; it's mono_to_mono transparency
        jmp     cblt_no_color_conversion

        ALIGN   4

cblt_color_convert:
        mov     fbMore,0                          ; Assume REP cannot be used
        test    bl,F0_SRC_IS_COLOR
        jnz     cblt_color_to_mono
cblt_gotta_transparency:
        jmp     cblt_mono_to_color

        ALIGN   4


        subttl  Compile - Initial Byte Fetch, Color ==> Mono
        page

cblt_color_to_mono:
;/*
;**       We examined the target image data colors when we discovered
;**       this was color==>mono, and mapped the mix into DDx or DDxn
;**       if the target image colors were the same.  Therefore, once
;**       we converted the source to monochrome, we can either just
;**       store it or invert it and store it.
;**
;**       Generate the code to go from color to mono.  Color to mono
;**       should map all colors that are the passed background to the
;**       image data background color, and all other colors to the image
;**       data foreground color.
;**
;**       If the source is the display, the color compare register will
;**       be used to do the actual comparison.  We might have to invert
;**       the results depending on the image data colors.
;**
;**       If the source is a memory bitmap, each byte of the plane will be
;**       XORed with the color from that plane, with the results all ORed
;**       together giving the desired result.
;**
;**       The generated code for bitmaps should look something like:
;**
;**           mov     al,[si].next_plane            ; Get C1 byte of source
;**           mov     ah,[si].(2*next_plane)        ; Get C2 byte of source
;**           xor     ax,C1BkColor+(C2BkColor*256)  ; XOR with plane's color
;**           or      ah,al                         ; OR the result
;**           mov     al,[si].(3*next_plane)        ; Get C3 byte of source
;**           xor     al,C3BkColor
;**           or      ah,al
;**           lodsb                                 ; Get C0 source
;**           xor     al,C0BkColor                  ; XOR with C0BkColor
;**           or      al,ah                         ; OR with previous result
;**           not     al                            ; NOT to give 1's where background
;** - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
;**  Currently:
;**               DH = phase
;**               CX = ?
;*/


        test    bl,F0_SRC_IS_DEV                 ; If device, use color compare register
        jz      cblt_clrconv_memory_bitmap       ; It's a memory bitmap

;/*
;**       We're in luck, the color compare register can be used.  Set up
;**       for a color read, and use the normal mono fetch code.  Show the
;**       innerloop code that the REP instruction can be used if this is
;**       a source copy.
;*/

        mov     ecx,edx                           ; Save edx
        mov     ah,BYTE PTR ipcBkgnd[0]          ; Get background color
        mov     al,GRAF_COL_COMP                 ; Stuff color into compare register
        mov     dx,EGA_BASE+GRAF_ADDR
        out     dx,ax
        mov     ax,COLOR_DONT_CARE               ; Set Color Don't Care register
        out     dx,ax

;/*
;** Show color read mode to the
;**   EGA restoration code
;*/

        mov     ax,M_COLOR_READ SHL 8 + GRAF_MODE
        mov     shadowed_graf_mode.vvr_value,ah ; Must shadow this for state code
        out     dx,ax

        mov     edx,ecx
        mov     al,I_LODSB                        ;Generate source fetch
        stosb
        or      fbMore,F1_REP_OK                 ;Assume we can use rep
        mov     cFetchCode,1                      ;Only 1 byte so far
        test    ipcImageBack,MONO_BIT            ;Is background color a 1
        jnz     @F                                ;  Yes, we can use rep
        mov     ax,I_NOT_AL                       ; map  background color to 0's
        stosw                                     ; can ColorCompareReg do this for us?
                                                  ; if so then F1_REP_OK can still be set
        mov     cFetchCode,3
        and     fbMore,LOW (NOT F1_REP_OK)
@@:
        jmp     cblt_3160                         ; Go create phase & logic code

        ALIGN   4

cblt_clrconv_memory_bitmap:

;/*
;**       The source is a memory bitmap.  Generate the fetch code that
;**       combines the four source planes into one byte in AL.  Each plane
;**       byte is XORed with a 00h/FFh mask built from the matching bit of
;**       the background color (ipcBkgnd) and the four results are ORed.
;**
;**       Going by the I_* opcode names, the emitted sequence is
;**       (np = devSrc.next_plane):
;**
;**               mov     al,[esi+np]
;**               mov     ah,[esi+2*np]
;**               xor     ax,imm16          ; AL mask from C1_BIT, AH from C2_BIT
;**               or      ah,al
;**               mov     al,[esi+3*np]
;**               xor     al,imm8           ; mask from C3_BIT
;**               or      ah,al
;**               lodsb
;**               xor     al,imm8           ; mask from C0_BIT
;**               or      al,ah
;**       <       not     al >              ; only if MONO_BIT set in ipcImageBack
;**
;**       ECX holds the starting EDI so the size of the fetch code can be
;**       stored in cFetchCode.  EBX is overwritten (it holds 2*np).
;*/

        mov     ecx,edi                           ; save initial edi for fetch size calc.
        mov     ax,I_MOV_AL_ESI_DISP32
        stosw
        mov     eax,devSrc.next_plane
        stosd                                     ; displacement = np

        xchg    ebx,eax                           ; xchg is one byte less than mov
        add     ebx,ebx                           ; ebx = 2 * np
        mov     ax,I_MOV_AH_ESI_DISP32
        stosw
        mov     eax,ebx
        stosd                                     ; displacement = 2 * np

;/*
;** This comes out to be xor ax,ImmediateWord
;** Due to the OPERAND_SIZE_PREFIX
;*/

        mov     ax,(I_XOR_EAX_DWORD_I SHL 8) + OPERAND_SIZE_PREFIX
        stosw

        mov     al,BYTE PTR ipcBkgnd[0] ;Get background color
        mov     ah,al
        and     ax,(C2_BIT shl 8) or C1_BIT       ; AL = C1 bit, AH = C2 bit
        neg     al                                ; carry set if the bit was set
        sbb     al,al                             ; AL = FFh if set, else 00h
        neg     ah
        sbb     ah,ah                             ; AH = FFh if set, else 00h
        stosw                                     ; immediate word for the xor
;/*
;** do 4th plane
;*/

        mov     eax,(I_MOV_AL_ESI_DISP32 shl 16)+I_OR_AH_AL
        stosd                                     ; or ah,al / mov al,[esi+disp32]
        mov     eax,devSrc.next_plane
        add     eax,ebx
        stosd                                     ; displacement = 3 * np

        mov     eax,(I_OR_AH_AL shl 16)+I_XOR_AL_BYTE_I
        mov     ah,BYTE PTR ipcBkgnd[0] ;Get background color
        and     ah,C3_BIT
        neg     ah
        sbb     ah,ah                             ; AH = immediate byte (00h/FFh) for the xor
        stosd                                     ; xor al,imm8 / or ah,al

        mov     ax,I_LODSB+(I_XOR_AL_BYTE_I*256)
        stosw                                     ; lodsb / xor al,...
        mov     al,BYTE PTR ipcBkgnd[0] ;Get background color
        shr     al,1                              ; C0 bit into carry
        sbb     al,al                             ; AL = FFh if set, else 00h
        .errnz  C0_BIT-00000001b
        stosb                                     ; ...immediate byte for the xor
        mov     ax,I_OR_AL_AH
        stosw

        test    ipcImageBack,MONO_BIT            ;Is background color a 0
        jz      @F                               ;  Yes, don't need to invert it
        mov     ax,I_NOT_AL                      ;Map background color to 1's
        stosw
@@:
        sub     ecx,edi                          ;Compute size of fetch code.
        neg     ecx                              ;  ecx = edi - starting edi
        mov     cFetchCode,ecx
        jmp     cblt_3160                        ;Go create phase/logic code

        ALIGN   4



        subttl  Compile - Initial Byte Fetch, Mono ==> Color
        page

;/*
;**       The conversion is mono to color.  Generate the code to
;**       do the conversion, and generate the table which will
;**       have the conversion values in it.
;**
;**       When going from mono to color, 0 bits are considered to be
;**       the background color, and 1 bits are considered to be the
;**       foreground color.
;**
;**       For each plane:
;**
;**         If the foreground=background=1, then 1 can be used in
;**         place of the source.
;**
;**         If the foreground=background=0, then 0 can be used in
;**         place of the source.
;**
;**         If the foreground=1 and background=0, then the source
;**         can be used as is.
;**
;**         If the foreground=0 and background=1, then the source
;**         must be complemented before using.
;**
;**         Looks like a boolean function to me.
;**
;**       An AND mask and an XOR mask will be computed for each plane,
;**       based on the above.  The source will then be processed against
;**       the table.  The generated code should look like
;**
;**               lodsb
;**               and     al,ss:[xxxx]
;**               xor     al,ss:[xxxx+1]
;**
;**
;**       The table for munging the colors as stated above should look like:
;**
;**            BackGnd   ForeGnd                     Result    AND  XOR
;**               1         1                          1      00   FF
;**               0         0                          0      00   00
;**               1         0                      not S      FF   FF
;**               0         1                          S      FF   00
;**
;**       From this, it can be seen that the XOR mask is the same as the
;**       background color.  The AND mask is the XOR of the foreground
;**       and the background color.  Not too hard to compute
;**
;**
;**       It can also be seen that if the background color is black and the
;**       foreground (text) color is white, then the conversion needn't be
;**       generated (it just gives the source).  This is advantageous since
;**       it will allow phased aligned source copies to use REP MOVSW.
;**
;**
;**       Currently:      ds:si --> bkColor
;*/

cblt_mono_to_color:

;/*
;** Generate the code for munging the color as stated above.
;** The fetch always begins with a LODSB.
;*/

        mov     al,I_LODSB
        stosb

;/*
;** To make transparent rops needed for ImageData we need to do the phase
;** alignment before the color conversion.  This only applies to mono memory
;** bitmaps being blted to the screen, so we will only do it here -- it is too
;** much trouble to do the phase alignment before color conversion in general.
;** ESI is saved and restored around the call.
;*/


        push    esi
        call    phase_align_generate
        pop     esi

;/*
;** If transparency is requested then the compiled code must save the
;** source (now that it has been phase aligned) to use as a mask.  It will
;** save it in DH (where the pattern would be if there was one --
;** transparency is being added for ImageData which does not use a pattern).
;** For anti-transparency the saved copy in DH is complemented.
;**
;** BH is tested against the high byte of the BBF_* flags and BL against
;** the F0_* flags; both are set up before this point.
;*/

        test    bh,(BBF_TRANS or BBF_ANTI_TRANS) shr 8
        jz      cblt_no_transparency_needed
        mov     ax,I_MOV_DH_AL                    ; op code for  mov dh,al
        stosw
        test    bh,BBF_ANTI_TRANS shr 8
        jz      cblt_its_not_anti_transparency
        mov     ax,I_NOT_DH                       ; op code for  not dh
        stosw
cblt_its_not_anti_transparency:
        test    bh,BBF_TRANS_NO_CC shr 8
        jnz     already_phase_aligned_load_ds     ; don't color convert
cblt_no_transparency_needed:

;/*
;** Normally being here means the dest is color.   However, we also come
;** thru here just to get the transparency stuff above when doing mono to
;** mono.  It is now time to bounce outta here if that's what we're here for
;** since we don't actually want any mono_to_color conversion code.
;*/


        test    bl,F0_DEST_IS_COLOR
        jz      already_phase_aligned_load_ds    ; it's a mono dest

;/*
;** Now we'll check to see if the background color is black, and the
;** foreground color is white.  This can be determined by looking at
;** the accelerator flags in the physical color.
;**
;** The compare below succeeds only when the background has MONO_BIT clear
;** and the foreground has MONO_BIT set, with ONES_OR_ZEROS set in both.
;*/

        mov     ah,BYTE PTR ipcImageBack[0]      ; get background color
        xor     ah,MONO_BIT                      ; Map black to white
        and     ah,BYTE PTR ipcImageFore[0]      ; AND in foreground color
        cmp     ah,MONO_BIT+ONES_OR_ZEROS
        jne     cblt_not_black

;/*
;** A normal fetch without any color conversion will be used because with
;** black and white as our two colors the color conversion code would just map
;** ones to ones and zeros to zeros.  Show reps as ok, no color munge table.
;*/

        mov     fbMore,F1_REP_OK + F1_NO_MUNGE
        jmp     already_phase_aligned_load_ds
        ALIGN   4

cblt_not_black:

;/*
;** No way around it.  The color conversion table and code must be generated.
;**
;** Generate the XOR and the AND masks from the foreground and background colors.
;**  The XOR mask is the same as the background color.
;**  The AND mask is the XOR of the foreground and the background color.
;**
;** One bit per plane is shifted out of CL (AND source) and CH (XOR source)
;** and expanded to 00h/FFh.  Each table word holds the AND mask in its low
;** byte and the XOR mask in its high byte.
;*/

        mov     cx,ipcImageBoth                  ;CH = background, CL = foreground
        xor     cl,ch                            ;CL = foreground XOR background
        shr     cl,1
        sbb     al,al                            ;AL = AND mask, bit 0
        shr     ch,1
        sbb     ah,ah                            ;AH = XOR mask, bit 0
        mov     word ptr ausMono2Color[0],ax
        shr     cl,1
        sbb     al,al                            ;AL = AND mask, bit 1
        shr     ch,1
        sbb     ah,ah                            ;AH = XOR mask, bit 1
        mov     word ptr ausMono2Color[2],ax
        shr     cl,1
        sbb     al,al                            ;AL = AND mask, bit 2
        shr     ch,1
        sbb     ah,ah                            ;AH = XOR mask, bit 2
        mov     word ptr ausMono2Color[4],ax
        shr     cl,1
        sbb     al,al                            ;AL = AND mask, bit 3
        shr     ch,1
        sbb     ah,ah                            ;AH = XOR mask, bit 3
        mov     word ptr ausMono2Color[6],ax

;/*
;** Emit the two table-driven instructions.  Both reference the first table
;** entry (AND mask at +0, XOR mask at +1).  EBX is overwritten with the
;** table address here.
;*/

        mov     ax,I_AND_AL_MEM                  ;and al,[xxxx]
        stosw
        lea     eax,ausMono2Color                ;  Set address of color munge
        stosd
        mov     ebx,eax                          ;  Save address
        mov     ax,I_XOR_AL_MEM                  ;xor al,[xxxx]
        stosw
        lea     eax,[ebx][1]                     ;  Set address of XOR mask
        stosd
already_phase_aligned_load_ds:
        jmp     already_phase_aligned            ;Phase alignment was already generated above

        ALIGN   4


cblt_3160:                                       ; Join point used by the color-conversion fetch generators
        jmp     cblt_phase_align                 ; Fetch is emitted; go generate phase alignment

        ALIGN   4


cblt_no_color_conversion:

;/*
;** Just need to generate the normal fetch sequence (lodsb)
;*/

        mov     al,I_LODSB                       ;Generate source fetch
        stosb

cblt_phase_align:
        call    phase_align_generate             ;Append the phase alignment code
already_phase_aligned:
        test    fbFetch,FF_TWO_INIT_FETCHES      ; Generate another fetch?
        jz      cblt_logical_action              ; No

;/*
;** A second fetch needs to be stuffed.  Copy the one just created.
;**
;** The bytes from the old npFetchStart up to EDI are duplicated at EDI,
;** and npFetchStart is moved to the start of the duplicate.  The copy is
;** done as dwords followed by the 0-3 remaining bytes.
;*/

        mov     esi,edi                           ;Get start of fetch logic
        xchg    esi,npFetchStart                 ;Set new start, get old
        mov     ecx,edi                           ;Compute how long fetch is
        sub     ecx,esi                           ;  and move the bytes


        mov     eax,ecx                           ;Keep the byte count
        shr     ecx,2                             ;Whole dwords first
        rep     movsd
        mov     ecx,eax
        and     ecx,3                             ;Then the leftover bytes
        rep     movsb

        subttl  Compile - ROP Generation
        page

;/*
;**       Create the logic action code
;**
;**       The given ROP will be converted into the actual code that
;**       performs the ROP.
;*/


SRC_IN_AL       equ     00000001b                ; Source field is in AL  (0)
DEST_IN_AH      equ     00000010b                ; Destination field is in AH (1)
PUSH_POP_FLAG   equ     00000100b                ; Next push/pop is a pop (1)


;/*
;**       Copy the ROP template into the BLT
;*/

cblt_logical_action:

;/*
;** The special gray rop will erroneously lead to Color Pat Fetch.
;** It should be mono fetch of the transparency mask.  
;**
;** When BBF_GRAY_ROP is set, gray_rop_template is copied in place of a
;** template from the ROP table.  The conditional assembly below copies
;** LENGTH_GRAY_ROP_TEMPLATE bytes: dwords first (REP MOVSD when there are
;** two or more), then an optional word and an optional byte.
;**
;** NOTE(review): this path jumps to cblt_srccopy without loading EAX from
;** usMixData, yet cblt_srccopy tests the sign of AH to decide whether to
;** append a NOT.  Confirm that AH cannot be negative here, or that the
;** NOT check is meant to apply.
;*/
                                                    
        test    fsBlt,BBF_GRAY_ROP                  
        jz      cblt_not_special_gray_rop        
        mov     esi,OFFSET gray_rop_template     

if      LENGTH_GRAY_ROP_TEMPLATE GE 8
        mov     ecx,LENGTH_GRAY_ROP_TEMPLATE/4
        rep     movsd
else
if      LENGTH_GRAY_ROP_TEMPLATE AND 4
        movsd
endif
endif
if      LENGTH_GRAY_ROP_TEMPLATE AND 2
        movsw
endif
if      LENGTH_GRAY_ROP_TEMPLATE AND 1
        movsb
endif
        jmp     cblt_srccopy                        ; Template copied; continue with the common tail

        ALIGN   4

cblt_not_special_gray_rop:                            ; 

;/*
;** Look up the ROP template described by usMixData and copy it into the
;** BLT.  The ROPLength field (bits 10-12, see the .errnz below) indexes
;** the length bytes at the start of ropcode; the ROPOffset field is the
;** offset of the template within ropcode.  An offset of zero means source
;** copy, for which no template is copied.
;**
;** EAX is left holding usMixData for cblt_srccopy.
;*/

        mov     eax,usMixData                     ; Get back rop data
        mov     bl,ah                             ; Get count of number of bits to move
        and     ebx,HIGH ROPLength                ; Isolate the field (clears the rest of EBX)
        shr     ebx,2                             ; Right-justify it as a table index
        movzx   ecx,byte ptr ropcode[ebx]         ; Get length into ecx
        .errnz   ROPLength - 0001110000000000b

        mov     ebx,eax                           ; Get offset of the template
        and     ebx,ROPOffset
        jz      cblt_srccopy                      ; Source copy
        lea     esi,ropcode[ebx]                        ; --> the template
cblt_copy_template:
        rep     movsb                             ; Move the template
cblt_srccopy:

;/*
;** Common tail of the logic action.  If the sign bit of AH (bit 15 of the
;** rop data in EAX) is set, a NOT AL is appended to the generated code.
;*/

        mov     ebx,eax                           ; Keep rop around. !!!never used (I think)
        or      ah,ah                             ; Generate a negate?
        jns     cblt_no_NOT                       ;  No
        mov     ax,I_NOT_AL
        stosw
cblt_no_NOT:

;/*
;** Add transparency mask if needed:               
;**
;** transparency_template is copied with the same conditional-assembly
;** scheme as above: dwords first (REP MOVSD when there are two or more),
;** then an optional word and an optional byte.
;*/

        test    fsBlt,BBF_TRANS or BBF_ANTI_TRANS  
        jz      cblt_transparency_not_needed       
        mov     esi,OFFSET transparency_template   

if      LENGTH_TRANSPARENCY_TEMPLATE GE 8
        mov     ecx,LENGTH_TRANSPARENCY_TEMPLATE/4
        rep     movsd
else
if      LENGTH_TRANSPARENCY_TEMPLATE AND 4
        movsd
endif
endif
if      LENGTH_TRANSPARENCY_TEMPLATE AND 2
        movsw
endif
if      LENGTH_TRANSPARENCY_TEMPLATE AND 1
        movsb
endif

cblt_transparency_not_needed:                   

        mov     npFetchEnd,edi                   ; Save end of fetch/logic operation



        subttl  Compile - Mask And Save
        page

;/*
;**    Generate code to mask and save the result.  If the destination
;**    isn't in a register, it will be loaded from ES:[DI] first.  The
;**    mask operation will then be performed, and the result stored.
;*/

        mov     ax,I_MOV_AH_DEST                 ; Load the destination byte into AH
        stosw

cblt_4280:

        mov     esi,OFFSET masked_store ;Move rest of masked store template
        movsd
        movsd
        .errnz  MASKED_STORE_LEN - 8             ; Must be eight bytes long (two MOVSDs)

        mov     ax,mStart                        ; Stuff start mask into
        xchg    ah,al                            ;   the template (bytes swapped)
        mov     MASKED_STORE_MASK[edi],ax        ; MASKED_STORE_MASK is relative to the end of the template
        mov     npFetchEnds,edi                  ; Save end of fetch/logic/store operation


        subttl  Compile - Inner Loop Generation
        page

;/*
;**   Now for the hard stuff; The inner loop (said with a "gasp!").
;**
;**   If there is no innerloop, then no code will be generated
;**   (now that's fast!).
;*/

cblt_5000:
        mov     edx,cInnerByte                   ; Get the loop count
        or      edx,edx                          ; If the count is null
        jz      cblt_inner_loop_done             ;  don't generate any code.


;/*
;**       We have something for a loop count.  If this just happens to be
;**       a source copy (S) with a phase of zero, then the innerloop degenerates
;**       to a repeated MOVSB instruction.  This little special case is
;**       worth checking for and handling!
;**
;**       Also, if this is one of the special cases {P, Pn, DDx, DDxn}, then it
;**       will also be special cased since these are all pattern fills (pattern,
;**       not pattern, 0, 1).
;**
;**       The same code can be shared for these routines, with the exception
;**       that patterns use a STOSx instruction instead of a MOVSx instruction
;**       and need a value loaded in AX
;**
;**
;**       So we lied a little.  If a color conversion is going on, then the
;**       REP MOVSB might not be usable.  If the F1_REP_OK flag has been set, then
;**       we can use it.  The F1_REP_OK flag will be set for a mono ==> color
;**       conversion where the background color is black and the foreground
;**       color is white, or for a color ==> mono conversion with the screen
;**       as the source (the color compare register will be used).
;**
;**       For the special cases {P, Pn, DDx, DDxn}, color conversion is
;**       not possible, so ignore it for them.
;**
;**       On exit to cblt_rop_p:  AL = raster op, BX = opcode that loads the
;**       fill byte into AL, EDX = inner loop byte count.
;*/
 
        mov     al,byte ptr usMix[0]             ; Get the raster op
        cmp     al,BB_ROP_S
        je      cblt_rop_s
        mov     bx,I_MOV_AL_0FFH                 ; Assume this is a 0 or 1 fill
        cmp     al,BB_ROP_DDxn
        je      cblt_rop_p
        cmp     al,BB_ROP_DDx
        je      cblt_rop_p
        mov     bx,I_MOV_AL_DH                    ; Assume P or Pn fill
        cmp     al,BB_ROP_P
        je      cblt_rop_p
        cmp     al,BB_ROP_Pn
        jne     cblt_cannot_rep                  ; Cannot special case it
cblt_rop_p:

;/*
;**       We should have one of the following fill operations:
;**
;**               P       - Pattern
;**               Pn      - NOT pattern
;**               DDx     - 0 fill
;**               DDxn    - 1 fill
;**
;**       On entry:  AL = raster op, BX = opcode that loads the fill byte
;**       into AL (MOV AL,0FFh or MOV AL,DH), EDX = byte count.
;*/

        xchg    ax,bx                            ; AX = load opcode, BL = raster op
        stosw
        mov     ax,I_MOV_AH_AL                   ; The fill in AX = AL:AL
        stosw
        mov     si,I_STOSB                       ; Set up for repeated code processor
        cmp     bl,BB_ROP_P                      ; If Pn or 0, then complement pattern
        je      cblt_check_length
        cmp     bl,BB_ROP_DDxn
        je      cblt_check_length
        mov     ax,I_NOT_EAX                     ; Is Pn or 0, complement EAX
        stosw

;/*
;**   Now, we have the fill in AX; we still need to duplicate it to
;**   make EAX = AX:AX if necessary.
;*/

cblt_check_length:
        cmp     edx,4                            ; If we are going to use STOSD
        jb      cblt_5080                        ; we need the fill in EAX

                                                 ; The fill in AX = AL:AL
        mov     ax, I_ROR_EAX_N
        stosw                           
        mov     al,8
        stosb                                    ; ROR EAX,8 => EAX = AL:X:X:AL

        mov     ax,I_MOV_AH_AL
        stosw                                    ; MOV AH,AL => EAX = AL:X:AL:AL

        mov     ax, I_ROR_EAX_N
        stosw                           
        mov     al,8
        stosb                                    ; ROR EAX,8 => EAX = AL:AL:X:AL

        mov     ax,I_MOV_AH_AL
        stosw                                    ; MOV AH,AL => EAX = AL:AL:AL:AL


        jmp     cblt_5080                        ; Pattern fills skip the source dword alignment

        ALIGN   4

cblt_rop_s:

;/*
;**       This is a source copy. The phase must be zero for a source copy
;**       to be condensed into a REP MOVSx.
;*/

        cmp     iHorzPhase,0                     ; Is horizontal phase zero?
        jne     cblt_cannot_rep                  ;  No, can't condense source copy
        mov     si,I_MOVSB                       ; Set register for moving bytes

;/*
;**       Transparent blts cannot be condensed into a REP MOVSx.
;**       For a color conversion, F1_REP_OK must be set.
;*/

        test    fsBlt,BBF_TRANS or BBF_ANTI_TRANS
        jnz     cblt_cannot_rep                  ; Transparency requested, do it the hard way
        test    fbF0,F0_GAG_CHOKE                ; Color conversion?
        jz      cblt_can_use_rep                 ;  No, rep is OK to use
        test    fbMore,F1_REP_OK                 ;  Yes, can we rep it?
        jz      cblt_cannot_rep                  ;    No, do it the hard way

cblt_can_use_rep:

;/*
;**       This is a source copy or pattern fill.   If there aren't many
;**       bytes to process then a REP MOVSD or REP STOSD is used.  MOVSB's
;**       or STOSB's are then used to catch any bytes not processed when
;**       the number of bytes is not a multiple of 4.  If the REP prefix
;**       isn't needed it is left out.
;**
;**       If many bytes are being processed then MOVSB's are used to
;**       align SI on a DWORD boundary before the use of the REP MOVSD.
;**       This should save clock cycles.
;**
;**       Don't get caught on this like I did!  If the direction of the
;**       BLT is from right to left (decrementing addresses), then both
;**       the source and destination pointers must be decremented by three
;**       so that the next four bytes are processed, not the next byte and
;**       the 3 bytes just processed.  Also, after all dwords have been processed,
;**       the source and destination pointers must be incremented by three to
;**       point to the last byte (since the last MOVSD or STOSD would have
;**       decremented both pointers by 4).
;**
;**       The generated code should look something like:
;**
;**       WARP8:                                    ;This code for moving left to right
;**               movsb                             ;Dword alignment
;**               mov     ecx,cInnerByte/4          ;Set dword count
;**               rep                               ;If a count, then repeat is needed
;**               movsd                             ;Move dwords until done
;**               movsb                             ;Process odd bytes
;**
;**
;**       WARP8:                                    ;This code for moving right to left
;**               movsb                             ;Dword alignment
;**               dec     esi                       ;adjust pointers for moving dwords
;**               dec     edi
;**               dec     esi
;**               dec     edi
;**               dec     esi
;**               dec     edi
;**               mov     ecx,cInnerByte/4          ;Set dword count
;**               rep                               ;If a count, then repeat is needed
;**               movsd                             ;Move dwords until done
;**               inc     esi                       ;adjust since dwords were moved
;**               inc     edi
;**               inc     esi
;**               inc     edi
;**               inc     esi
;**               inc     edi
;**               movsb                             ;Process odd byte
;**
;**
;**       Of course, if any part of the above routine isn't needed, it isn't
;**       generated (i.e. the generated code might just be a single MOVSB)
;*/
 
;/*
;**   The following piece of code is used to speed up the source blt for the
;**   width of more than 4 bytes by using MOVSD. However, if we want to
;**   fully utilize its speed, we have to start fetching from the DWORD
;**   boundary.
;**
;**   Up to three single-byte moves are emitted ahead of the dword loop and
;**   EDX (the remaining byte count) is reduced to match.
;*/

        cmp     edx,TOO_SMALL_TO_ALIGN
        jb      cblt_5080                        ;Too few bytes to bother aligning
        mov     ax,si                            ;We want to start MOVSD at a DWORD
        mov     ah,al                            ;boundary, so we may do some prefetch
                                                 ;(AX = byte-move opcode twice)
cblt_dword_alignment:                            
        mov     ecx,devSrc.lp_bits               ;Get the source's starting point
        mov     bl,iStepDir                      ;See which direction we are going
        .errnz  STEPLEFT                         ;Get the right increment in BL, i.e.
        .errnz  STEPRIGHT-1                      ;   BL = +1 for left-to-right
        shl     bl,1                             ;   BL = -1 for right-to-left
        dec     bl                               
        add     cl,bl                            ;The first byte is already fetched
        test    fbFetch,FF_TWO_INIT_FETCHES      ;Was a second initial fetch generated?
        jz      @F
        add     cl,bl                            ;Yes, update to the current point
@@:                                             
        or      bl,bl                            ;Get the offset from the next DWORD
        js      @F                               ;boundary!!
        not     cl                               ;For StepRight, the offset equals
                                                 ;(- current position) mod 4
@@:                                              ;For StepLeft, the offset equals
        inc     cl                               ;(current position + 1) mod 4
        shr     cl,1                            
        jnc     @F                               ;If there is an odd byte,
        stosb                                    ;fetch it first
        dec     edx                             
@@:                                             
        shr     cl,1                            
        jnc     cblt_5080                        ;If there is an odd word,
        stosw                                    ;fetch it as two more byte moves
        dec     edx                             
        dec     edx                             
                                                
cblt_5080:                                      
        mov     ax,si                            ;AL = byte opcode held in SI (I_STOSB or I_MOVSB)
        mov     ecx,edx                          ;Preserve it for future use
        shr     ecx,2                            ;Check the number of DWORD to blt

cblt_5090:                                       ; This is being used as a double jmp pt for jz's
        jz      cblt_5130                        ;No whole dwords to move
        mov     ebx,0                            ;Flag as stepping from left to right
        cmp     bl,iStepDir                      ;Moving from the right to the left?
        .errnz  STEPLEFT                         ;  (left direction must be zero)
        jnz     cblt_5100                        ;  No
        mov     eax,I_DEC_ESI_DEC_EDI + (I_DEC_ESI_DEC_EDI shl 16)
        stosw                                    ;  Yes, decrement both pointers
        stosd                                    ;  (three dec/dec pairs, six bytes in all)
        mov     ebx,I_INC_ESI_INC_EDI  + (I_INC_ESI_INC_EDI shl 16)
                                                 ;Set up to increment the pointers later

cblt_5100:
        cmp     ecx,1                            ;Move one dword or many dwords?
        jz      cblt_5120                        ;  Only one dword, no count or REP needed

        mov     al,I_MOV_ECX_DWORD_I             ;  Many dwords, load count
        stosb
        mov     eax,ecx                 
        stosd
        mov     al,I_REP                         ; REP
        stosb


cblt_5120:
        mov     eax,esi                          ;esi is either stosb or movsb
        inc     eax                              ;add 1 and eax is either stosd or movsd
        stosb                                    ;Only the opcode byte in AL is emitted
        or      ebx,ebx                          ; Need to increment the pointers?
        jz      cblt_5130                        ;   No
        mov     eax,ebx                          ;   Yes, increment both pointers
        stosd
        stosw                                    ;   (three inc/inc pairs, six bytes in all)

cblt_5130:
        mov     ax,si                            ;AL = byte opcode again (I_STOSB or I_MOVSB)
        shr     edx,1                            ;Bit 0 of the byte count set?
        jnc     @F                               ;  No odd byte to move
        stosb

@@:                                              ; This is being used as a double jmp pt for jz's
        shr     edx,1                            ;move another two odd bytes if needed
        jnc     cblt_5140
        mov     ah,al                            ;We use two 8-bit transfer because we
        stosw                                    ;don't want to adjust SI and DI

cblt_no_inner_loop_code:
cblt_5140:
        jmp     cblt_inner_loop_done             ; Done setting up the innerloop
        ALIGN   4

        page

;/*
;**       There is some count for the innerloop of the BLT.  Generate the
;**       required BLT. Two or four copies of the BLT will be placed on the
;**       stack.   This allows the LOOP instruction at the end to be distributed
;**       over two or four bytes instead of 1, saving 11 or 12 clocks for each
;**       byte (for 4).  Multiply 12 clocks by ~ 16K and you save a lot of
;**       clocks!
;**
;**       If there are less than four (two) bytes to be BLTed, then no looping
;**       instructions will be generated.  If there are more than four (two)
;**       bytes, then there is the possibility of an initial jump instruction
;**       to enter the loop to handle the modulo n result of the loop count.
;**
;**       The innerloop code will look something like:
;**
;**
;**       <       mov     cx,loopcount/n> ;load count if >n innerloop bytes
;**       <       jmp     short ???     > ;If a first jump is needed, do one
;**
;**       BLTloop:
;**               replicate initial byte BLT code up to n times
;**
;**       <       loop    BLTloop >               ;Loop until all bytes processed
;*/
 
cblt_cannot_rep:
        mov     ebx,npFetchEnd                    ;Compute size of the fetch code
        sub     ebx,npFetchStart
        inc     ebx                               ;A stosb will be appended
        mov     esi,4                             ;Assume replication 4 times
        mov     cl,2                              ;  (shift count two bits left)
        cmp     ebx,32                            ;Small enough for 4 times?
        ;SEL 1-27-92
        jb      cblt_5520                         ;  Yes, replicate 4 times
        shr     esi,1                             ;  No, replicate 2 times
        dec     cl

cblt_5520:
        cmp     edx,esi                           ;Generate a loop?
        jle     cblt_5540                         ;  No, just copy code
        mov     al,I_MOV_ECX_DWORD_I
        stosb                                     ;mov ecx,loopcount/n
        mov     eax,edx                           ;Compute loop count
        shr     eax,cl
        stosd
        shl     eax,cl                            ;See if loopcount MOD n is 0
        sub     eax,edx                           ;eax = -(loopcount MOD n)
        jz      cblt_5540                         ;Zero, no odd count to handle


;/*
;**       There is an odd portion of bytes to be processed.  Increment
;**       the loop counter for the odd pass through the loop and then
;**       compute the displacement for entering the loop.
;**
;**       To compute the displacement, subtract the number of odd bytes
;**       from the modulus being used  (i.e. 4-3=1).  This gives the
;**       number of bytes to skip over the first time through the loop.
;**
;**       Multiply this by the number of bytes for a logic sequence,
;**       and the result will be the displacement for the jump.
;**
;**       NOTE(review): the multiply is 8-bit (AL * BL), so only the low
;**       byte of the logic sequence size in EBX takes part.  This assumes
;**       one sequence is shorter than 256 bytes -- confirm.
;*/


        inc     dword ptr [edi-4]                ;Not zero, adjust for partial loop
        add     eax,esi                          ;Compute where to enter the loop at
        mul     bl                               ;ax = (n - odd) * size of one sequence
        movzx   ecx,ax
        mov     al,I_JMP_NEAR                    ;Stuff jump instruction
        stosb
        mov     eax,ecx                          ;Stuff displacement for jump
        stosd

;/*
;**       Currently:      EDX = loop count
;**                       ESI = loop modulus
;**                       EBX = size of one logic operation
;**                       EDI --> next location in the loop
;*/

cblt_5540:
        mov     ecx,ebx                           ;Set move count
        mov     ebx,edx                           ;Set maximum for move
        cmp     ebx,esi                           ;Is the max > what's left?
        jle     cblt_5560                         ;  No, just use what's left
        mov     ebx,esi                           ;  Yes, copy the max

cblt_5560:

;/*
;**       EBX = number of copies of the logic sequence to lay down
;**       (the smaller of the byte count and the loop modulus).
;**       The first copy comes from npFetchStart and is one byte longer
;**       than the fetch/logic code; that extra byte is then overwritten
;**       with a STOSB.
;*/

        sub     edx,esi                           ;If edx > 0, then loop logic needed
        mov     esi,npFetchStart                  ;--> fetch code to copy
        mov     eax,ecx                           ;Save a copy of fetch length
        shr     ecx,2
        rep     movsd                             ;Move fetch code and stuff stosb
        mov     ecx,eax
        and     ecx,3
        rep     movsb

        mov     esi,edi                           ;--> new source (and top of loop)
        sub     esi,eax
        mov     byte ptr [edi-1],I_STOSB
        dec     bl                                ;One copy has been made
        mul     bl                                ;Compute # bytes left to move
        mov     ecx,eax                           ;Set move count

;/*
;**       NOTE(review): the multiply above is 8-bit (AL * BL), so it assumes
;**       the sequence length in EAX is below 256 -- confirm.
;**
;**       The remaining copies are made with a byte-wise forward copy whose
;**       source is the first copy and whose destination starts right after
;**       it.  With more than one extra copy the two ranges overlap, so each
;**       copy is replicated from the one before it.  The dword version of
;**       this copy is left commented out below.
;*/

;*             63358 START

;        shr     ecx,2
;        rep     movsd                             ;Move fetch code and stuff stosb
;        mov     ecx,eax
;        and     ecx,3

;*             63358 END

        rep     movsb
        sub     esi,eax                           ;Restore pointer to start of loop

;/*
;**       The innermost BLT code has been created and needs the looping
;**       logic added to it.  If there is any looping to be done, then
;**       generate the loop code.  The code within the innerloop may be
;**       greater than 126 bytes, so a LOOP instruction may not be used
;**       in this case.
;*/

cblt_5580:
        or      edx,edx                          ; Need a loop?
        jle     cblt_inner_loop_done             ;   No, don't generate one

        mov     eax,esi                          ; Compute offset of loop
        sub     eax,edi                          ;   (negative: top of loop - current position)
        cmp     eax,-125                         ; Can this be a short label?
        ;SEL 1-27-92
        jb      cblt_5600                        ;   No, must make it a near jmp
                                                 ;   (unsigned compare: further back than -125)

        sub     al,2                             ; Bias offset by length of LOOP inst.
        mov     ah,al                            ; AH = 8-bit displacement
        mov     al,I_LOOP
        stosw                                    ; Set the loop instruction
        jmp     cblt_inner_loop_done ; Go process the last byte code
        ALIGN   4



cblt_5600:

;/*
;**       The loop body is too long for a short LOOP.  Emit the jmp_cx_nz
;**       template followed by the displacement back to the top of the loop.
;**
;**       On entry EAX = (top of loop) - EDI, with EDI at the first byte of
;**       the sequence emitted here.  The bias of 8 is JMP_CX_NZ_LEN (4)
;**       plus a 4-byte displacement, so the displacement is stored as a
;**       full dword.  Storing only a word would leave the upper half of
;**       the displacement unwritten and let the code generated next
;**       overwrite it.
;*/

        mov     esi,OFFSET jmp_cx_nz ;Move in the dec CX jnz code
        movsd
        .errnz  JMP_CX_NZ_LEN-4                  ;Must be four bytes long
        sub     eax,8                            ;Adjust jump bias (4 template + 4 displacement)
        stosd                                    ;  and store it into jump



        subttl  Compile - Last Byte Processing
        page

cblt_inner_loop_done:

;/*
;**       All the innerloop stuff has been processed.  Now generate the code for
;**       the final byte if there is one.  This code is almost identical to the
;**       code for the first byte except there will only be one fetch (if a
;**       fetch is needed at all).
;**
;**       The code generated will look something like:
;**
;**       <       fetch                        > ; Get source byte
;**       <       align                        > ; Align source if needed
;**               action                         ; Perform desired action
;**               mask and store
;**
;**       The bytes between npFetchStart and npFetchEnds are copied again.
;**       When the fetch is to be omitted, the first cFetchCode bytes are
;**       skipped.  The last byte mask is then stuffed into the copy.
;*/

        mov     dx,mLast                         ; Get last byte mask
        or      dh,dh                            ; Is there a last byte to be processed?
        jz      cblt_no_last_byte                ;   No.

        mov     ecx,npFetchEnds                  ; Get end of fetch/logic/store operation
        mov     esi,npFetchStart                 ; Get start of fetch/logic sequence
        sub     ecx,esi                          ; Compute length of the code
        test    fbFetch,FF_NO_LAST_FETCH         ; Is the last fetch to be omitted?
        jz      cblt_include_fetch               ;   No, copy the fetch as well
        test    fbF0,F0_SRC_PRESENT              ; was there a fetch?
        jz      cblt_was_no_fetch
        cmp     iHorzPhase,0                     ; Phase zero case is not combined
                                                 ; into innerloop as it should be.
                                                 ; If the final byte is full then we
                                                 ; better not remove the lodsb ( i.e.
        je      cblt_include_fetch               ; 0 - 0 = 0 would make us think we could)

        mov     eax,cFetchCode                   ; don't copy the fetch (lodsb)
        add     esi,eax                          ;   start after the fetch code
        sub     ecx,eax                          ;   and copy that much less

cblt_was_no_fetch:
cblt_include_fetch:


        mov     eax,ecx                          ; Keep the byte count
        shr     ecx,2                            ; Whole dwords first
        rep     movsd                            ; Copy the fetch/action/store code
        mov     ecx,eax
        and     ecx,3                            ; Then the leftover bytes
        rep     movsb

        xchg    dh,dl                            ; Swap bytes, as is done for the start mask
        mov     MASKED_STORE_MASK[edi],dx        ; Stuff last byte mask into the code


        subttl  Compile - Looping Logic
        page

;/*
;**       Looping logic.
;**
;**       The looping logic must handle monochrome bitmaps, color bitmaps,
;**       huge bitmaps, the device, the presence or absence of a source
;**       or pattern, and mono <==> color interactions.
;**
;**       The type of looping logic is always based on the destination.
;**
;**
;**       Plane Update Facts:
;**
;**       1)  If the destination device is color, then there will be
;**           logic for plane selection.           Plane selection is performed
;**           at the start of the loop for the display.  Plane selection
;**           for bitmaps is performed at the end of the loop in anticipation
;**           of the next plane.
;**
;**
;**           The following applies when the destination is color:
;**
;**
;**           a)  The destination update consists of:
;**
;**               1)  If the destination is the display, the next plane will
;**                   be selected by the plane selection code at the start
;**                   of the scan line loop.
;**
;**               2)  If not the display, then the PDevice must be a bitmap.
;**                   The next plane will be selected by updating the
;**                   destination offset by the next_plane value.
;**
;**
;**           b)  If F0_GAG_CHOKE isn't specified, then there may be a source.
;**               If there is a source, it must be color, and the update
;**               consists of:
;**
;**               1)  If the source is the display, the next plane will be
;**                   selected by the plane selection code at the start of
;**                   the loop.
;**
;**               2)  If not the display, then the PDevice must be a bitmap.
;**                   The next plane will be selected by updating the
;**                   --source--- offset by the next_plane value.
;**
;**
;**           c)  If F0_GAG_CHOKE is specified, then the source must be a
;**               monochrome bitmap which is undergoing mono to color
;**               conversion.  The AND & XOR mask table which is used
;**               for the conversion will have to be updated, unless
;**               the F1_NO_MUNGE flag is set indicating that the color
;**               conversion really wasn't needed.
;**
;**               The source's pointer will not be updated.  It will
;**               remain pointing to the same scan of the source until
;**               all planes of the destination have been processed.
;**
;**
;**           d)  In all cases, the plane mask rotation code will be
;**               generated.  If the plane indicator doesn't overflow,
;**               then start at the top of the scan line loop for the
;**               next plane.
;**
;**               If the plane indicator overflows, then:
;**
;**                   1)  If there is a pattern present, it's a color
;**                       pattern fetch.           The index of which scan of
;**                       the brush to use will have to be updated.
;**
;**                   2)  Enter the scan line update routine
;**
;**
;**       2)      If the destination is monochrome, then there will be no
;**               plane selection logic.
;**
;**               If F0_GAG_CHOKE is specified, then color ==> mono conversion
;**               is taking place.  Any plane selection logic is internal
;**               to the ROP byte fetch code.  Any color brush was pre-
;**               processed into a monochrome brush, so no brush updating
;**               need be done
;*/



        subttl  Looping Logic - Plane Selection
        page

;/*
;**       Get saved parameters off of the stack.
;**
;**       <       pop     ebx                  > ;Get plane indicator
;**       <       pop     esi                  > ;Get source pointer
;**               pop     edi                    ;Get destination pointer
;**               pop     ecx                    ;Get loop count
;*/


cblt_no_last_byte:
        mov     bh,fbF0                          ;These flags will be used a lot
        test    bh,F0_DEST_IS_COLOR              ;Is the destination color?
        jz      cblt_6120                        ;  No
        mov     al,I_POP_EBX                     ;Restore plane index
        stosb

cblt_6120:
        test    bh,F0_SRC_PRESENT                ;Is a source needed?
        jz      cblt_6140                        ;  No
        mov     al,I_POP_ESI                     ;  Yes, get source pointer
        stosb

cblt_6140:
        mov     ax,I_POP_EDI_POP_ECX             ;Get destination pointer
        stosw                                    ;Get loop count
        test    bh,F0_DEST_IS_COLOR              ;Color scanline update?
        jnz     cblt_6160                        ;  Yes
        jmp     cblt_6300                        ;  No, just do the mono scanline update
        ALIGN   4


;/*
;**       The scanline update is for color.  Generate the logic to update
;**       a brush, perform plane selection, process mono ==> color conversion,
;**       and test for plane overflow.
;*/


cblt_6160:
        or      bh,bh                             ;Color conversion?
        jns     cblt_6180                         ;  No
        errnz   F0_GAG_CHOKE-10000000b



;/*
;**       The source is monochrome.  Handle mono ==> color conversion.
;**       The AND & XOR mask table will need to be rotated for the next
;**       pass over the source.
;**
;**       The source scanline pointer will not be updated until all planes
;**       have been processed for the current scan.
;**
;**       If F1_NO_MUNGE has been specified, then the color conversion table
;**       and the color conversion code was not generated, and no update
;**       code will be needed.
;**
;*/


        test    fbMore,F1_NO_MUNGE ;Is there really a conversion table?
        jnz     cblt_6200                         ;  No, so skip the code

        mov     al,I_MOV_EBP_DWORD_I             ;lea bp,cl_a_brush
        stosb
        lea     eax,ausMono2Color                ;Get address of table
        stosd
        lea     esi,rot_and_xor                   ;--> rotate code

if      LEN_ROT_AND_XOR GE 8
        mov     ecx,LEN_ROT_AND_XOR/4
        rep     movsd
else
if      LEN_ROT_AND_XOR AND 4
        movsd
endif
endif
if      LEN_ROT_AND_XOR AND 2
        movsw
endif
if      LEN_ROT_AND_XOR AND 1
        movsb
endif

        jmp     cblt_6200
        ALIGN   4




;/*
;**       If there is a source, it must be color.  If it is a memory
;**       bitmap, then the next plane must be selected, else it is
;**       the display and the next plane will be selected through
;**       the hardware registers.
;**
;**       <       add     si,next_plane>
;*/


cblt_6180:
        test    bh,F0_SRC_PRESENT                ;Is there really a source?
        jz      cblt_6200                         ;No source.
        test    bh,F0_SRC_IS_DEV                 ;Is the source the display?
        jnz     cblt_6200                         ;  Yes, use hardware plane selection
        mov     ax,I_ADD_ESI_DWORD_I             ;  No, generate plane update
        stosw                                     ;Add esi,next_plane
        mov     eax,devSrc.next_plane
        stosd




;/*
;**       If the destination isn't the device, then it must be a color
;**       memory bitmap, and its pointer will have to be updated by
;**       bmWidthPlanes.  If it is the display, then the next plane
;**       will be selected through the hardware registers.
;**
;**       <       add     di,next_plane>
;*/

cblt_6200:
        test    bh,F0_DEST_IS_DEV                ;Is the destination the display
        jnz     cblt_6220                        ;  Yes, don't generate update code
        mov     ax,I_ADD_EDI_DWORD_I             ;  No, update bitmap to the next plane
        stosw
        mov     eax,devDst.next_plane
        stosd





;/*
;**       The source and destination pointers have been updated.
;**       Now generate the plane looping logic.
;**
;**       <       shl     bl,1                           > ;Select next plane
;**       <       jc      $+5                            > ;  No, reset to first
;**       <       jmp     StartOfLoop   > ;  Yes, go process next
;**       <       mov     bl,PLANE_1     > ;Reset plane indicator
;**
;**       or
;**
;**       <       shl     bl,1                           > ;Select next plane
;**       <       jnc     StartOfLoop   > ;  Yes, go process next
;**       <       mov     bl,PLANE_1     > ;Reset plane indicator
;*/

cblt_6220:
        mov     ax,I_SHL_BL_1                     ;Stuff plane looping logic
        stosw

        mov     edx,pfnBlt                        ;Compute relative offset of
        sub     edx,edi                           ;  start of loop
        add     edx,2                             ;account for init of plane indicator
        cmp     edx,-125                          ;Can this be a short label?
        ;SEL 1-27-92
        jb      cblt_6240                         ;  No, must make it a near jmp
        sub     dl,2                              ;Bias offset by length of jz inst.
        mov     ah,dl
        mov     al,I_JNC_127
        stosw                                     ;jnc StartOfLoop
        jmp     cblt_6260

        ALIGN   4

cblt_6240:
        mov     ax,I_JNC_NEAR32
        stosw

        sub     edx,6                   ;adjust jump bias
        mov     eax,edx                 
        stosd                           ;store jump displacement

cblt_6260:
        mov     ax,(PLANE_SELECT_FLAG shl 8)+I_MOV_BL_BYTE_I
        stosw



        subttl  Looping Logic - Color Brush Update
        page

;/*
;** The plane update logic has been copied.  If a pattern was
;** involved for a color BLT, then the pattern index will need
;** to be updated to the next scanline for multiple plane mode.
;**
;** This will involve subtracting off NUMBER_PLANES*SIZE_PATTERN (MonoPlane),
;** and adding in the increment.  The result must be masked with
;** 00000111b to select the correct source.  Note that the update
;** can be done with an add instruction and a mask operation.
;**
;**       inc   index+MonoPlane                    inc-MonoPlane result  AND 07h
;**
;**        1       0+24 = 24                         1-24 = -23    1      1
;**        1       7+24 = 31                         1-24 = -23    8      0
;**       -1       0+24 = 24                        -1-24 = -25   FF      7
;**       -1       7+24 = 31                        -1-24 = -25    6      6
;**
;**       <       mov     al,ss:[1234]  > ;Get brush index
;**       <       add     al,n                           > ;Add displacement to next byte
;**       <       and     al,00000111b  > ;Keep it in range
;**       <       mov     ss:[1234],al  > ;Store displacement to next byte
;**
;**
;** For four plane mode, the AND 00011111b automatically wraps the
;** pattern at the correct location, so no code is generated.
;*/
 
        test    bh,F0_PAT_PRESENT                ;Is a pattern involved?
        jz      cblt_6300                        ;  No
        mov     al,I_MOV_AL_MEM
        stosb                                    ;mov al,[xxxx]
        mov     edx,npbPatRow
        mov     eax,edx
        stosd
        mov     al,I_ADD_AL_BYTE_I
        mov     ah,iDir                          ;add al,bias
        sub     ah,BITS_PEL * SIZE_PATTERN
        .errnz  INCREASE-1                       ;Must be a 1
        .errnz  DECREASE+1                       ;Must be a -1
        stosw

        mov     al,I_AND_AL_BYTE_I
        mov     ah,00000111b
        stosw

        mov     al,I_MOV_MEM_AL
        stosb                                    ;and al,00000111b
                                                 ;mov [xxxx],al
        mov     eax,edx
        stosd

        subttl  Looping Logic - Scan Line Update
        page

;/*
;**       Generate the next scanline code.  The next scan line code must
;**       handle monochrome bitmaps, the device, huge bitmaps, the presence
;**       or absence of a source.
;**
;**       Also color bitmaps, and mono <==> color interactions.
;**
;**       <       add si,devSrc.next_scan> ;Normal source scan line update
;**       <       Huge Bitmap Update     > ;>64K source update code
;**               add di,devDst.next_scan ;Normal destination scan line update
;**       <       Huge Bitmap Update     > ;>64K destination update code
;**
;**
;**       All updates will at least consist of the add IndexReg,next_plane.
;*/


cblt_6300:
        mov     ch,iDir                          ;Load this for YUpdate code
        test    bh,F0_SRC_PRESENT                ;Is there a source?
        jz      cblt_6340                        ;  No, skip source processing
        mov     dx,I_ADD_ESI_DWORD_I             ;add esi,increment

        mov     bx,((HIGH I_MOV_ESI_EAX)*256)+(HIGH I_LEA_EAX_ESI_DISP32)
        push    ebp
        lea     ebp,devSrc
        call    y_update                          ;Generate the Y scan line update code
        pop     ebp                               ;Restore frame pointer

cblt_6340:
        mov     dx,I_ADD_EDI_DWORD_I             ;add reg,increment
        mov     bx,((HIGH I_MOV_EDI_EAX)*256)+(HIGH I_LEA_EAX_EDI_DISP32)
        push    ebp
        lea     ebp,devDst                       ;--> destination data
        call    y_update                         ;Generate the Y scan line update code
        pop     ebp                              ;Restore frame pointer


;/*
;**       Compile the scan line loop.  The code simply jumps to the start
;**       of the outer loop if more scans exist to be processed.
;*/

cblt_6380:
        mov     eax,pfnBlt                        ;Compute relative offset of
        sub     eax,edi                           ;  start of loop
        add     edx,2                             ;account for init of plane indicator
        cmp     eax,-125                          ;Can this be a short label?
        ;SEL 1-27-92
        jb      cblt_6400                         ;  No, must make it a near jmp

        sub     al,2                              ;Bias offset by length of LOOP inst.
        mov     ah,al
        mov     al,I_LOOP ; loop
        stosw                                     ;Set the loop instruction
        jmp     cblt_6420
        ALIGN   4


cblt_6400:

        mov     esi,OFFSET jmp_cx_nz ;Move in the dec CX jnz code
        movsd
        .errnz  JMP_CX_NZ_LEN-4                   ;Must be four bytes long
        sub     eax,8                             ;Adjust jump bias
        stosd                                     ;  and store it into jump

cblt_6420:
        mov     al,I_RET                          ;Stuff the near return instruction
        stosb

        ret

CBLT ENDP

        subttl  Phase Align Code Generation
        page


;/***************************************************************************
;*
;* FUNCTION NAME = phase_align_generate 
;*
;* DESCRIPTION   = Generate the phase alignment if any.                     
;*                                                                          
;*                 It is assumed that AL contains the source byte  
;*
;*                 Registers Destroyed:  
;*                       AX,CX,SI        
;*
;* INPUT         = DH = phase alignment 
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        ALIGN   4

phase_align_generate PROC SYSCALL,
 pddcDst  :DWORD, ;Destination ddc
 xDst     :DWORD, ;Destination x origin
 yDst     :DWORD, ;Destination y origin
 psdSrc   :DWORD, ;Source surface definition
 xSrc     :DWORD, ;Source x origin
 ySrc     :DWORD, ;Source y origin
 cxExt    :DWORD, ;x extent of the BLT
 cyExt    :DWORD, ;y extent of the BLT
 usMix    :DWORD, ;Mix mode
 ipcBkgnd :DWORD, ;Color to mono background match color
 fsBlt    :DWORD

        include frame.blt

;/*
;**       Emits, at the location addressed by EDI (through stosw/stosb and
;**       the movs instructions), the code that phase-aligns the source byte:
;**
;**         1) If DH is zero, nothing is emitted.
;**         2) Otherwise the opcode word I_ROL_AL_N (DH = 1..4) or the same
;**            word with its high byte replaced by HIGH I_ROR_AL_N (DH >= 5,
;**            count becomes 8-DH) is stored, followed by the count byte.
;**         3) Unless fbFetch has both FF_ONLY_1_SRC_BYTE and
;**            FF_ONLY_1_DEST_BYTE set, the PHASE_ALIGN_LEN bytes found at
;**            phase_align are copied in after the rotate.
;**
;**       NOTE(review): none of the declared parameters is referenced in this
;**       body.  The parameter list and frame.blt presumably exist so that the
;**       caller's frame variables (iHorzPhase, fbFetch) can be addressed by
;**       name from here -- confirm against frame.blt and the caller.
;*/


        assert  dh,E,iHorzPhase                  ;assert macro is defined elsewhere; presumably
                                                 ;  checks that DH equals iHorzPhase - TODO confirm
        mov     ecx,0                            ;Might have garbage in it
        or      dh,dh                            ;Any phase alignment?
        jz      cblt_phase0                      ;  No, so skip alignment
        mov     cl,dh                            ;Get horizontal phase for rotating
        mov     ax,I_ROL_AL_N                    ;Assume rotate left n times
        cmp     cl,5                             ;4 or less rotates?
        ;SEL 1-27-92
        jb      cblt_small_phase                 ;  Yes
        neg     cl                               ;  No, compute ROR count
        add     cl,8                             ;  CL = 8 - phase (same net rotation of a byte)
        mov     ah,HIGH I_ROR_AL_N               ;Patch only the high byte of the opcode word
        ;Assembly-time check: because only AH is replaced above, the low
        ;bytes of the ROL and ROR opcode words must be identical.
        .errnz   (LOW I_ROL_AL_N) - (LOW I_ROR_AL_N)

cblt_small_phase:
        stosw                                     ;Emit the two-byte rotate opcode word
        mov     al,cl                             ;  followed by the one-byte
        stosb                                     ;  rotate count

;/*
;** Do not generate phase masking if there is only 1 src And only 1 dest byte.
;** This is not just an optimization, see comments where these flags are set.
;*/

        mov     al,fbFetch
        and     al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE ;Isolate the two flags
        xor     al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE ;Zero only if both are set
        jz      cblt_skip_masking                 ;Both set, emit no masking code

        mov     esi,OFFSET phase_align            ;--> masking code template

;Copy PHASE_ALIGN_LEN bytes of template.  The move instructions are selected
;at assembly time: a rep movsd for the whole dwords when the length is 8 or
;more, otherwise a single movsd if bit 2 of the length is set, then a movsw
;and/or movsb for the remaining 2 and 1 bytes.

if      PHASE_ALIGN_LEN GE 8
        mov     ecx,(PHASE_ALIGN_LEN SHR 2)       ;Number of whole dwords
        rep     movsd
else
if      PHASE_ALIGN_LEN AND 4
        movsd
endif
endif
if      PHASE_ALIGN_LEN AND 2
        movsw
endif
if      PHASE_ALIGN_LEN AND 1
        movsb
endif

cblt_skip_masking:
cblt_phase0:
        ret
phase_align_generate ENDP


        subttl  Scan Line Update Generation
        page


;/***************************************************************************
;*
;* FUNCTION NAME = y_update 
;*
;* DESCRIPTION   = 
;*                 
;*     Generate Y update code.
;*    
;*    
;*     The Y update code is generated as follows:
;*    
;*     For the display, small bitmaps, and huge bitmaps where the BLT
;*     doesn't span a segment boundary, all that needs to be done is to
;*     add next_scan to the offset portion of the bits pointer. next_scan
;*     is negative (2's complement) if the BLT is Y-, so an addition can
;*     always be done.
;*    
;*         < add   si,next_scan >
;*           add   di,next_scan
;*    
;*    
;*     For huge bitmaps where the BLT spans a segment boundary, the
;*     above update must be performed, and the overflow/underflow
;*     detected.  This isn't too hard to detect.
;*    
;*     For any huge bitmap, there can be a maximum of Planes*bmWidthBytes-1
;*     unused bytes in a 64K segment.  The minimum is 0.  The scan line
;*     update always updates to the first plane of the next (previous) scan.
;*    
;*    
;*     When the BLT is Y+, if the new offset is anywhere within the
;*     unused bytes of a segment, or in the first scan of a segment,
;*     then overflow must have occurred:
;*    
;*           -bmFillBytes <= offset < Planes*bmWidthBytes
;*    
;*     Since the update is always made to the first plane of a scan,
;*     Planes in the above equation can be thrown out.  Also, if
;*     bmFillBytes is added to both sides of the equation:
;*    
;*           0 <= offset < bmWidthBytes+bmFillBytes   (unsigned compare)
;*    
;*     will be true if overflow occurs.  The Y+ overflow check will
;*     look like:
;*    
;*    
;*         lea ax,bmFillBytes[si]                      ;Adjust for fill bytes now
;*         cmp ax,bmWidthBytes+bmFillBytes            ;Overflow occur?
;*         jnc NoOverflow                              ;  No
;*         cmp cx,2                                    ;Any more scans?
;*         jnc NoOverflow                              ;  No, don't update selector
;*         add si,bmFillBytes                          ;Step over fill bytes
;*         mov ax,ds                                   ;Compute new selector
;*         add ax,bmSegmentIndex
;*         mov ds,ax
;*    
;*       NoOverflow:
;*    
;*    
;*    
;*     For Y- BLTs, the test is almost the same.  The equation becomes
;*    
;*        -(Planes*bmWidthBytes) > offset             (unsigned compare)
;*    
;*     then underflow occurs.  Planes in the above equation cannot be
;*     thrown out.  The Y- underflow check will look like:
;*    
;*         mov ax,si
;*         cmp ax,-(Planes*bmWidthBytes)              ;Overflow occur?
;*         jc  NoOverflow                              ;  No
;*         cmp cx,2                                    ;Any more scans?
;*         jnc NoOverflow                              ;  No, don't update selector
;*         add si,bmFillBytes                          ;Step over fill bytes
;*         mov ax,ds                                   ;Compute new selector
;*         add ax,bmSegmentIndex
;*         mov ds,ax
;*    
;*     bmFillBytes and bmSegment index will be the 2's complement by
;*     now if the BLT is Y-.
;*
;*                  Registers Preserved: 
;*                        DX,SI          
;*                  Registers Destroyed: 
;*                        AX,DI,flags    
;*
;* INPUT         = SS:BP --> source or destination data                 
;*                 SS:DI --> where to generate the code                 
;*                 DX     =  update register (add si,wordI & mov ax,si) 
;*                 BL     =  lea register (SI or DI)                    
;*                 BH     =  mov si,ax   or   mov di,ax register        
;*                 CL     =  segment register (DS or ES)                
;*                 CH     =  Direction                                  
;*
;* OUTPUT        = SS:BP --> source or destination data         
;*                 SS:DI --> where to generate the code         
;*                 BL     =  lea register (SI or DI)            
;*                 BH     =  mov si,ax   or   mov di,ax register
;*                 CL     =  segment register (DS or ES)        
;*                 CH     =  Direction                          
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        ALIGN   4

y_update PROC SYSCALL

;/*
;**       Emit the scan line increment for the source or destination:
;**
;**       <   add     esi,next_scan                > ;Update source
;**       <   add     edi,next_scan                > ;Update destination
;**
;**       Entry:  EBP --> DEV structure supplying next_scan
;**               DX   =  two-byte opcode to store ahead of the increment
;**               EDI --> where the generated code is stored (via stos)
;**
;**       Exit:   EAX  =  next_scan
;**               EDX     unchanged
;**               EDI     advanced past the six bytes stored, if any
;**                       (assuming the direction flag is clear)
;**
;**       Nothing is stored when next_scan is zero.  BX and CX are not
;**       referenced by this routine.
;*/


        mov     eax,[ebp].DEV.next_scan          ;Fetch the scan line increment
        test    eax,eax                           ;Is there anything to add?
        jnz     y_update_emit                     ;  Yes, go generate the add

y_update_10:
        ret                                       ;  No, generate nothing

y_update_emit:
        xchg    edx,eax                           ;EAX = opcode, EDX = increment
        stosw                                     ;Store the opcode, 16 bits only
        xchg    edx,eax                           ;EAX = increment, EDX = opcode again
        stosd                                     ;Store the 32-bit increment
        ret
y_update ENDP

END
