;*DDK*************************************************************************/
;
; COPYRIGHT (C) Microsoft Corporation, 1989
; COPYRIGHT    Copyright (C) 1995 IBM Corporation
;
;    The following IBM OS/2 WARP source code is provided to you solely for
;    the purpose of assisting you in your development of OS/2 WARP device
;    drivers. You may use this code in accordance with the IBM License
;    Agreement provided in the IBM Device Driver Source Kit for OS/2. This
;    Copyright statement may not be removed.;
;*****************************************************************************/
        .386
        .MODEL FLAT,SYSCALL
        ASSUME  SS:FLAT, DS:FLAT, CS:FLAT, ES:FLAT

        page    ,132
;/*****************************************************************************
;*
;* SOURCE FILE NAME = CBLT.ASM
;*
;* DESCRIPTIVE NAME = Compile a BLT subroutine onto the stack
;*
;*
;* VERSION      V2.0
;*
;* DATE
;*
;* DESCRIPTION  This file contains two subroutines which build a small program on
;*              the stack to accomplish the requested BLT.
;*
;*              This file is part of a set that makes up the BitBLT function
;*              at driver-level.
;*
;* FUNCTIONS    CBLT
;*              phase_align_generate
;*              y_update
;*
;* NOTES        NONE
;*
;* STRUCTURES   NONE
;*
;* EXTERNAL REFERENCES
;*
;*              NONE
;*
;* EXTERNAL FUNCTIONS
;*
;*              NONE
;*
;* CHANGE ACTIVITY =
;*   DATE      FLAG        APAR   CHANGE DESCRIPTION
;*   --------  ----------  -----  --------------------------------------
;*   mm/dd/yy  @Vr.mpppxx  xxxxx  xxxxxxx
;*   02/22/8?                     Walt Moore [waltm] Wrote it for         in
;*                                distant past.
;*   07/12/86                     Wes Rupel [wesleyr] Made it a subroutine
;*                                (extracted from an enourmous bitblt.asm)
;*   07/20/87                     Wes Rupel [wesleyr] Added 4-plane support.
;*   08/16/87                     Wes Rupel [wesleyr] Bitmap Color Conversion
;*                                uses image color
;*   03/05/88                     Wes Rupel [wesleyr] Added Gray usMix support.
;*   03/07/88                     Wes Rupel [wesleyr] Added Transparency for
;*                                ImageData
;*   03/30/88                     Wes Rupel [wesleyr] Made BackColor/ForeColor
;*                                now 0/1 rather than 1/0 (in mono bitmaps)
;*   05/14/89                     Newman Consulting
;*                                Converted to a VGA packed pixel blit compiler
;*                                from the 8514 bit compiler.
;*   05/20/89                     Newman Consulting
;*                                Added a brut force screen to screen blit routine.
;*                                This screen to screen is just for fall back if
;*                                the special code does not work.
;*   06/04/89                     Newman Consulting
;*                                Fixed compiled Gray ROP bug.
;*   07/09/89                     Newman Consulting
;*                                Fix for first line of screen blit bug
;*   08/05/89                     Newman Consulting
;*                                Removed a lot of the if 0 blocks left over
;*                                from the conversion
;*   10/26/89                     Viroon Touranachun [viroont] Modified the part
;*                                that generates the "REP" inner loop for
;*                                "Source Copy" to generate 32-bit data transfer
;*                                routine using iAPX386's MOVSD and STOSD
;*                                instructions.
;*   07/19/92                     Jonathan M. Wagner CMVC_46429 Trap 13,
;*                                CMVC_50012 Trap 6
;*   07/17/92                     Jonathan M. Wagner DCR37 implemented
;*   07/24/92                     David Scholten Binar Graphics
;*                                found memory to memory color blitting problem
;*                                length was being lost; could be more of these.
;*                                CMVC_49407 @DMS
;*   02/20/93              61128  CBLT not checking direction in certain case.
;*
;*****************************************************************************/


        .xlist
ifdef PALMGR2
INCL_GPIBITMAPS         equ     1
endif
INCL_DEV                equ     1
INCL_SUB                equ     1
INCL_DOSSEMAPHORES      equ     1
INCL_DOSMVDM            equ     1
INCL_GPIPRIMITIVES      equ     1
        include pmgre.inc
DINCL_SAVE_SCREEN_BITS  equ     1
DINCL_ENABLE            equ     1
DINCL_BITMAP            equ     1
DINCL_BB_ROPS   equ     1
        include driver.inc
        include extern.inc
        include display.inc
        include egafam.inc
        include assert.mac
        include oemblt.inc
        .list

        EXTERNDEF SCREEN_CBSCAN                 :ABS    ;screen width in bytes

.DATA
        ; Line buffer used for screen-to-screen blits.  CBLT patches its
        ; address (plus cxExt when stepping left) into the SLT_SEG,
        ; SLT_SEG1 and RST_SEG fixups of the templates below.
        instance_line_buffer db 1024 dup (0)
        ; Copy of iDir made by CBLT; usl_template compares it with INCREASE.
        cblt_iDir               db      0
        ; BANK_SIZE / SCREEN_CBSCAN, computed by CBLT.
        cblt_scan_per_bank      dw      0
        ; Scan lines left before the source / destination leaves its current
        ; bank.  Set up by CBLT, counted down and reloaded from
        ; cblt_scan_per_bank by usl_template.
        cblt_src_scan_line_left dw      0
        cblt_dst_scan_line_left dw      0
        ; Bank number of the source / destination; adjusted by usl_template.
        ; The initial values are not set in this part of the file.
        cblt_src_current_bank   dw      0
        cblt_dst_current_bank   dw      0
        ; Brush index bytes loaded by the get_brush_index_f/_m/_l fragments.
        ; brush_index_f is stored by CBLT's pattern fetch code.
        ; NOTE(review): _m and _l are presumably the "middle" and "last"
        ; counterparts -- their writers are not in this part of the file.
        brush_index_f           db      0
        brush_index_m           db      0
        brush_index_l           db      0
        ; Exported, uninitialised dwords.
        ; NOTE(review): presumably hold the addresses of the emitted
        ; transparency templates -- not referenced in this part of the file.
public  src_transparent_stack
        src_transparent_stack   dword   ?
public  dest_transparent_stack
        dest_transparent_stack  dword   ?
.CODE
        extern bkmix            :BYTE          ;Background mix
        extern bkclr            :BYTE          ;Background color
        extern  far_bank_select:far

        include constant.blt
        include devdata.blt
DEFINE_ROPCODE  equ 1
        include roptable.blt            ; Generate the ROP codes


subttl      Compile - 8bpp templates
page


;/*
;** scrn_to_scrn_test - leave via not_scrn unless this is a screen to
;** screen blit.
;**
;** Parameter:
;**   not_scrn - label that receives control when the blit is NOT
;**              screen to screen.
;**
;** Control falls out of the bottom of the macro only when fbF0 has
;** F0_SRC_PRESENT, F0_DEST_IS_DEV and F0_SRC_IS_DEV all set.
;** BL holds a copy of fbF0 on both exits.
;*/

scrn_to_scrn_test macro not_scrn
    local   is_scrn_to_scrn

        mov     bl, fbF0                ; BL = blit flags, checked bit by bit
        test    bl, F0_SRC_PRESENT      ; no source at all?
        jz      @f                      ;   cannot be screen to screen
        test    bl, F0_DEST_IS_DEV      ; destination not the device?
        jz      @f                      ;   cannot be screen to screen
        test    bl, F0_SRC_IS_DEV       ; source on the device as well?
        jnz     is_scrn_to_scrn         ;   yes, all three conditions hold
@@:
        jmp     not_scrn                ; at least one condition failed
is_scrn_to_scrn:                        ; screen to screen: continue inline
endm

; Code fragment: load the byte variable brush_index_f into AL.
; brush_index_f_len is the size of the fragment in bytes.
; NOTE(review): laid out like the copyable templates in this file, so it is
; presumably emitted into the compiled BLT -- the code that uses it is not
; in this part of the file; confirm.
get_brush_index_f:
        mov     al,brush_index_f
brush_index_f_len = $ - get_brush_index_f

; Code fragment: load the byte variable brush_index_m into AL.
; brush_index_m_len is the size of the fragment in bytes.
; NOTE(review): presumably emitted into the compiled BLT like the other
; templates -- the code that uses it is not in this part of the file.
get_brush_index_m:
        mov     al,brush_index_m
brush_index_m_len = $ - get_brush_index_m

; Code fragment: load the byte variable brush_index_l into AL.
; brush_index_l_len is the size of the fragment in bytes.
; NOTE(review): presumably emitted into the compiled BLT like the other
; templates -- the code that uses it is not in this part of the file.
get_brush_index_l:
        mov     al,brush_index_l
brush_index_l_len = $ - get_brush_index_l


; bank_select_logic - code fragment that invokes set_bank_select.
;
; EAX is saved and restored around the call.  The call is made indirectly
; through EAX rather than with a relative CALL, so the instruction bytes do
; not depend on the address they execute from.
; BANK_SELECT_CODE_LEN is the size of the fragment in bytes.
; NOTE(review): set_bank_select is not defined in this part of the file;
; the bank number is presumably passed in DX -- confirm.
bank_select_logic:


        push    eax
        mov     eax,set_bank_select
        call    eax
        pop     eax

BANK_SELECT_CODE_LEN = $ - bank_select_logic

; bank_select_logic_m - macro form of the bank_select_logic fragment.
;
; Expands inline to the same four instructions: save EAX, call
; set_bank_select indirectly through EAX, restore EAX.  It is expanded
; inside the templates below; every expansion in this part of the file is
; preceded by code that loads DX.
; NOTE(review): DX is presumably the bank number expected by
; set_bank_select, which is not defined in this part of the file.
bank_select_logic_m     macro


        push    eax
        mov     eax,set_bank_select
        call    eax
        pop     eax

endm

;/*
;** SLT - Source Line Template
;**
;** Template to move a line from the screen to instance memory
;**       Assumes:    esi = first byte of source line
;**
;** CBLT copies SLT_LEN bytes of this code into the program it is building
;** and then patches the zero immediates.  Each SLT_* equate is the offset
;** of one immediate from the start of the template:
;**
;**       SLT_SRC_Y    <- ySorc
;**       SLT_CBSCAN1  <- SCREEN_CBSCAN
;**       SLT_COUNT    <- cxExt
;**       SLT_SEG      <- offset instance_line_buffer (+ cxExt if STEPLEFT)
;**       SLT_SEG1     <- same value as SLT_SEG
;**       SLT_CBSCAN   <- SCREEN_CBSCAN
;**       SLT_DESTY    <- yDest
;**
;** ECX, EDI and ESI are saved on entry and restored on exit.  AX and DX
;** are overwritten by the template itself.
;*/

src_line_temp_8bpp:

        push    ecx                     ; save registers set up upon entry
        push    edi
        push    esi

        mov     ax, 0000                ; ax = initial source Y (patched)
SLT_SRC_Y  =($ - src_line_temp_8bpp) - 2
        mov     dx, 0000                ; dx = SCREEN_CBSCAN (patched)
SLT_CBSCAN1=($ - src_line_temp_8bpp) - 2
        mul     dx                      ; dx:ax = source Y * scan line width
        ; NOTE(review): presumably selects the bank given by dx, the high
        ; word of the product -- confirm against set_bank_select
        bank_select_logic_m

        mov     ecx, 0000               ; ecx = length of line in bytes (patched)
SLT_COUNT = ($ - src_line_temp_8bpp) - 4

        mov     edi, 0000               ; edi = address in instance line buffer (patched)
SLT_SEG   = ($ - src_line_temp_8bpp) - 4
        rep     movsb                   ; do the move
        mov     esi, 0000               ; esi = same buffer address (patched);
                                        ; replaced by the pop esi below
SLT_SEG1  = ($ - src_line_temp_8bpp) - 4

        mov     ax, 0000                ; ax = SCREEN_CBSCAN (patched)
SLT_CBSCAN =($ - src_line_temp_8bpp) - 2
        mov     dx, 0000                ; dx = starting dest Y (patched)
SLT_DESTY  = ($ - src_line_temp_8bpp) - 2
        mul     dx                      ; dx:ax = dest Y * scan line width
        ; bank select again, this time for the destination line
        bank_select_logic_m

        pop     esi                     ; restore the entry registers
        pop     edi
        pop     ecx

SLT_LEN = $ - src_line_temp_8bpp

page

;/*
;** RST - Reset Source Template
;**
;** Single instruction that reloads ESI with an immediate address.
;** CBLT copies RST_LEN bytes of it into the compiled BLT for screen to
;** screen blits and patches the immediate at offset RST_SEG with the
;** address of instance_line_buffer (plus cxExt when stepping left).
;*/
reset_source_temp:
        mov     esi, 0000               ; esi = address in instance line buffer (patched)
RST_SEG = ($ - reset_source_temp) - 4

RST_LEN = $ - reset_source_temp



page
;/*
;** USL - Update the Scan Lines template
;**
;**   Entry:  esi = screen source address
;**           edi = screen destination address
;**
;** Steps the source and destination addresses by one scan line, switches
;** banks when the current bank has no scan lines left, and copies the new
;** source line into the instance line buffer.
;**
;** Fixup offsets (all relative to usl_template):
;**
;**   USL_SRC_ADDSUB  offset of the "add esi, eax" instruction itself
;**   USL_SRC_NBANK   16-bit immediate added to the source bank number
;**   USL_SEG1        32-bit immediate: address of the instance line buffer
;**   USL_COUNT       32-bit immediate: number of bytes to copy
;**   USL_DST_ADDSUB  offset of the "add edi, eax" instruction itself
;**   USL_DST_NBANK   16-bit immediate added to the destination bank number
;**
;** NOTE(review): the code that copies and patches this template is not in
;** this part of the file.  The "add/sub" comments suggest the two ADD
;** instructions are patched to SUB for the other Y direction -- confirm.
;**
;** EBX and ECX are saved and restored.  EAX and DX are overwritten.
;*/

OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE

usl_template:
        ; Copy a New source line into the instance line buffer
        push    ebx
        push    ecx                     ; save the Y count

        mov     eax, SCREEN_CBSCAN
USL_SRC_ADDSUB = ($ - usl_template)
        add     esi, eax                ; add/sub line offset SCREEN_CBSCAN

        mov     dx, cblt_src_current_bank               ; dx = source current bank

        ; Select the source bank only when it differs from the destination
        ; bank number.
        cmp     dx,cblt_dst_current_bank
        jz      @f
        bank_select_logic_m
@@:

        ; One fewer source scan line left in this bank; when the count
        ; reaches zero move on to the neighbouring bank.
        dec     cblt_src_scan_line_left
        jnz     @f
        cmp     cblt_iDir, INCREASE
        je      src_pos_dir
        add     esi,BANK_SIZE           ; direction is not INCREASE
        jmp     short   src_addr_set
src_pos_dir:
        sub     esi,BANK_SIZE           ; direction is INCREASE
src_addr_set:
        push    cblt_scan_per_bank      ; reload the scan lines left counter
        pop     cblt_src_scan_line_left

        mov     ax, 0000                ; ax = bank number delta (patched)
USL_SRC_NBANK  = ($ - usl_template) - 2 ; next bank
        add     dx, ax                  ; dx = next bank +/- 1
        add     cblt_src_current_bank,ax
        bank_select_logic_m
@@:
;/*
;** At this point:
;** the proper bank has been selected.
;** esi = point to screen source line
;** direction flag is assumed correct (untouched)
;*/

        push    esi                     ; save the updated screen source addr
        push    edi                     ; save the screen dest

        mov     edi, 0000               ; edi = address of instance line buffer (patched)
USL_SEG1        = ($ - usl_template) - 4
        mov     ecx, 0000               ; ecx = cxExt (patched)
USL_COUNT       = ($ - usl_template) - 4
        push    ecx                     ; keep the byte count for the tail
        shr     ecx,2                   ; dword count
        rep     movsd                   ; move the bulk of the line
        pop     ecx
        and     ecx,3                   ; 0..3 bytes left over
        rep     movsb                   ; move the line into instance data

        pop     edi
        pop     esi
        ; The line is now in instance data
        ; The source is done, now do the destination

        mov     ax, SCREEN_CBSCAN
        movzx   eax,ax                  ; eax = SCREEN_CBSCAN, zero extended
USL_DST_ADDSUB = ($ - usl_template)
        add     edi, eax                ; add/sub line offset SCREEN_CBSCAN

        mov     dx, cblt_dst_current_bank               ; dx = current dest bank

        ; Select the destination bank only when it differs from the source
        ; bank number.
        cmp     dx,cblt_src_current_bank
        jz      @f
        bank_select_logic_m
@@:

        ; One fewer destination scan line left in this bank; when the count
        ; reaches zero move on to the neighbouring bank.
        dec     cblt_dst_scan_line_left
        jnz     @f
        cmp     cblt_iDir, INCREASE
        je      dst_pos_dir
        add     edi,BANK_SIZE           ; direction is not INCREASE
        jmp     short   dst_addr_set
dst_pos_dir:
        sub     edi,BANK_SIZE           ; direction is INCREASE
dst_addr_set:
        push    cblt_scan_per_bank      ; reload the scan lines left counter
        pop     cblt_dst_scan_line_left
                                        ; changed...
        push    ax                      ; ax is preserved on this path only
        mov     ax, 0000                ; ax = bank number delta (patched)
USL_DST_NBANK  = ($ - usl_template) - 2 ; next bank
        add     dx, ax                  ; dx = next bank +/- 1
        add     cblt_dst_current_bank,ax
        pop     ax

        bank_select_logic_m
@@:
;/*
;** At this point:
;** the proper dest bank has been selected.
;** edi = point to screen destination line
;** direction flag is assumed correct (untouched)
;*/

        pop     ecx                     ; restore the Y count
        pop     ebx

USL_LEN = $ - usl_template


page

;/*
;** bank select template
;**   This code is for the Video 7 VRAM and fast write VGA card.
;**
;*/

page


;/*
;**       jmp_cx_nz   - Code template for near jump if CX-1 <> 0
;**
;**       jmp_cx_nz will skip the following near jump if CX-1 is zero.
;**       CX will be left updated by this code.
;**
;**       jmp_cx_nz is used by both the inner loop code and the outer
;**       loop code if a loop instruction cannot be used.
;**
;**       The template ends with the JMP opcode byte only.
;**       NOTE(review): the jump displacement is presumably appended by the
;**       code generator after the template is copied -- that code is not
;**       in this part of the file.
;*/

jmp_cx_nz:
        dec     cx                      ;Decrement counter
        ; $ is the address of this JZ: skip its own 2 bytes plus the 5 bytes
        ; of the near JMP (opcode + 32-bit displacement) that follows.
        jz      $+7
        db      I_JMP_NEAR              ;JMP opcode

JMP_CX_NZ_LEN   =       $-jmp_cx_nz     ;Length of procedure

;/*
;**       phase_align - Template for phase alignment code
;**
;**       The following code is the template that performs the phase
;**       alignment masking.  The source has already been aligned to
;**       the destination.
;**
;**       A copy of the aligned source is made.  The phase mask is then
;**       applied to the source and the copy.  The previously unused
;**       bits are ORed into the used bits of the current source, and
;**       the unused bits of the current source then become the unused
;**       bits for the next source.
;**
;**
;**       It assumes:
;**
;**               BP  =  phase alignment mask
;**               AL  =  current byte to mask
;**               BH  =  old unused bits
;**
;**       It returns:
;**
;**               AL  =  masked source byte with the old unused bits ORed in
;**               BH  =  new unused bits (also left in AH)
;**
;**       PHASE_ALIGN_LEN is the size of the template in bytes.
;*/

phase_align:
        mov     ah,al                   ;Make a copy of aligned source
        and     ax,bp                   ;Masked used, unused bits
        or      al,bh                   ;Mask in old unused bits
        mov     bh,ah                   ;Save new unused bits

PHASE_ALIGN_LEN equ     $-phase_align   ;Length of procedure


;/*
;**       masked_store - Template for storing first and last bytes of BLT
;**
;**       The following code is a template for storing the first and last
;**       bytes of a BLT.  The unaltered bits are saved and the altered
;**       bits set in the byte, then the byte is stored.
;**
;**
;**       It assumes:
;**
;**               AL  =  The byte to be BLTed to the destination bitmap.
;**                      All necessary logic operations have been performed
;**                      on this byte.
;**
;**               AH  =  The destination byte.
;**
;**       The AND immediate will be fixed up.
;**
;**       MASKED_STORE_MASK_MONO is measured back from the END of the
;**       template: the 4-byte AND immediate, the 2-byte OR and the 1-byte
;**       STOSB put the first byte of the immediate 7 bytes before the end.
;**       It must be recomputed if any instruction here changes size.
;*/

masked_store_mono:
        and     eax,0FFFFh               ;Mask altered/unaltered bits
        or      al,ah                   ;Combine the bits
        stosb                           ;And store the result

MASKED_STORE_LEN_MONO   equ     $-masked_store_mono;Length of the template
MASKED_STORE_MASK_MONO  equ     -7              ;Offset to where mask goes

;/*
;** Gray Rop logical action template.  This is used instead of one of the
;** roptable rops when the GRAY_ROP MIX is requested.
;**
;** Computes AL = dest xor ((dest xor DL) and DH), where dest is the byte
;** at [edi]:  bits where DH is 1 are taken from DL, bits where DH is 0
;** keep the destination value.  AH is left holding the destination byte.
;*/

gray_rop_template_mono:
        mov     ah,[edi]                ; ah = destination byte
        mov     al,ah
        xor     al,dl                   ; al = bits where dest and DL differ
        and     al,dh                   ; keep only the bits selected by DH
        xor     al,ah                   ; apply those differences to dest
LENGTH_GRAY_ROP_TEMPLATE_MONO   =       $ - gray_rop_template_mono

; Gray rop logical action template for 8bpp destinations.
; Handles one destination byte per pass: DH is rotated left by one and the
; bit rotated out (now in carry) selects between the destination byte
; (carry clear) and DL (carry set).  The result is left in AL.
gray_rop_template_8bpp:
        mov     al,[edi]                ; fetch destination byte
        rol     dh,1                    ; get mask pixel to carry
        jnc     @F                      ; was mask bg pixel, propagate dest
        mov     al,dl                   ; else substitute bg color
@@:
LENGTH_GRAY_ROP_TEMPLATE_8BPP   =       $ - gray_rop_template_8bpp

;/*
;** transparency_template is a piece of code that will be appended to the
;** logical action template from the roptable to achieve transparency.
;** The transparency mask is assumed to be in DH.  Where the mask is "1"
;** the result of the logical action is used.  Where the mask is "0"
;** the destination is not altered.
;**
;** Computes AL = dest xor ((AL xor dest) and DH), where dest is the byte
;** at [edi].  AH is left holding the destination byte.
;*/

transparency_template_mono:
        mov     ah,[edi]                ; ah = destination byte
        xor     al,ah                   ; al = bits where result and dest differ
        and     al,dh                   ; keep only the bits selected by the mask
        xor     al,ah                   ; apply those differences to dest
LENGTH_TRANSPARENCY_TEMPLATE_MONO       =       $ - transparency_template_mono

;/*
;** the template to be used when fetching an 8bpp pixel from the hw
;** (when the destination is also 8bpp)
;**
;** Reads a word into AX from the I/O port whose number is in SI.
;** DX and SI are swapped around the IN and swapped back afterwards, so
;** both hold their original values on exit.
;*/

cblt_fetch_hw_8bpp:
        xchg    dx,si                   ; dx = port number, si = caller's dx
        in      ax,dx                   ; ax = word read from the port
        xchg    dx,si                   ; put dx and si back
CBLT_FETCH_HW_8BPP_LEN  =       $ - cblt_fetch_hw_8bpp

;/*
;** the template to be used when fetching a 4bpp pixel from the hw
;** (when the destination is also 4bpp)
;*/

;/*
;** the template to be used when fetching an 8bpp pixel
;** (when the destination is mono)
;** NOTE: This code is assuming bitmap source and stepping left -- it gets
;**       patched if source is the device and/or stepping right.
;**
;** For each source byte one bit is shifted into BL; when the loop ends BL
;** is copied to AL.
;**
;** The POS_* equates are offsets from the start of the template to the
;** bytes that get patched:
;**
;**   ..._COUNT     16-bit immediate of "mov cx"  (loop count)
;**   ..._FETCH     the LODSB opcode byte
;**   ..._BGCOLOR   8-bit immediate of "xor al"   (background color)
;**   ..._MONOBITS  16-bit immediate of "and ax"
;**   ..._RCX       second byte of "rcr bl,1"
;**
;** The CBLT_FETCH_8BPP_TO_MONO_* equates at the bottom are the same
;** positions expressed as negative offsets from the END of the template.
;**
;** NOTE(review): in a 32-bit code segment LOOP counts with ECX, while only
;** CX is loaded here, so this appears to rely on the upper half of ECX
;** being zero on entry -- confirm.
;*/

cblt_fetch_8bpp_to_mono:
        mov     cx,00ffh        ; count of edge pixels
POS_CBLT_FETCH_8BPP_TO_MONO_COUNT       =       $ - cblt_fetch_8bpp_to_mono - 2 ;CMVC_46429
@@:     lodsb                   ; fetch a source byte
POS_CBLT_FETCH_8BPP_TO_MONO_FETCH       =       $ - cblt_fetch_8bpp_to_mono - 1
        xor     al,0ffh         ; AL  = (bgcolor) ? 0 : >0
POS_CBLT_FETCH_8BPP_TO_MONO_BGCOLOR     =       $ - cblt_fetch_8bpp_to_mono - 1
        add     al,0ffh         ; 'C' = (bgcolor) ? 0 : 1
        sbb     ax,ax           ; AL = ('C' == 1) ? 0ffh : 0
        not     ah              ; AH = ('C' == 1) ? 0 : 0ffh
        and     ax,01234h       ; isolate result ;CMVC_46429
POS_CBLT_FETCH_8BPP_TO_MONO_MONOBITS    =       $ - cblt_fetch_8bpp_to_mono - 2
        or      al,ah           ; into AL
        shr     al,1            ; now into 'C'
        rcr     bl,1            ; shift into building mono byte
POS_CBLT_FETCH_8BPP_TO_MONO_RCX =       $ - cblt_fetch_8bpp_to_mono - 1
        loop    @B              ; for all source bytes to fetch in edge
        mov     al,bl           ; fetch built mono byte to source register
CBLT_FETCH_8BPP_TO_MONO_LEN     =       $ - cblt_fetch_8bpp_to_mono
CBLT_FETCH_8BPP_TO_MONO_COUNT   =       -(CBLT_FETCH_8BPP_TO_MONO_LEN-\
                                          POS_CBLT_FETCH_8BPP_TO_MONO_COUNT)
CBLT_FETCH_8BPP_TO_MONO_FETCH   =       -(CBLT_FETCH_8BPP_TO_MONO_LEN-\
                                          POS_CBLT_FETCH_8BPP_TO_MONO_FETCH)
CBLT_FETCH_8BPP_TO_MONO_BGCOLOR =       -(CBLT_FETCH_8BPP_TO_MONO_LEN-\
                                          POS_CBLT_FETCH_8BPP_TO_MONO_BGCOLOR)
CBLT_FETCH_8BPP_TO_MONO_MONOBITS =      -(CBLT_FETCH_8BPP_TO_MONO_LEN-\
                                          POS_CBLT_FETCH_8BPP_TO_MONO_MONOBITS)
CBLT_FETCH_8BPP_TO_MONO_RCX     =       -(CBLT_FETCH_8BPP_TO_MONO_LEN-\
                                          POS_CBLT_FETCH_8BPP_TO_MONO_RCX)

page

;/*
;** src_transparent_template
;** BM_SRCTRANSPARENT will result in pels from the source bitmap matching
;** the presentation space background color NOT to be copied to the
;** destination bitmap. (OVERLAY)
;**
;** Expects the source pel in AL.  When AL equals the patched color the
;** destination pointer is stepped past the pel and the final JMP is taken;
;** otherwise execution continues with whatever follows the template.
;**
;** Patch positions:
;**   T_SRC_TRANSPARENT_TEMPLATE_BKCLR      8-bit immediate of the CMP
;**   T_SRC_TRANSPARENT_TEMPLATE_DIRECTION  the "dec edi" opcode byte
;**   SRC_TRANSPARENT_TEMPLATE_JUMP         last byte of the template (the
;**                                         displacement of the final JMP,
;**                                         assuming the short form is
;**                                         assembled)
;** The T_* values are offsets from the start of the template;
;** SRC_TRANSPARENT_TEMPLATE_BKCLR and _DIRECTION are the same positions
;** as negative offsets from its end.
;**
;** NOTE(review): src_dummy only gives the two jumps a target at assembly
;** time.  The code that patches the color, the direction and the jump
;** displacement is not in this part of the file -- confirm.
;*/

src_transparent_template:

        cmp     al,0                    ;Source color == background color ?

T_SRC_TRANSPARENT_TEMPLATE_BKCLR = ($ - src_transparent_template) - 1

        jnz     src_dummy               ;If not, continue
        dec     edi                     ;If so, point to next destination byte

T_SRC_TRANSPARENT_TEMPLATE_DIRECTION = ($ - src_transparent_template) - 1

        jmp     src_dummy               ;and skip ROP

SRC_TRANSPARENT_TEMPLATE_JUMP = ($ - src_transparent_template) - 1
SRC_TRANSPARENT_TEMPLATE_LENGTH = $ - src_transparent_template
SRC_TRANSPARENT_TEMPLATE_BKCLR = -(SRC_TRANSPARENT_TEMPLATE_LENGTH - T_SRC_TRANSPARENT_TEMPLATE_BKCLR)
SRC_TRANSPARENT_TEMPLATE_DIRECTION = -(SRC_TRANSPARENT_TEMPLATE_LENGTH - T_SRC_TRANSPARENT_TEMPLATE_DIRECTION)

src_dummy:

;/*
;** dest_transparent_template
;** BM_DESTTRANSPARENT will result in pels from the source bitmap ONLY
;** being copied to the destination pels that match the presentation
;** space background color. (UNDERLAY)
;**
;** Compares the destination byte at [edi] with the patched color.  When
;** they differ the destination pointer is stepped past the pel and the
;** final JMP is taken; otherwise execution continues with whatever
;** follows the template.
;**
;** Patch positions:
;**   T_DEST_TRANSPARENT_TEMPLATE_BKCLR      8-bit immediate of the CMP
;**   T_DEST_TRANSPARENT_TEMPLATE_DIRECTION  the "dec edi" opcode byte
;**   DEST_TRANSPARENT_TEMPLATE_JUMP         last byte of the template (the
;**                                          displacement of the final JMP,
;**                                          assuming the short form is
;**                                          assembled)
;** The T_* values are offsets from the start of the template;
;** DEST_TRANSPARENT_TEMPLATE_BKCLR and _DIRECTION are the same positions
;** as negative offsets from its end.
;**
;** NOTE(review): dest_dummy only gives the two jumps a target at assembly
;** time.  The code that patches the color, the direction and the jump
;** displacement is not in this part of the file -- confirm.
;*/

dest_transparent_template:

        cmp     byte ptr[edi],0         ;Destination color == background color ?

T_DEST_TRANSPARENT_TEMPLATE_BKCLR = ($ - dest_transparent_template) - 1

        jz      dest_dummy              ;If so, continue
        dec     edi                     ;If not, point to next destination byte

T_DEST_TRANSPARENT_TEMPLATE_DIRECTION = ($ - dest_transparent_template) - 1

        jmp     dest_dummy              ;and skip ROP

DEST_TRANSPARENT_TEMPLATE_JUMP = ($ - dest_transparent_template) - 1
DEST_TRANSPARENT_TEMPLATE_LENGTH = $ - dest_transparent_template
DEST_TRANSPARENT_TEMPLATE_BKCLR = -(DEST_TRANSPARENT_TEMPLATE_LENGTH - T_DEST_TRANSPARENT_TEMPLATE_BKCLR)
DEST_TRANSPARENT_TEMPLATE_DIRECTION = -(DEST_TRANSPARENT_TEMPLATE_LENGTH - T_DEST_TRANSPARENT_TEMPLATE_DIRECTION)

dest_dummy:


        page
;/***************************************************************************
;*
;* FUNCTION NAME = CBLT
;*
;* DESCRIPTION   = Compile a BLT onto the stack.
;*
;* INPUT         = ES:DI --> memory on stack to receive BLT program
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


;/*
;**       Note:   The definition of CBLT below is FAR in order to maintain
;**               the stack frame created for BITBLT, though it is reached
;**               with a NEAR call.
;*/

OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE

CBLT PROC SYSCALL,
 pddcDst  :DWORD, ;Destination ddc
 xDst     :DWORD, ;Destination x origin
 yDst     :DWORD, ;Destination y origin
 psdSrc   :DWORD, ;Source surface definition
 xSrc     :DWORD, ;Source x origin
 ySrc     :DWORD, ;Source y origin
 cxExt    :DWORD, ;x extent of the BLT
 cyExt    :DWORD, ;y extent of the BLT
 usMix    :DWORD, ;Mix mode
 ipcBkgnd :DWORD, ;Color to mono background match color
 fsBlt    :DWORD

        include frame.blt
        include frame8.blt
                                ;Set data seg to CS so we can access
                                ;  code without overrides

        mov     fbMore,0                ; clean slate at start


subttl  Compile - Get a source line
page
src_move1:

        scrn_to_scrn_test <not_scrn1>               ; NewCon 8-12-89

;/*
;** Calculate the scan lines remaining in the current bank
;*/

        mov     al,iDir
        mov     cblt_iDir,al
        xor     edx,edx
        mov     eax,BANK_SIZE
        mov     ebx,SCREEN_CBSCAN
        div     ebx
        mov     ebx,eax
        mov     cblt_scan_per_bank,bx

        cmp     iDir,INCREASE              ;CMVC_53201
        jz      @f                         ;CMVC_53201
        mov     ax,ySorc                   ;CMVC_53201
        inc     ax                         ;CMVC_53201
        div     bx                         ;CMVC_53201
        mov     cblt_src_scan_line_left,dx ;CMVC_53201

        mov     ax,yDest                   ;CMVC_53201
        inc     ax                         ;CMVC_53201
        div     bx                         ;CMVC_53201
        mov     cblt_dst_scan_line_left,dx ;CMVC_53201
        jmp     src_move_2                 ;CMVC_53201
@@:                                        ;CMVC_53201
        mov     eax,ySrc
        div     bx
        mov     cx,bx
        sub     cx,dx
        mov     cblt_src_scan_line_left,cx

        mov     eax,yDst
        div     bx
        mov     cx,bx
        sub     cx,dx
        mov     cblt_dst_scan_line_left,cx
src_move_2:                                ;CMVC_53201

;/*
;** emit a template to move the first source line
;** from the screen to a buffer in the instance data area.
;*/

        mov     eax, edi                        ; ax = beginning of SLT
        mov     cx, SLT_LEN
        mov     esi, offset src_line_temp_8bpp
        rep     movsb                   ; move the template on to the stack

        mov     esi, eax                        ; si = beginning of temp on stack
        mov     ax, ySorc
        mov     [esi+SLT_SRC_Y], ax     ; fixup source Y

        mov     eax, cxExt
        mov     [esi+SLT_COUNT], eax    ; fixup the count

        mov     eax, offset instance_line_buffer
        cmp     iStepDir, STEPLEFT      ; if stepping left initial start is
        jne     @f                      ; at the top of the buffer
        mov     ebx, cxExt
        add     eax, ebx
@@:
        mov     [esi+SLT_SEG], eax      ; fixup the segment
        mov     [esi+SLT_SEG1], eax     ; fixup the segment

        mov     ax, SCREEN_CBSCAN
        mov     [esi+SLT_CBSCAN], ax    ; fixup the scan line width
        mov     [esi+SLT_CBSCAN1], ax ; fixup the scan line width

        mov     ax, yDest
        mov     [esi+SLT_DESTY], ax     ; fixup the initial Y scan line


;/*
;** The following comments refer to the code generated on the stack.
;** The instance data segement and the stack are the same segment.
;** After the move the source will always be from the
;** instance data line buffer.
;** The compiled blit was entered with
;**   esi = source data (on the screen)
;**   edi = destination data (on the screen)
;**   direction flag set
;**
;** The initial ds:si are pushed on the stack.
;** For all the inner loop stuff ds:si will point to the
;** instance data line buffer.
;** The initial ds:si should be poped off the stack when the outter
;** loop is updated.
;*/

not_scrn1:


        subttl  Compile - Outer Loop
        page
;/*
;**       Create the outerloop code.  The first part of this code will save
;**       the scan line count register, destination pointer, and the source
;**       pointer (if there is a source and it is not the device).
;**
;**
;**       The generated code should look like:
;**
;**               push    cx              ;Save scan line count
;**               push    di              ;Save destination pointer
;**       <       push    si      >       ;Save source pointer
;*/

        mov     pfnBlt, edi             ; NewCon 8-14-89
        mov     bl,fbF0
        mov     ax,I_PUSH_ECX_PUSH_EDI    ;Save scan line count, destination ptr
        stosw
;/*
;** In all cases where we are dealing with a source we want to
;** save the source pointer.
;*/


        test    bl,F0_SRC_PRESENT       ;Is a source needed?
        jz      @F                      ;  No

        mov     al,I_PUSH_ESI            ;  Memory src, save source pointer
        stosb
@@:

        scrn_to_scrn_test <not_scrn3>   ; NewCon 8-13-89

        mov     eax, edi
        mov     cx, RST_LEN
        mov     esi, offset reset_source_temp
        rep     movsb

        mov     esi, eax


        mov     eax, offset instance_line_buffer
        cmp     iStepDir, STEPLEFT
        jne     @f
        add     eax, cxExt
@@:
        mov     [esi+RST_SEG], eax

not_scrn3:

        subttl  Compile - Pattern Fetch
        page
;/*
;**   Set up any pattern fetch code that might be needed.
;**   The pattern code has many fixups, so it isn't taken from a
;**   template.  It is just stuffed as it is created.
;**
;**
;**   Entry:  None
;**
;**   Exit:   DH = pattern
;**
;**   Uses:   AX,BX,CX,DH,flags
;**
;**
;**   For color brushes:
;**
;**    *  mov     bx,XXXXh            ;Load segment (immediate) of the brush
;**    *  mov     ax,ds               ;Save DS
;**    *  mov     ds,bx               ;DS:BX --> brush
;**       mov     bx,YYYYh            ;Load offset (immediate) of the brush
;**       mov     dh,7[bx]            ;Get initial brush byte
;**    *  mov     ds,ax               ;Restore DS
;**
;**   For monochrome brushes:
;**   For masks:
;**
;**    +  mov     dl,bgcolor          ;bg color if gray rop
;**    *  mov     bx,XXXXh            ;Load segment (immediate) of the brush
;**    *  mov     ax,ds               ;Save DS
;**    *  mov     ds,bx               ;DS:BX --> brush
;**       mov     bx,YYYYh            ;Load offset (immediate) of the brush
;**       mov     dh,7[bx]            ;Get next brush byte
;**    *  mov     ds,ax               ;Restore DS
;**    =  not     dh                  ;invert fg/bg bits
;**    =  rol     dh,n                ;phase transparency mask
;**
;**       Instructions marked with "*" are not present if there is no
;**       source bitmap or if there is a source bitmap and it is the
;**       device. The bitmap is what DS would otherwise be used for.
;**
;**       Instructions marked with "+" are not present if the rop is not
;**       the special gray ropcode.
;**
;**       Instructions marked with "=" are only present if the ropcode is
;**       the gray ropcode and the destination is a color bitmap.
;**
;*/

cblt_pattern_fetch:
        test    bl,F0_PAT_PRESENT       ;Is a pattern needed?
        jnz     @f
        jmp     cblt_initial_byte_fetch ;  No, skip pattern code
@@:

;/*
;** The special gray rop will erroneously lead to Color Pat Fetch.
;** It should be mono fetch of the transparency mask.
;*/

        test    fsBlt,BBF_GRAY_ROP
        jz      cblt_not_gray_rop

color_fetch_template:
        and     fbF0,not F0_COLOR_PAT ; really a mono (1 plane) fetch
        and     bl,not F0_COLOR_PAT           ; really a mono (1 plane) fetch


;/*
;** Take this opportunity to compile the "color fetch" code needed by the
;** Gray Rop.  This will compile code which will expand the background color
;** to all 0s or all 1s into DL if the destination is mono, else will just
;** stuff the background color into DL for color destinations.
;**
;** For color destinations:
;**
;**   mov dl,03h     ; grab the background color as an immediate value
;**
;** For mono destinations:
;**
;**   mov   dl,{0,0ffh}; 0 or ffh depending on lo bit of background color
;*/

        mov     ah,byte ptr ipcBrushBack

        test    bl,F0_DEST_IS_COLOR
        jnz     @F
        shr     ah,1
        sbb     ah,ah
@@:     mov     al,I_MOV_DL_BYTE_I   ; mov dl,PatBackColor(assume color)
        stosw
cblt_not_gray_rop:


        test    bl,F0_COLOR_PAT         ; color pattern fetch ?
        jz      @F                      ; no...
        test    fbBrushAccel,PA_SINGLE_CLR; solid brush ?
        jz      @F                      ; no...
        mov     al,I_MOV_DH_BYTE_I      ; else move fgcolor into DH for blt

        mov     ah,byte ptr ipcBrushFore

        stosw
        and     fbF0,NOT F0_PAT_PRESENT; pattern never needs to be
        and     bl,NOT F0_PAT_PRESENT   ; considered again
        jmp     cblt_pattern_fetch_end
@@:
        mov     dl_addr_pbrush,edi       ; save -> brush offset in code
        mov     dl_addr_pbrush_off, edi ; NewCon 8-15-89

        mov     al,I_MOV_EBX_DWORD_I      ;mov bx,pBrush.lo
        stosb
        mov     eax,pBrush
        test    bl,F0_COLOR_PAT         ; fetching color pattern ?
        jz      cblt_stuff_brush_addr   ; no...

        sub     edx,edx                   ; assume no middle,last brush fetches
        mov     dl_addr_pbrush_off_m,edx
        mov     dl_addr_pbrush_off_l,edx
        or      fbMore,F1_INNER_ONCE

        mov     dl,yPatRow              ; set initial pattern row
        and     dx,00000111b            ; keep pattern row in range

        shl     dx,3                    ;only concerned about 8bpp now
        movzx   edx,dx
        add     eax,edx                 ; -> start of 1st pattern row
        mov     dl_addr_pbrush_off_f,edi ; save -> brush offset

;/*
;** cblt_stuff_brush_addr
;**
;** Finishes the brush (pattern) fetch code being built at EDI:
;**   - stores EAX (the brush offset prepared by the preceding code) as a
;**     dword operand,
;**   - emits the two-byte opcode I_MOV_DH_EBX_DISP8 followed by a one-byte
;**     brush index, remembering the address of that index byte in npbPatRow,
;**   - for the gray ROP on a color destination, emits I_ROL_DH_N plus a
;**     rotate count so the mask is phased to the destination x origin,
;**   - records the end of the brush fetch code in dl_addr_brush_end.
;** BL holds the F0_* flags tested below.
;*/
cblt_stuff_brush_addr:
ifdef   FIREWALLS
        mov     cx,di                   ; save offset of brush offset
endif
        stosd                           ; stuff initial brush offset

        mov     ax,I_MOV_DH_EBX_DISP8    ;mov dh,n[bx]
        stosw
BRUSH_INDEX_LESS_BRUSH_OFF      equ     6 ; dword offset (4) + 2 opcode bytes
ifdef   FIREWALLS
        add     cx,BRUSH_INDEX_LESS_BRUSH_OFF
endif
        mov     npbPatRow,edi    ;Save address of the brush index

        mov     al,byte ptr xDst        ; x origin of destination if color pat
        test    bl,F0_COLOR_PAT
        jnz     @F                      ; will be a color pattern fetch...
        mov     al,yPatRow              ; initial pattern row if mono pat
@@:     and     al,00000111b            ; keep the brush index in 0..7
        stosb                           ; index byte becomes the disp8
        mov     brush_index_f,al        ; remember the initial index

;/*
;** gray rop mask needs to be correctly phased for color destinations
;*/

        test    fsBlt,BBF_GRAY_ROP
        jz      @F                      ; not special gray rop code
        test    bl,F0_DEST_IS_COLOR
        jz      @F                      ; gray rop, dest is mono
        mov     dl,byte ptr xDst
        and     dl,00000111b            ; mod PATTERNSIZE
        jz      @F                      ; no mask phasing needed
        mov     ax,I_ROL_DH_N           ; rol dh,n
        stosw
        mov     al,dl                   ; n = xDst mod 8
        stosb
@@:

        mov     dl_addr_brush_end,edi    ; -> addr 1 beyond brush fetch code
cblt_pattern_fetch_end:


        subttl  Compile - Initial Byte Fetch
        page

;/*
;**        missing_code    <4bpp considerations -- Initial Byte Fetch>
;** - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
;**       Create the initial byte code.  This may consist of one or two
;**       initial fetches (if there is a source), followed by the required
;**       logic action.  The code should look something like:
;**
;**       BLTouterloop:
;**       <       mov     bp,mPhase >  ;Load phase mask for entire loop
;**       <       xor     bh,bh       >   ;Clear previous unused bits
;**
;**       ;       Perform first byte fetch
;**
;**       <       lodsb               >   ;Get source byte
;**       <       color<==>mono munge >   ;Color <==> mono conversion
;**       <       phase alignment     >   ;Align bits as needed
;**
;**       ;       If an optional second fetch is needed, perform one
;**
;**       <       lodsb               >   ;Get source byte
;**       <       color to mono munge >   ;Color to mono munging
;**       <       phase alignment     >   ;Align bits as needed
;**
;**               logical action          ;Perform logical action required
;**
;**               mov     ah,es:[di]      ;Get destination
;**               and     ax,cx           ;Saved unaltered bits
;**               or      al,ah           ;  and mask in altered bits
;**               stosb                   ;Save the result
;**
;**
;**       The starting address of the first fetch/logical combination will be
;**       saved so that the code can be copied later instead of recreating it
;**       (if there are two fetches, the first fetch will not be copied)
;**
;**       The length of the code up to the masking for altered/unaltered bits
;**       will be saved so the code can be copied into the inner loop.
;*/


;/*
;** cblt_initial_byte_fetch
;**
;** Starts the first-byte fetch/logic sequence at EDI.  Resets cFetchCode
;** and records the sequence start in npFetchStart.  With no source, goes
;** straight to cblt_logical_action.  With a source, no color conversion
;** and a nonzero horizontal phase, first emits the phase-mask load and a
;** clear of BH, then moves npFetchStart past them so they are not treated
;** as part of the fetch.
;*/
cblt_initial_byte_fetch:

        mov     cFetchCode,0    ; unknown fetch code size at start
        mov     npFetchStart,edi         ; save starting address of action

        test    bl,F0_SRC_PRESENT       ; is there a source?
        jnz     cblt_src_is_present     ;  yes, generate fetch code
        jmp     cblt_logical_action     ;  no, don't generate fetch code
cblt_src_is_present:

        test    bl,F0_GAG_CHOKE         ; color conversion ?
        jnz     @F                      ; yes -- no phasing needed...
        cmp     iHorzPhase,0            ; is the phase 0? (compare only)
        jz      @F                      ; yes -- no phase alignment needed...

        mov     al,I_MOV_BP_WORD_I      ; set up the phase mask
        stosb
; NOTE(review): only a 16-bit immediate follows the one-byte opcode here;
; confirm I_MOV_BP_WORD_I is an encoding that takes a word operand in the
; generated code's operand-size mode.
        mov     ax,word ptr mPhase               ; place the mask into the instruction
        stosw
        mov     ax,I_XOR_BH_BH          ; clear previous unused bits
        stosw
        mov     npFetchStart,edi         ; phase mask not part of fetch
@@:

;/*
;**       Generate the required sequence of instructions for a fetch
;**       sequence.  Only the minimum code required is generated.
;**
;**       The code generated will look something like the following:
;**
;**       BLTfetch:
;**       <       lodsb                 > ;Get the next byte
;**       <       color munging         > ;Mono <==> color munging
;**
;**       ;       If the phase alignment isn't zero, then generate the minimum
;**       ;       phase alignment needed.  RORs or ROLs will be generated,
;**       ;       depending on the fastest sequence.  If the phase alignment
;**       ;       is zero, then no phase alignment code will be generated.
;**
;**       <       ror     al,1          > ;Rotate as needed
;**       <       ror     al,1          > ;Rotate as needed
;**       <       ror     al,1          > ;Rotate as needed
;**       <       ror     al,1          > ;Rotate as needed
;**       <       mov     ah,al         > ;Mask used, unused bits
;**       <       and     ax,bp         > ;(BP) = phase mask
;**       <       or      al,bh         > ;Mask in old unused bits
;**       <       mov     bh,ah         > ;Save new unused bits
;**
;**
;**       The nice thing about the above is it is possible for the fetch to
;**       degenerate into a simple LODSB instruction.
;**
;**       If this were an iAPX80286 implementation, it would be faster to
;**       make three or four rotates into a "ror al,n" instruction.
;**
;**       Currently:      BL = fbF0
;**                       BH = usMix[3]
;*/

        mov     bh,byte ptr fsBlt[1]     ; BH = high byte of fsBlt, tested below
        .errnz  low BBF_TRANS            ; the three flags must live entirely
        .errnz  low BBF_ANTI_TRANS       ;  in the high byte of fsBlt
        .errnz  low BBF_TRANS_NO_CC
        test    bl,F0_GAG_CHOKE          ; Color conversion?
        jnz     cblt_color_convert       ;   Yes, gag and choke on it

;/*
;**  No Color Conversion.
;**  If it is mono to mono transparent then we want to filter through the
;**  first part of the mono_to_color code just to get the
;**  transparency stuff.  Being here means no color conversion, and
;**  transparent means mono source, so if it is transparent then it is
;**  mono to mono.
;*/

        test    bh,(BBF_TRANS or BBF_ANTI_TRANS) shr 8
        jnz     cblt_gotta_transparency ; it's mono_to_mono transparency
        jmp     cblt_no_color_conversion

; Color conversion: a color source goes to the color->mono generator,
; anything else (and mono->mono transparency) to the mono->color one.
cblt_color_convert:
        test    bl,F0_SRC_IS_COLOR
        jnz     cblt_color_to_mono
cblt_gotta_transparency:
        jmp     cblt_mono_to_color


        subttl  Compile - Initial Byte Fetch, Color ==> Mono
        page
cblt_color_to_mono:

;/*
;**       Generate the code to go from color to mono.  Color to mono
;**       should map all colors that are background to 0's (black), and
;**       all colors which aren't background to 1's (white).
;**
;**       The fetch code is not built instruction by instruction: the
;**       template cblt_fetch_8bpp_to_mono (CBLT_FETCH_8BPP_TO_MONO_LEN
;**       bytes) is copied to EDI and then patched in place through the
;**       CBLT_FETCH_8BPP_TO_MONO_* displacements, which are applied to EDI
;**       after the copy (i.e. relative to the end of the template):
;**
;**         _RCX       opcode byte XORed with XOR_RCL_TO_RCR unless
;**                    stepping left (switches the rotate direction)
;**         _COUNT     number of pixels to fetch for the first byte
;**         _BGCOLOR   low byte of ipcBkgnd
;**         _MONOBITS  word: low bit of ipcImageFore / ipcImageBack, each
;**                    replicated through a byte
;**
;**       If the whole blt is narrower than the pixels available in the
;**       first byte, a rotate by the difference is appended to normalize
;**       the result.
;**
;** - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
;** NOTE: We expect that initialization code in the blt upper levels has already
;** accounted for the case where the destination dc image attributes foreground
;** and background MONOBITs are equal (ie: both 0 or both 1). In those cases the
;** source fetch can be ignored. The upper level therefore will have swizzled
;** the writing mode to be one of DDx or DDxn, both of which do not involve the
;** source and thus will not fall through to this code!
;*/



        mov     cx,CBLT_FETCH_8BPP_TO_MONO_LEN
        movzx   ecx,cx                  ; rep movs counts in ECX
        mov     cFetchCode,ecx          ; fetch code size = template size
        mov     esi,OFFSET cblt_fetch_8bpp_to_mono
        rep     movs byte ptr [edi],byte ptr [esi]

;/*
;** get bit # in starting byte and convert to # pixels in the starting byte
;*/

        mov     dl,gl_start_bit         ; starting bit #
        inc     dl                      ; assume stepping left
        cmp     iStepDir,STEPLEFT
        je      @F                      ; we assumed correctly...
        neg     dl                      ; else adjust as though stepping right
        add     dl,8+1                  ; the +1 to adj for the STEPLEFT assume
                                        ; (net: DL = 8 - gl_start_bit)
        mov     cl,XOR_RCL_TO_RCR       ; Assembler was using ff as xor value
        xor     [edi][CBLT_FETCH_8BPP_TO_MONO_RCX],cl

@@:     sub     dh,dh                   ; zero-extend max pixels in 1st byte
        cmp     dx,word ptr cxExt                ; is > width of entire blt ?
        jbe     @F                      ; no -- = 0 or more pixels
        mov     dh,dl                   ; DH = pixels available in 1st byte
        mov     dl,byte ptr cxExt       ; yes -- truncate pixels to fetch
        sub     dh,dl                   ; normalization rotation adjustment
@@:
        mov     [edi][CBLT_FETCH_8BPP_TO_MONO_COUNT],dl

        mov     dl,byte ptr ipcBkgnd[0]    ; fetch source dc background color
        mov     [edi][CBLT_FETCH_8BPP_TO_MONO_BGCOLOR],dl

        mov     cl,byte ptr ipcImageFore[0]
        shr     cl,1                    ; isolate fg mono bit in 'C'
        sbb     cl,cl                   ; replicated dest fg MONOBITs
        mov     ch,byte ptr ipcImageBack[0]
        shr     ch,1                    ; isolate bg mono bit in 'C'
        sbb     ch,ch                   ; replicated dest bg MONOBITs
        mov     [edi][CBLT_FETCH_8BPP_TO_MONO_MONOBITS],cx

        or      dh,dh                   ; only a first byte in blt ?
        jz      cblt_first_not_only     ; no...
        mov     ax,I_ROL_AL_N           ; assume stepping right
        cmp     iStepDir,STEPRIGHT
        je      @F                      ; we are stepping right...
        mov     ah,HIGH I_ROR_AL_N      ; else we are stepping left
        .errnz  LOW I_ROR_AL_N - LOW I_ROL_AL_N
@@:     stosw                           ; stuff instr. to normalize pixels
        mov     al,dh                   ; get rotate count
        stosb
cblt_first_not_only:

        or      fbMore,(F1_COLOR_MONO+F1_INNER_ONCE)
        and     fbFetch,NOT FF_TWO_INIT_FETCHES ; never a second initial fetch here
        jmp     cblt_logical_action     ; go create logic code...


        subttl  Compile - Initial Byte Fetch, Mono ==> Color
        page

;/*
;**       The conversion is mono to color. (And it is complex)
;*/

;/*
;** cblt_mono_to_color
;**
;** Entry for mono source conversion.  A color destination is handled at
;** cblt_mono_to_color_color.  Otherwise this is the mono to mono
;** transparency case: emit LODSB, the phase alignment code, and a copy of
;** the aligned source into DH (inverted for anti-transparency) for later
;** use as a mask, then rejoin the common path at already_phase_aligned.
;** BH holds the high byte of fsBlt on entry.
;*/
cblt_mono_to_color:

        test    bl,F0_DEST_IS_COLOR
        jnz     cblt_mono_to_color_color; it's really a color dest...
;/*
;** Take care of the mono to mono case which filtered through here just to
;** get the transparency code added.
;*/

        mov     al,I_LODSB              ; fetch the source byte
        stosb
;/*
;** To make transparent rops needed for ImageData we need to do the phase
;** alignment before the color conversion.
;*/

        call    phase_align_generate
;/*
;**For transparency the compiled code must save the source
;**(now that it has been phase aligned) to use as a mask.  It will save
;**it in DH (where the pattern would be if there was one -- transparency
;**is being added for ImageData which does not use a pattern).
;*/

        mov     ax,I_MOV_DH_AL          ; op code for MOV DH,AL
        stosw
        test    bh,HIGH BBF_ANTI_TRANS
        jz      @F                      ; plain transparency: mask as is
        mov     ax,I_NOT_DH             ; op code for NOT DH
        stosw
@@:     jmp     already_phase_aligned

;/*
;** cblt_mono_to_color_color
;**
;** Mono source to color destination.  When a pattern is present, emits
;** code that steps the brush index byte by +1/-1 (mod 8) per pixel, then
;** slides the whole brush fetch code (from dl_addr_pbrush up to EDI) up
;** by MONO_8BPP_FETCH_LEN bytes so the mono byte fetch can be inserted in
;** front of it.  On exit to cblt_mono_to_color_no_pattern:
;**       EDI = where the mono byte fetch is to be written
;**       ESI = length of the relocated brush code (0 if no pattern)
;*/
cblt_mono_to_color_color:

        or      fbMore,(F1_MONO_COLOR+F1_INNER_ONCE)
        and     fbFetch,NOT FF_TWO_INIT_FETCHES
;/*
;** The pattern fetch code must become part of the fetch code because
;** a pattern byte must be fetched for each destination pixel stored. We
;** will effect this on the initial byte fetch by moving up in the instance
;** data area the brush fetch code to allow the insertion of src byte fetch
;** code in addition to code for the load of a loop counter.
;*/

        sub     esi,esi                   ; assume no pattern

        test    bl,F0_PAT_PRESENT       ; only set if none or non-solid pattern
        jz      cblt_mono_to_color_no_pattern; no pattern...
;/*
;** The presence of the pattern fetch code in this initial byte fetch loop
;** requires that we also update the pattern index for each destination
;** byte processed.
;*/

        mov     al,I_MOV_AL_MEM
        stosb                           ;mov al,[xxxx]
        mov     esi,npbPatRow
        add     esi,MONO_8BPP_FETCH_LEN ; index byte moves with the brush code
        mov     npbPatRow,esi
        mov     eax,esi
        stosd                           ; xxxx = relocated index byte address
        mov     al,I_ADD_AL_BYTE_I      ; update brush index
        mov     ah,INCREASE             ; assume STEPRIGHT
        .errnz   INCREASE - 1           ; must be a 1
        .errnz   DECREASE + 1           ; must be a -1
        cmp     iStepDir,STEPRIGHT
        je      @F
        neg     ah                      ; step increment is opposite direction
@@:     stosw
        mov     ax,(I_AND_AL_BYTE_I+(7*256)); and al,BrushIndexMask
        stosw
        mov     al,I_MOV_MEM_AL
        stosb                           ;mov [xxxx],al
        mov     eax,esi
        stosd
;/*
;** Now we relocate the brush fetch code, vacating space for the initial mono
;** byte fetch, save, and normalization code.
;*/

        mov     esi,edi                   ; -> end of brush fetch code
        mov     ecx,edi                   ; -> end of brush fetch code
        sub     ecx,dl_addr_pbrush      ; now # bytes of code to relocate
        mov     eax,MONO_8BPP_FETCH_LEN  ; length of code for mono to 8 bpp fetch
        add     edi,eax                   ; -> end of relocated code
        add     dl_addr_pbrush,eax       ; adjust -> start of brush fetch code
        add     dl_addr_brush_end,eax    ; ditto -> end of brush fetch code
        add     dl_addr_pbrush_off_f,eax ; ditto -> first brush fetch offset
        add     dl_addr_pbrush_off, eax ; NewCon 8-15-89
        mov     eax,ecx                   ; save length of brush fetch code

        std                             ; work backwards
        rep     movs byte ptr[edi],byte ptr[esi]; relocate brush fetch code

;/*
;** The backwards copy starts at the byte just past the end of the code, so
;** after ECX bytes the first byte of the brush code has not been moved yet.
;** All the other logic here assumes ESI and EDI to be exactly where
;** the repeat move string instruction has left them.
;** This fix just moves that remaining byte, without altering ESI and EDI.
;*/

        mov     cl, [esi]               ; NewCon 8-9-89
        mov     [edi], cl               ; NewCon 8-9-89

        cld                             ; ensure working upwards again

        mov     edi,esi                   ; EDI is -> where to put src byte fetch
        mov     npFetchStart,edi         ; this is new start of fetch,logic op
        mov     esi,eax                   ; ESI is # bytes to add to get to next ip
;/*
;** cblt_mono_to_color_no_pattern
;**
;** Emits the MONO_8BPP_FETCH_LEN (8) byte mono source fetch at EDI:
;**       offset 0   LODSB                 fetch up to 8 mono pixels
;**       offset 1   MOV CH,AL             keep them in CH
;**       offset 3   ROL/ROR CH,n          normalize (n at offset 5)
;**       offset 6   MOV CL,count          pixel loop count (at offset 7)
;** The rotate count comes from xSrc (and cxExt when stepping left); the
;** loop count is 8 minus the rotate count, limited to the blt width.
;** On entry ESI = number of bytes of relocated brush code following the
;** fetch (0 if none).  The address of the loop top is pushed for the
;** store logic, and EDI is advanced past the brush code.
;*/
cblt_mono_to_color_no_pattern:

ifdef   FIREWALLS
        push    di                      ; save start of fetch code
        mov     cx,di                   ; here too
endif
        mov     al,I_LODSB              ; fetch a src byte of <= 8 mono pixels
        stosb
        mov     ax,I_MOV_CH_AL          ; store fetched byte in CH
        stosw
        mov     ax,I_ROL_CH_N           ; normalize src byte (assume STEPRIGHT)
        mov     dl,byte ptr xSrc
        and     dl,07h                  ; rotate left count
        cmp     iStepDir,STEPRIGHT; assumption correct ?
        je      @F                      ; yes...
        mov     ah,HIGH I_ROR_CH_N      ; no -- convert to opposite rotation
        add     dl,byte ptr cxExt       ; adjust to right end of scan + 1
        neg     dl                      ; adjust for stepping left
        and     dl,7                    ;  by making 8 - stepright count
@@:     stosw
MONO_8BPP_NORMALIZE_OFF equ     5       ; offset to rotation count for norm.
ifdef   FIREWALLS
        add     cx,MONO_8BPP_NORMALIZE_OFF
endif
        mov     al,dl
        stosb                           ; rotate count operand
        mov     al,I_MOV_CL_BYTE_I      ; stuff loop counter
        neg     dl                      ; 8 - rotation factor = # pixels to out
        add     dl,8
        sub     dh,dh                   ; zero-extend for compare
        cmp     dx,word ptr cxExt                ; blt width >= # pixels max to process?
        jbe     @F                      ; yes...
        mov     dl,byte ptr cxExt       ; no -- adjust pixels to process
@@:     mov     ah,dl
        stosw                           ; stuff loop count
MONO_8BPP_LOOP_COUNT_OFF equ    7       ; offset to loop count byte
MONO_8BPP_FETCH_LEN     equ     8       ; total bytes emitted above
ifdef   FIREWALLS
        add     cx,MONO_8BPP_LOOP_COUNT_OFF - MONO_8BPP_NORMALIZE_OFF + 1
        pop     ax                      ; get back -> before fetch code
        add     ax,MONO_8BPP_FETCH_LEN  ; add in length of fetch code
endif
        push    edi                      ; save -> top of loop (will be popped
                                        ; off in the store logic)
        add     edi,esi                   ; get back -> next addr to build code

;/*
;** The code has now been altered so that it looks somewhat like this:
;**
;**       <mono byte fetch, store, normalization>
;**  loop:<brush fetch>
;**       <brush index update>
;**
;** We have to add code in the loop which pulls mono pixels out of the mono
;** byte fetched, and converts it to either a foreground and/or background
;** color index. This must precede the insertion of the logical action code.
;** There are three cases:
;**
;**  no transparency:
;**  ----------------
;**       mov     al,<fgcolor>            ; assume a 1 bit in mono src
;**       ro{l,r} ch,1                    ; get next mono pixel to carry
;**       jc      @F                      ; is fg pixel...
;**       mov     al,<bgcolor>            ; else we load a bg pixel index
;**   @@:
;**
;**  transparency:
;**  -------------
;**       ro{l,r} ch,1                    ; get next mono pixel to carry
;**       jnc     @F                      ; skip <logic op>,<store> if transparent
;**       mov     al,<fgcolor>            ; else load fg pixel index
;**
;**  anti-transparency:
;**  ------------------
;**       ro{l,r} ch,1                    ; get next mono pixel to carry
;**       jc      @F                      ; skip <logic op>,<store> if transparent
;**       mov     al,<bgcolor>            ; else load bg pixel index
;*/

        mov     dx,ipcImageBoth ; DH = bgcolor, DL = fgcolor
        mov     cl,iStepDir     ; cache step direction
        mov     ch,byte ptr fsBlt[1]    ; CH = high byte of fsBlt
        .errnz  BBF_TRANS and 255
        .errnz  BBF_ANTI_TRANS and 255
        test    ch,HIGH (BBF_TRANS or BBF_ANTI_TRANS)
        jnz     cblt_mono_to_color_trans; do transparency stuff...

;/*
;** The no transparency case --
;** emits:  MOV AL,fg / RO{L,R} CH,1 / JC over / MOV AL,bg
;*/

        mov     al,I_MOV_AL_BYTE_I      ; MOV AL,<fgcolor>
        mov     ah,dl
        stosw
        mov     ax,I_ROL_CH_1           ; fetch mono pixel (assume STEPRIGHT)
        cmp     cl,INCREASE             ; is that right ?
        je      @F                      ; yes...
        mov     ah,HIGH I_ROR_CH_1      ; no -- rotate the other direction
        .errnz  LOW I_ROL_CH_1 - LOW I_ROR_CH_1
@@:     stosw
        mov     ax,I_JC_P4H             ; JC $+4
        stosw
        mov     al,I_MOV_AL_BYTE_I      ; MOV AL,<bgcolor>
        mov     ah,dh
        stosw
        jmp     cblt_logical_action

cblt_mono_to_color_trans:
;/*
;** Swizzle colors in case we are doing transparency without color conversion
;** (for ImageData dual pass blts).
;*/

        .errnz  LOW BBF_TRANS_NO_CC
        test    ch,HIGH BBF_TRANS_NO_CC
        jz      @F
        mov     dx,0ffffh               ; both colors become 0FFh
@@:

;/*
;** The transparency or anti-transparency case --
;** emits:  RO{L,R} CH,1 / J{NC,C} <patched later> / MOV AL,color
;*/

        mov     ax,I_ROL_CH_1           ; fetch mono pixel (assume STEPRIGHT)
        cmp     cl,INCREASE             ; is that right ?
        je      @F                      ; yes...
        mov     ah,HIGH I_ROR_CH_1      ; no -- rotate the other direction
        .errnz  LOW I_ROL_CH_1 - LOW I_ROR_CH_1
@@:     stosw
        mov     al,I_JNC                 ; assume transparency case
        test    ch,HIGH BBF_TRANS
        jnz     @F                      ; assumption was correct...
        mov     al,I_JC                ; was anti-transparency case
        mov     dl,dh                   ; so will be using bgcolor
@@:     stosw                           ; opcode + placeholder byte; the
                                        ; offset is filled in at <store> time
        push    edi                      ; save -> beyond where offset lives
        mov     al,I_MOV_AL_BYTE_I      ; pixel not transparent
        mov     ah,dl                   ; fetch color to load
        stosw
        jmp     short cblt_logical_action

;/*
;**       There is no conversion. We are going mono to mono or color to
;**       color.
;*/

;/*
;** cblt_no_color_conversion
;**
;** Mono to mono or color to color: the fetch is a single LODSB followed
;** by whatever phase_align_generate adds.  If FF_TWO_INIT_FETCHES is set,
;** the bytes from npFetchStart up to EDI are duplicated immediately after
;** themselves and npFetchStart is moved to the start of the second copy.
;*/
cblt_no_color_conversion:


;/*
;** Just need to generate the normal fetch sequence (lodsb)
;*/

cblt_fetch_mem:
        mov     cFetchCode,1    ; size of normal fetch sequence
        mov     al,I_LODSB              ; generate source fetch
        stosb

cblt_phase_align:
        call    phase_align_generate

already_phase_aligned:
        test    fbFetch,FF_TWO_INIT_FETCHES   ; Generate another fetch?
        jz      cblt_logical_action                  ;  No

; A second fetch needs to be stuffed.  Copy the one just created.

        mov     esi,edi                   ;Get start of fetch logic
        xchg    esi,npFetchStart         ;Set new start, get old
        mov     ecx,edi                   ;Compute how long fetch is
        sub     ecx,esi                   ;  and move the bytes
        rep     movs byte ptr [edi],byte ptr [esi]


        subttl  Compile - ROP Generation
        page
;/*
;**       Create the logic action code
;**
;**       The given ROP will be converted into the actual code that
;**       performs the ROP.
;*/

;/*
;** cblt_logical_action
;**
;** Start of the logic (ROP) code.  For a background mix of
;** BM_SRCTRANSPARENT or BM_DESTTRANSPARENT the matching template is
;** copied to EDI first, its start address is remembered
;** (src_transparent_stack / dest_transparent_stack) so the jump offset can
;** be patched once the ROP code is complete, and two bytes of the copy
;** are patched through displacements applied to EDI after the copy:
;** the color bkclr, and, when stepping right, I_INC_EDI as the
;** pointer-step instruction.
;*/
cblt_logical_action:

;/*
;** Compare bkmix for BM_SRCTRANSPARENT or BM_DESTTRANSPARENT.
;** If either bkmix is required, emit correct template DCR37
;*/
        cmp     bkmix,BM_SRCTRANSPARENT
        jnz     not_srctransparent
        mov     esi,offset src_transparent_template
        mov     src_transparent_stack,edi        ;Save start address of template
        mov     ecx,SRC_TRANSPARENT_TEMPLATE_LENGTH
        rep     movsb                            ;Move the template
        mov     cl,bkclr                         ;Add color to template
        mov     byte ptr[edi+SRC_TRANSPARENT_TEMPLATE_BKCLR],cl
        cmp     iStepDir,STEPRIGHT               ;Check direction
        jnz     @f                               ;Not right: keep template as is
        mov     cl,I_INC_EDI                     ;Correct direction of template
        mov     byte ptr[edi+SRC_TRANSPARENT_TEMPLATE_DIRECTION],cl
@@:
        jmp     bkmix_done
not_srctransparent:
        cmp     bkmix,BM_DESTTRANSPARENT
        jnz     bkmix_done
        mov     esi,offset dest_transparent_template
        mov     dest_transparent_stack,edi       ;Save start address of template
        mov     ecx,DEST_TRANSPARENT_TEMPLATE_LENGTH
        rep     movsb                            ;Move the template
        mov     cl,bkclr                         ;Add color to template
        mov     byte ptr[edi+DEST_TRANSPARENT_TEMPLATE_BKCLR],cl
        cmp     iStepDir,STEPRIGHT               ;Check direction
        jnz     @f                               ;Not right: keep template as is
        mov     cl,I_INC_EDI                     ;Correct direction of template
        mov     byte ptr[edi+DEST_TRANSPARENT_TEMPLATE_DIRECTION],cl
@@:

;/*
;** bkmix_done
;**
;** Copies the code template for the raster operation to EDI.
;**
;** usMixData (loaded into AX) encodes the template: the bits selected by
;** ROPLength index a table of template lengths at the start of ropcode,
;** the bits selected by ROPOffset give the template's offset within
;** ropcode (zero means source copy: no template), and the top bit
;** (NEGATE_NEEDED) requests a trailing NOT AL.  The special gray ROP uses
;** its own mono or 8bpp template instead of the table.
;**
;** The copy is done with REP MOVSB, which counts in ECX in this flat
;** 32-bit code, so the count is always zero-extended to the full register
;** before use; loading only CX/CL would leave whatever an earlier path
;** left in the upper half of ECX.
;*/
bkmix_done:


        mov     eax,usMixData            ; get rop data

;/*
;** The special gray rop will erroneously lead to Color Pat Fetch.
;** It should be mono fetch of the transparency mask.
;*/

        test    fsBlt,BBF_GRAY_ROP
        jz      @F                      ; cblt_not_special_gray_rop

        mov     cx,LENGTH_GRAY_ROP_TEMPLATE_MONO
        mov     esi,OFFSET gray_rop_template_mono

        test    bl,F0_DEST_IS_COLOR
        jz      cblt_copy_template

        mov     cx,LENGTH_GRAY_ROP_TEMPLATE_8BPP
        mov     esi,OFFSET gray_rop_template_8bpp
        jmp     short   cblt_copy_template
@@:


        xchg    ah,al                   ; swap rop hi/lo bytes
        mov     si,ax                   ; low byte now holds the length field
        movzx   esi,si
        and     si,HIGH ROPLength       ; isolate the length index bits
        shr     si,2                    ;  and right-justify them
        .errnz   ROPLength - 0001110000000000b
        movzx   ecx,byte ptr ropcode[esi]; template length, full ECX

        xchg    ah,al                   ; restore rop hi/lo bytes
        mov     si,ax                   ; get offset of the template
        and     si,ROPOffset
        jz      cblt_srccopy            ; source copy
        lea     esi,ropcode[esi]       ; --> the template
cblt_copy_template:
        movzx   ecx,cx                  ; REP counts in ECX: clear upper half
        rep     movsb                   ; Move the template
cblt_srccopy:

        or      ah,ah                   ; Generate a negate?
        .errnz  (NEGATE_NEEDED AND 8000h) - 8000h
        jns     cblt_no_NOT             ;  No
        mov     ax,I_NOT_AL
        stosw
cblt_no_NOT:

;/*
;** Compare bkmix for BM_SRCTRANSPARENT or BM_DESTTRANSPARENT.
;** If either bkmix is required, patch the jump offset byte of the template
;** that was copied ahead of the ROP code, so that its jump lands after the
;** destination byte is output (edi is updated instead).  DCR37
;**
;** The value stored is:
;**       (current EDI - template start) - (template length - 1)
;** and it is written at <template start> + ..._TEMPLATE_JUMP.
;*/

        cmp     bkmix,BM_SRCTRANSPARENT
        jnz     @f
        mov     ecx,edi                                        ;Current destination
        mov     eax,src_transparent_stack                      ;Start of template on stack
        sub     ecx,eax                                        ;Sub start from current
        sub     ecx,SRC_TRANSPARENT_TEMPLATE_LENGTH-1          ;Sub template length
        mov     byte ptr[eax+SRC_TRANSPARENT_TEMPLATE_JUMP],cl ;Jump offset
        jmp     bkmix_known
@@:
        cmp     bkmix,BM_DESTTRANSPARENT
        jnz     bkmix_known
        mov     ecx,edi                                        ;Current destination
        mov     eax,dest_transparent_stack                     ;Start of template on stack
        sub     ecx,eax                                        ;Sub start from current
        sub     ecx,DEST_TRANSPARENT_TEMPLATE_LENGTH-1         ;Sub template length
        mov     byte ptr[eax+DEST_TRANSPARENT_TEMPLATE_JUMP],cl;Jump offset
bkmix_known:



;/*
;** Add transparency mask if needed:
;**
;** For a transparent or anti-transparent blt to a mono destination the
;** template transparency_template_mono is appended to the logic code.
;** The end of the fetch/logic code is then recorded in npFetchEnd.
;**
;** REP MOVSB counts in ECX in this flat 32-bit code, and ECX can arrive
;** here with a stale upper half (e.g. when the ROP was a source copy and
;** no template copy cleared it), so the length is zero-extended first.
;*/

        test    fsBlt,BBF_TRANS or BBF_ANTI_TRANS
        jz      cblt_transparency_not_needed
        test    bl,F0_DEST_IS_COLOR
        jnz     cblt_transparency_not_needed

        mov     cx,LENGTH_TRANSPARENCY_TEMPLATE_MONO
        movzx   ecx,cx                  ; full 32-bit count for REP
        mov     esi,OFFSET transparency_template_mono
        rep     movsb
cblt_transparency_not_needed:

        mov     npFetchEnd,edi           ; Save end of fetch/logic operation

        subttl  Compile - Mask And Save
        page
;/*
;**       Generate code to store the result.
;**
;**       Color destination, no color conversion: a single STOSB.
;**
;**       Color destination, mono to color: the pixel store is followed by
;**       the per-pixel loop tail (DEC CL / JNZ back to the loop top whose
;**       address was pushed by the fetch code).  With transparency the
;**       store is I_MOV_DEST_AL plus an explicit pointer step, and the
;**       conditional jump emitted earlier (address pushed then) is patched
;**       to skip the store.  npFetchEnd and cFetchCode are then
;**       recalculated to include the whole loop.
;**
;**       Mono destination: the destination byte is loaded first
;**       (I_MOV_AH_DEST), then the 8-byte masked_store_mono template is
;**       appended and patched with the start mask.
;**
;**       In all cases npFetchEnds records the end of the generated code.
;*/

        test    bl,F0_DEST_IS_COLOR     ; color destination?
        jnz @f
        jmp      cblt_mask_store_needed ; no...
@@:
        mov     al,I_STOSB              ; assume color to color or mono to
                                        ; to color with no transparency

        test    bl,F0_GAG_CHOKE         ; converting mono to color ?
        jnz     mono_to_color1          ; yes...
;/*
;** Color to color blt just requires a straight 8bpp stuff. We don't yet
;** handle the 4bpp case.
;*/

        stosb
        jmp     cblt_no_mask_store_needed
;/*
;** Mono to color conversion requires a more complex store operation because
;** of the looping logic required.
;*/

mono_to_color1:
        test    fsBlt,BBF_TRANS or BBF_ANTI_TRANS
        jnz     @F                      ; handle transparency below...
        stosb                           ; store pixel in non-transparent case
        jmp     short cblt_store_loop_logic; continue with looping logic...

@@:
        mov     ax,I_MOV_DEST_AL
        stosw
        pop     esi                      ; -> 1 beyond JC or JNC instruction
        mov     eax,edi
        sub     eax,esi                   ; now offset to the INC DI
        mov     [esi][-1],al          ; stuff relative jump offset
        mov     al,I_INC_EDI             ; increment destination pointer
; NOTE(review): the direction is taken from iDir here, while the rest of
; this code tests iStepDir -- confirm the two are equivalent at this point.
        cmp     iDir, INCREASE          ;               We must check
        je      @f                      ;               for the possibility
        mov     al,I_DEC_EDI            ;               of stepping backwards.
@@:
        stosb

cblt_store_loop_logic:

        mov     ax,I_DEC_CL             ; decrement pixel in byte counter
        stosw
        pop     eax                      ; -> top of loop (pushed in fetch code)
        sub     eax,edi                   ; calculate # bytes back from current ->
        sub     al,2                    ; account for JNZ instruction
        mov     ah,al                   ; AH = signed 8-bit displacement
        mov     al,I_JNZ                ; jump back to top of loop if more
        stosw
        mov     npFetchEnd,edi           ; fetch,logic stuff includes all now
        mov     eax,edi                   ; calculate fetch code size
        sub     eax,npFetchStart
        mov     cFetchCode,eax   ; and store for last byte processing
        jmp     short cblt_no_mask_store_needed
;/*
;** Mono destinations require that the result of the fetch,logic operation
;** be masked, with unused portions being pipelined to ensuing operations, and
;** only used portions being combined with the current destination.
;*/

cblt_mask_store_needed:

        mov     ax,I_MOV_AH_DEST
        stosw
        mov     esi,OFFSET masked_store_mono; add masked store template
        movsd
        movsd
        .errnz  MASKED_STORE_LEN_MONO - 8; template must be 8 bytes (two MOVSDs)
        mov     ax,mStart       ; stuff start mask into the template
        xchg    ah,al
        movzx   eax,ax
        mov     MASKED_STORE_MASK_MONO[edi],eax

cblt_no_mask_store_needed:

        mov     npFetchEnds,edi        ; Save end of fetch/logic/store operation


        subttl  Compile - Inner Loop Generation
        page
;/*
;**       Now for the hard stuff; The inner loop (said with a "gasp!").
;**
;**       If there is no innerloop, then no code will be generated
;**       (now that's fast!).
;*/

;/*
;** cblt_5000
;**
;** Inner loop generation.  Nothing is generated for a zero loop count.
;** Otherwise the raster op is examined for the cases that can be done
;** with a repeated string instruction:
;**
;**       S           -> cblt_rop_s (further checks there)
;**       DDx / DDxn  -> emit a load of all 0s / all 1s, SI = I_STOSB
;**       P / Pn      -> (mono pattern only) emit MOV AL,DH / MOV AH,AL,
;**                      plus NOT AX for Pn, SI = I_STOSB
;**
;** and control goes to cblt_can_use_rep with SI holding the string opcode.
;** Everything else goes to cblt_cannot_rep.
;*/
cblt_5000:
        mov     edx,cInnerByte   ; Get the loop count
; NOTE(review): only the low 16 bits of the count are tested here.
        or      dx,dx                   ; If the count is null
        jz      cblt_5080               ;  don't generate any code.

;/*
;**       We have something for a loop count.  If this just happens to be
;**       a source copy (S) with a phase of zero, then the innerloop degenerates
;**       to a repeated MOVSB instruction.  This little special case is
;**       worth checking for and handling!
;**
;**       Also, if this is one of the special cases {P, Pn, DDx, DDxn}, then it
;**       will also be special cased since these are all pattern fills (pattern,
;**       not pattern, 0, 1).
;**
;**       The same code can be shared for these routines, with the exception
;**       that patterns use a STOSx instruction instead of a MOVSx instruction
;**       and need a value loaded in AX
;**
;**       For the special cases {P, Pn, DDx, DDxn}, color conversion is
;**       not possible, so ignore it for them.
;*/

        mov     bh,byte ptr usMix       ; Get the raster op

        cmp     bh,BB_ROP_S
        je      cblt_rop_s

        mov     ax,((0ffh shl 8) + I_MOV_AX_WORD_I); assume all 1s fill
        cmp     bh,BB_ROP_DDxn
        je      @F

        not     ah                      ; assumes all 0s fill
        cmp     bh,BB_ROP_DDx
        jne     cblt_not_0s_1s

@@:     stosb                           ; opcode byte; immediate follows
        mov     al,ah                   ; AX = 0FFFFh or 0000h
        push    bx
        mov     bx,ax
        shl     eax,16                  ; replicate the fill word into
        mov     ax,bx                   ;  both halves of EAX
        pop     bx
        stosd                           ; 32-bit immediate: all 1s or all 0s
        mov     si,I_STOSB              ; set up for repeated code processor

        jmp     short cblt_can_use_rep

cblt_not_0s_1s:

        cmp     bh,BB_ROP_P
        je      @F                      ; can be special-cased
        cmp     bh,BB_ROP_Pn
        jne     cblt_cannot_rep         ; cannot special case it
@@:     test    bl,F0_COLOR_PAT         ; using color pattern fetch ?
        jnz     cblt_cannot_rep         ; yes -- then cannot use fast code...
        mov     si,I_STOSB              ; Set up for repeated code processor

        mov     ax,I_MOV_AL_DH          ; fetch pattern byte
        stosw
        mov     ax,I_MOV_AH_AL          ; replicate it in AX
        stosw

        cmp     bh,BB_ROP_P             ; writing just pattern
        je      cblt_can_use_rep

        mov     ax,I_NOT_AX             ; writing inverse pattern
        stosw
        jmp     short cblt_can_use_rep


cblt_rop_s:

;
;/*
;** Several circumstances prevent us from utilizing the rep code. These are:
;**
;**       a) Device source (ie: we set up the hardware in byte mode which
;**          means that insb with or without rep will not work properly).
;**
;**       b) Color conversion because too much other work must go on for us
;**          to be able to use the more efficient code.
;**
;**       c) Nonzero phase because masking must occur.
;*/

        test    bl,(F0_SRC_IS_DEV or F0_GAG_CHOKE); first two conditions?
        jnz     cblt_cannot_rep         ; either one gets us out of here...
        cmp     iHorzPhase,0            ; is horizontal phase zero?
        jne     cblt_cannot_rep         ; no -- can't condense source copy...

; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Compare bkmix for BM_SRCTRANSPARENT or BM_DESTTRANSPARENT.            ;
; If either bkmix is required, cannot use REP MOVSB. DCR37              ;
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
        cmp     bkmix,BM_SRCTRANSPARENT
        jnz     @f
        jmp     cblt_cannot_rep
@@:
        cmp     bkmix,BM_DESTTRANSPARENT
        jnz     @f
        jmp     cblt_cannot_rep
@@:

        mov     si,I_MOVSB              ; Set register for moving bytes

;/*
;**       This is a source copy or pattern fill.  Process an odd byte with
;**       a MOVSB or STOSB, then process the rest of the bytes with a REP
;**       MOVSW or a REP STOSW.  If the REP isn't needed, leave it out.
;**
;**       Don't get caught on this like I did!  If the direction of the
;**       BLT is from right to left (decrementing addresses), then both
;**       the source and destination pointers must be decremented by one
;**       so that the next two bytes are processed, not the next byte and
;**       the byte just processed.  Also, after all words have been processed,
;**       the source and destination pointers must be incremented by one to
;**       point to the last byte (since the last MOVSW or STOSW would have
;**       decremented both pointers by 2).
;**
;**       If the target machine is an 8086, then it would be well worth the
;**       extra logic to align the fields on word boundaries before the MOVSxs
;**       if at all possible.
;**
;**       The generated code should look something like:
;**
;**       WARP8:                               ;This code for moving left to right
;**               movsb                        ;Process an odd byte
;**               ld      cx,cInnerByte/2 ;Set word count
;**               rep                          ;If a count, then repeat is needed
;**               movsw                        ;Move words until done
;**
;**
;**       WARP8:                               ;This code for moving right to left
;**               movsb                        ;Process an odd byte
;**               dec     si                   ;adjust pointer for moving words
;**               dec     di
;**               ld      cx,cInnerByte/2 ;Set word count
;**               rep                          ;If a count, then repeat is needed
;**               movsw                        ;Move words until done
;**               inc     si                   ;adjust since words were moved
;**               inc     di
;**
;**
;**       Of course, if any part of the above routine isn't needed, it isn't
;**       generated (i.e. the generated code might just be a single MOVSB)
;*/

cblt_can_use_rep:
; Emit the repeated string operation for the inner loop.
;   DX = count tested here (0, 1, or many)
;   SI = string opcode to emit (I_STOSB or I_MOVSB in the paths visible
;        above); only its low byte is stored.
; Code generated at [EDI]:
;   DX == 0 :  nothing
;   DX == 1 :  <opcode>
;   DX  > 1 :  I_MOV_ECX_DWORD_I, dword count, I_REP, <opcode>
; NOTE(review): the comments below still speak of "words"; the code visible
; here emits the byte-form opcode and never adds 1 to select the word form.
        or      dx,dx
cblt_5080:                    ; This is being used as a double jmp pt for jz's
        jz      cblt_5140               ;No more bytes to move
cblt_5100:
        cmp     dx,1                    ;Move exactly one, or many?
        jz      cblt_5120               ;  Only one -- no count or REP needed
        mov     al,I_MOV_ECX_DWORD_I      ;  Many, load count
        stosb
        movzx   eax,dx                  ;CMVC_49407 @DMS (zero-extend 16-bit count)
        stosd
        mov     al,I_REP                ;  and a repeat instruction
        stosb
cblt_5120:
        mov     ax,si                   ;Emit the string opcode held in SI
        stosb
        .errnz   I_MOVSW-I_MOVSB-1       ;The word form of the instruction
        .errnz   I_STOSW-I_STOSB-1       ;  must be the byte form + 1

cblt_no_inner_loop_code:
cblt_5140:
        jmp     cblt_inner_loop_done    ; Done setting up the innerloop

        page
;/*
;**       There is some count for the innerloop of the BLT.  Generate the
;**       required BLT. Two or four copies of the BLT will be placed on the
;**       stack.   This allows the LOOP instruction at the end to be distributed
;**       over two or four bytes instead of 1, saving 11 or 12 clocks for each
;**       byte (for 4).  Multiply 12 clocks by ~ 16K and you save a lot of
;**       clocks!
;**
;**       If there are less than four (two) bytes to be BLTed, then no looping
;**       instructions will be generated.  If there are more than four (two)
;**       bytes, then there is the possibility of an initial jump instruction
;**       to enter the loop to handle the modulo n result of the loop count.
;**
;**       The innerloop code will look something like:
;**
;**
;**       <       mov     cx,loopcount/n> ;load count if >n innerloop bytes
;**       <       jmp     short ???     > ;If a first jump is needed, do one
;**
;**       BLTloop:
;**               replicate initial byte BLT code up to n times
;**
;**       <       loop    BLTloop >       ;Loop until all bytes processed
;*/


cblt_cannot_rep:

; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
; General (non-REP) inner loop generation starts here.
;
; If inner loop once code, use CX as loop counter but must push it for
; ensuing pop also.
;
; From here BH = fbMore (F1_* flags); BL is still tested with F0_* flags.
; When F1_INNER_ONCE is set, EDX is repurposed: it stops being the inner
; byte count and becomes the address of the loop start (used later at
; recover_fetch).

        mov     bh,fbMore       ; will be used a few times
        test    bh,F1_INNER_ONCE        ; only 1 fetch copy in loop ?
        jz      @F                      ; no...

        mov     al,I_MOV_ECX_DWORD_I    ; store loop count in CX
        stosb
        mov     ax,dx                   ; fetch inner loop byte count
        movzx   eax,ax                  ; zero-extend it to the dword immediate
        stosd

        mov     edx,edi                   ; save offset of loop start

        mov     al,I_PUSH_ECX            ; save loop count
        stosb
@@:


;/*
;** If fetching a non-solid color pattern then a pattern fetch must occur
;** for each byte processed.
;*/

        test    bl,F0_PAT_PRESENT       ; is pattern being used ?
        jz      cblt_inner_no_pat       ; no...
        test    bl,F0_COLOR_PAT         ; color pattern fetch ?
        jz      cblt_inner_no_pat       ; no...
        test    bh,F1_MONO_COLOR        ; converting mono to color?
        jnz     cblt_inner_no_pat       ; yes -- ignore this code...

; Record (current code address + 1) as the inner-loop brush offset location.
        inc     edi                      ; point to where brush offset is
        mov     dl_addr_pbrush_off_m,edi ; store for later reference
        dec     edi                      ; restore current code offset

; Copy the brush fetch code [dl_addr_pbrush, dl_addr_brush_end) to EDI.
; EAX is set to the address, inside the new copy, that corresponds to
; npbPatRow in the original.
        mov     eax,npbPatRow    ; -> the 7 of 7[BX]
        mov     esi,dl_addr_pbrush       ; fetch brush code from here
        sub     eax,esi                   ; offset to the index
        add     eax,edi                   ; now -> where index will live

        mov     ecx,dl_addr_brush_end    ; calculate # bytes to move
        sub     ecx,esi
        rep     movs byte ptr [edi],byte ptr [esi]

; Step the index byte in the copy by iDir (mod 8) and remember the result
; in brush_index_m.
        mov     esi,eax                   ; point to index
        mov     ch,iDir         ; set brush index
        add     byte ptr [esi],ch     ; point to next brush byte
        .errnz   INCREASE - 1           ; must be a 1
        .errnz   DECREASE + 1           ; must be a -1
        and     byte ptr [esi],7      ; keep mod 8
        mov     al,[esi]
        mov     brush_index_m,al

; Emit code that advances that same index byte at run time:
;       I_MOV_AL_MEM <addr> / I_ADD_AL_BYTE_I iDir / I_AND_AL_BYTE_I 7 /
;       I_MOV_MEM_AL <addr>          (addr = ESI, the index in the copy)
        mov     al,I_MOV_AL_MEM
        stosb                           ;mov al,ss:[xxxx]
        mov     eax,esi
        stosd
        mov     al,I_ADD_AL_BYTE_I
        mov     ah,ch                   ; set brush index
        .errnz   INCREASE - 1           ; must be a 1
        .errnz   DECREASE + 1           ; must be a -1
        stosw
        mov     ax,(I_AND_AL_BYTE_I+(7*256)); and al,BrushIndexMask
        stosw
        mov     al,I_MOV_MEM_AL
        stosb                           ;mov ss:[xxxx],al
        mov     eax,esi
        stosd
cblt_inner_no_pat:


;/*
;** Certain circumstances (ie: color conversion, color pattern fetch)
;** are too unwieldy to replicate in the inner loop, because of number
;** of fixups and/or size of fetch code. These cases will be isolated
;** here and the inner loop will be just one copy.
;**
;** Register use in this section:
;**       BH  = fbMore (F1_* flags), BL tested with F0_* flags
;**       EDX = address of loop start (saved at cblt_cannot_rep)
;**       EAX = address of the fetch code copy made below
;*/

        test    bh,F1_INNER_ONCE        ; 1 inner loop copy ?
        jnz @f
        jmp      cblt_inner_multiple    ; no...
@@:
; Copy the whole block [npFetchStart, npFetchEnd) once.
        mov     ecx,npFetchEnd           ; get size of the fetch code
        mov     esi,npFetchStart         ; and load -> to it also
        sub     ecx,esi
        mov     eax,edi                   ; keep copy to start of fetch
        rep     movs byte ptr [edi],byte ptr [esi]

        test    bh,F1_COLOR_MONO
        jz      cblt_inner_not_color_mono

        mov     esi,eax                   ; ES:SI is -> fetch code start

@@:
; Color -> mono: patch the count byte in the copied fetch code to 8.
; EAX is preserved because it is reused below.
        push    eax
        mov     al,8
        mov     byte ptr[esi][POS_CBLT_FETCH_8BPP_TO_MONO_COUNT],al 
        pop     eax

cblt_inner_not_color_mono:

        test    bh,F1_MONO_COLOR        ; converting mono to color ?
        jz      cblt_inner_not_mono_color; no...

        mov     esi,eax                   ; ES:SI is -> fetch code start

        test    bl,F0_PAT_PRESENT       ; pattern to account for ?
        jz      @F                      ; no...
; Advance the brush index stored in the copied code by the count found at
; MONO_8BPP_LOOP_COUNT_OFF, keeping it mod SIZE_PATTERN.
        mov     ch,[esi][MONO_8BPP_LOOP_COUNT_OFF]; # initial pixels
        mov     eax,npbPatRow    ; -> initial brush index -
        sub     eax,dl_addr_pbrush       ; -> brush start is index offset;
        add     esi,eax                   ; offset to it less fetch code
        mov     cl,[esi][MONO_8BPP_FETCH_LEN]; fetch brush index
        add     cl,ch                   ; update brush index
        and     cl,SIZE_PATTERN-1       ; keep mod SIZE_PATTERN
        mov     [esi][MONO_8BPP_FETCH_LEN],cl; store new brush index
        lea     ecx,[esi][MONO_8BPP_FETCH_LEN]

        ; NewCon 8-12-89
        ; dl_addr_pbrush_off_m should point to the offset of the brush
        ; in the code, not the offset into the brush for a specific pixel

        sub     ecx, 6                  ; NOTE(review): 6 is an unnamed constant

        mov     dl_addr_pbrush_off_m,ecx ; save -> inner loop brush index;
        sub     esi,eax                   ; rewind fetch code start ->
@@:

; Inner-loop bytes are full bytes: no normalization, 8 pixels per pass.
        mov     byte ptr[esi][MONO_8BPP_NORMALIZE_OFF],0
        mov     byte ptr[esi][MONO_8BPP_LOOP_COUNT_OFF],8; full bytes
        jmp     short recover_fetch     ; skip store logic...
cblt_inner_not_mono_color:
        mov     al,I_STOSB              ; store logic
        stosb
recover_fetch:
; Emit the pop matching the I_PUSH_ECX emitted at cblt_cannot_rep.
        mov     al,I_POP_ECX            ; recover fetch counter
        stosb

        mov     esi,edx                   ; SI is -> start of loop
        jmp     cblt_5590               ; to common code below...
cblt_inner_multiple:

;/*
;**       Replicate the fetch/logic code, each copy followed by a STOSB,
;**       two or four times, optionally preceded by a loop count load and
;**       a jump into the middle of the replicated code to absorb the
;**       count MOD n remainder.
;**
;**       On entry:       EDX = number of inner loop bytes
;**                       EDI --> next location to compile code
;**       On exit:        EDX = bytes remaining after one pass (<= 0 if no
;**                             loop is needed)
;**                       ESI --> start of the replicated code (loop top)
;**                       EDI --> next location to compile code
;**
;**       NOTE(review): the two "mul bl" instructions are byte multiplies
;**       (AL * BL -> AX), so they assume the size of one logic sequence
;**       fits in a byte -- confirm against the fetch code templates.
;**       NOTE(review): the full EDX is used here, whereas cblt_can_use_rep
;**       and the F1_INNER_ONCE path zero-extend DX; the upper half of EDX
;**       is presumably already clean on this path -- verify.
;*/

        mov     ebx,npFetchEnd           ; compute size of the fetch code
        sub     ebx,npFetchStart
        inc     ebx                      ; a stosb will be appended
        mov     esi,4                    ; assume replication 4 times
        mov     cl,2                    ;  (shift count two bits left)
        cmp     ebx,32                   ; small enough for 4 times?
        jc      cblt_5520               ;  yes, replicate 4 times
        shr     esi,1                    ;  no,  replicate 2 times
        dec     cx                      ;  (shift count one bit left)

cblt_5520:
        cmp     edx,esi                   ;Generate a loop?
        jle     cblt_5540               ;  No, just copy code
        mov     al,I_MOV_ECX_DWORD_I
        stosb                           ;mov ecx,loopcount/n
        mov     eax,edx                   ;Compute loop count
        shr     eax,cl
        stosd
        shl     eax,cl                   ;See if loopcount MOD n is 0
        sub     eax,edx                  ;eax = -(loopcount MOD n)
        jz      cblt_5540               ;Zero, no odd count to handle

;/*
;**       There is an odd portion of bytes to be processed.  Increment
;**       the loop counter for the odd pass through the loop and then
;**       compute the displacement for entering the loop.
;**
;**       To compute the displacement, subtract the number of odd bytes
;**       from the modulus being used  (i.e. 4-3=1).  This gives the
;**       number of bytes to skip over the first time through the loop.
;**
;**       Multiply this by the number of bytes for a logic sequence,
;**       and the result will be the displacement for the jump.
;*/

; The count just emitted by stosd is a full dword immediate, so it must be
; adjusted as a dword; a word-sized increment would not carry out of the
; low 16 bits.
        inc     dword ptr [edi-4]     ;Not zero, adjust for partial loop
        add     eax,esi                   ;Compute where to enter the loop at
        mul     bl                      ;ax = copies to skip * bytes per copy
        mov     ecx,eax
        mov     al,I_JMP_NEAR           ;Stuff jump instruction
        stosb
        mov     eax,ecx                   ;Stuff displacement for jump
        stosd
;/*
;**       Currently:      EDX = loop count
;**                       ESI = loop modulus
;**                       EBX = size of one logic operation
;**                       EDI --> next location in the loop
;*/

cblt_5540:
        mov     ecx,ebx                   ;Set move count
        mov     ebx,edx                   ;Set maximum for move
        cmp     ebx,esi                   ;Is the max > what's left?
        jle     cblt_5560               ;  No, just use what's left
        mov     ebx,esi                   ;  Yes, copy the max

cblt_5560:
        sub     edx,esi                   ;If edx > 0, then loop logic needed
        mov     esi,npFetchStart         ;--> fetch code to copy
        mov     eax,ecx                   ;Save a copy of fetch length
        rep     movs byte ptr [edi],byte ptr [esi];Move fetch code and stuff stosb
        mov     esi,edi                   ;--> new source (and top of loop)
        sub     esi,eax
        mov     byte ptr [edi-1],I_STOSB ;Overwrite the extra byte with the store
        dec     bl                      ;One copy has been made
        mul     bl                      ;Compute # bytes left to move
        mov     ecx,eax                   ;Set move count
        rep     movs byte ptr [edi],byte ptr [esi];Replicate the first copy forward
        sub     esi,eax                   ;Restore pointer to start of loop
;/*
;**       The innermost BLT code has been created and needs the looping
;**       logic added to it.  If there is any looping to be done, then
;**       generate the loop code.  The code within the innerloop may be
;**       greater than 126 bytes, so a LOOP instruction may not be used
;**       in this case.
;**
;**       Currently:
;**               DX      =       # bytes remaining to process
;**               SI      =       -> start of loop
;**               DI      =       -> next place to compile code
;*/

cblt_5580:
; Append the loop-back instruction to the inner loop just generated.
;   EDX = bytes remaining (signed; <= 0 means no loop is needed)
;   ESI = start of loop, EDI = next place to compile code
; cblt_5590 is also entered directly from the F1_INNER_ONCE path, which
; always wants the loop.
        or      edx,edx                   ; need a loop?
        jle     cblt_inner_loop_done    ;   no, don't generate one

cblt_5590:
        mov     eax,esi                   ; compute offset of loop
        sub     eax,edi                   ; (negative: the target lies behind us)
        cmp     eax,-125                 ; can this be a short label?
        jc      cblt_5600               ;   no, must make it a near jmp
                                        ;   (unsigned below -125 = farther back)

        sub     al,2                    ; bias offset by length of LOOP inst.
        mov     ah,al
        mov     al,I_LOOP
        stosw                           ; set the loop instruction
        jmp     short cblt_inner_loop_done; go process the last byte code

cblt_5600:
; Long form: copy the 5-byte jmp_cx_nz template, then append a dword
; displacement measured from the end of that displacement.
        mov     esi,OFFSET jmp_cx_nz ; move in the dec CX jnz code
        movsd
        movsb
        .errnz   JMP_CX_NZ_LEN-5         ; must be five bytes long (movsd+movsb)
        sub     eax,JMP_CX_NZ_LEN + 4                    ; adjust jump bias
        stosd                           ;  and store it into jump

cblt_inner_loop_done:


        subttl  Compile - Last Byte Processing
        page
;/*
;**       All the innerloop stuff has been processed.  Now generate the code for
;**       the final byte if there is one.  This code is almost identical to the
;**       code for the first byte except there will only be one fetch (if a
;**       fetch is needed at all).
;**
;**       The code generated will look something like:
;**
;**       <       fetch           >       ; Get source byte
;**       <       align           >       ; Align source if needed
;**               action                  ; Perform desired action
;**               mask and store
;**
;**       NOTE: the flag registers are swapped relative to the inner loop
;**       code above: from here on BH = fbF0 and BL = fbMore.
;*/

        mov     bh,fbF0         ; will be used a lot
        mov     bl,fbMore       ; ditto

last_byte_processing:                    ;!!!GSS        debug

        mov     dx,mLast                ; get last byte mask
        or      dl,dl                   ; is there a last byte to be processed?
        jnz     @f
        jmp     cblt_no_last_byte       ; no...
@@:

; Skip last byte code when both F0_DEST_IS_COLOR and F0_SRC_IS_COLOR are
; set (the XOR leaves zero only if both bits were on).
        mov     cl,bh                                   
        and     cl,F0_DEST_IS_COLOR or F0_SRC_IS_COLOR  
        xor     cl,F0_DEST_IS_COLOR or F0_SRC_IS_COLOR  
        jnz     @f
        jmp     cblt_no_last_byte       
@@:
;/*
;** A non-solid color pattern fetch must occur for last byte processed.
;*/


        test    bh,F0_PAT_PRESENT       ; is pattern being used ?
        jz      cblt_last_no_pat        ; no...
        test    bh,F0_COLOR_PAT         ; color pattern fetch ?
        jz      cblt_last_no_pat        ; no...
        test    bl,F1_MONO_COLOR        ; color converting?
        jnz     cblt_last_no_pat        ; yes -- don't update brush yet

; Record where the brush offset will live in the copy about to be made:
; (dl_addr_pbrush_off - dl_addr_pbrush) + EDI + 1.
        mov     eax, dl_addr_pbrush_off
        sub     eax, dl_addr_pbrush
        add     eax, edi
        inc     eax
        mov     dl_addr_pbrush_off_l, eax
; Copy the brush fetch code [dl_addr_pbrush, dl_addr_brush_end) to EDI;
; EAX tracks where the npbPatRow index byte lands in the copy.
        mov     eax,npbPatRow    ; -> the 7 of 7[BX]
        mov     esi,dl_addr_pbrush       ; fetch brush code from here
        sub     eax,esi                   ; offset to the index
        add     eax,edi                   ; now -> where index will live

        mov     ecx,dl_addr_brush_end    ; calculate # bytes to move
        sub     ecx,esi
        rep     movs byte ptr [edi],byte ptr [esi]

; Offset the copied index by the low byte of cInnerByte (mod 8) and
; remember it in brush_index_l.
; NOTE(review): the inner loop steps its index by iDir, whereas this adds
; cInnerByte regardless of direction -- confirm this is intended.
        mov     esi,eax                   ; ES:SI is -> index
        mov     al,byte ptr [esi]     ; fetch starting index
        add     al,byte ptr cInnerByte; offset by bytes output
        and     al,7                    ; keep mod pattern size
        mov     byte ptr [esi],al     ; store back
        mov     brush_index_l,al
cblt_last_no_pat:

; Decide whether the last byte needs the fetch portion of the template.
;   ESI = npFetchStart, ECX = cFetchCode (size of the fetch part only)
; The fetch is always copied when color converting or when
; FF_NO_LAST_FETCH is clear.  Otherwise it is skipped (ESI advanced past
; it) unless the horizontal phase is zero.

        mov     esi,npFetchStart         ; -> start of fetch,logic code
        test    bh,F0_SRC_PRESENT       ; was there a source ?
        jnz     @F                      ; yes...
cblt_was_no_fetch_relay:
        jmp     cblt_was_no_fetch       ; no fetch if no source...
@@:

        mov     ecx,cFetchCode   ; size of the fetch code only

        test    bl,F1_COLOR_CONVERSION  ; test for color conversion
        jnz     cblt_include_fetch      ; yes -- always include the fetch...
        test    fbFetch,FF_NO_LAST_FETCH
        jz      cblt_include_fetch

        add     esi,ecx                   ; assume skipping fetch
        cmp     iHorzPhase,0            ; phase zero case is not combined
                                        ; into innerloop as it should be.
                                        ; if the final byte is full then we
                                        ; better not remove the lodsb ( i.e.
        jne     cblt_was_no_fetch_relay ; 0-0=0 would make us think we could)
        sub     esi,ecx                   ; will be including fetch

cblt_include_fetch:

        mov     eax,edi                   ; save -> start of fetch code
        rep     movs byte ptr[edi],byte ptr[esi]; copy fetch code


;/*
;** If color converting from mono to color we have some fixups to do.
;**
;** On entry EAX -> start of the fetch code just copied for the last byte.
;** This path ends with a jump to cblt_last_done, so the logic/store copy
;** and mask stuffing at cblt_was_no_fetch are not executed for it.
;*/

        test    bl,F1_MONO_COLOR        ; converting mono -> color ?
        jz      cblt_last_not_mono_color; no...

        mov     esi,eax                   ; ES:SI is -> last byte fetch

        test    bh,F0_PAT_PRESENT       ; pattern to account for ?
        jz      @F                      ; no...
        mov     ch,[esi][MONO_8BPP_LOOP_COUNT_OFF]; # initial pixels
        mov     cl,byte ptr cInnerByte; # inner loop bytes
        shl     cl,3                    ; now # inner loop pixels
        add     ch,cl                   ; now # pixels up to last byte
        mov     eax,npbPatRow    ; -> initial brush index -
        sub     eax,dl_addr_pbrush       ; -> brush start is index offset
        add     esi,eax                   ; offset to it less fetch code
        mov     cl,[esi][MONO_8BPP_FETCH_LEN]; fetch brush index
        add     cl,ch                   ; update brush index
        and     cl,SIZE_PATTERN-1       ; keep mod SIZE_PATTERN
        mov     [esi][MONO_8BPP_FETCH_LEN],cl; store new brush index
        lea     ecx,[esi][MONO_8BPP_FETCH_LEN]
;/*
;** dl_addr_pbrush_off_l should point to the offset of the brush
;** in the code, not the offset into the brush for a specific pixel
;*/

        sub     ecx, 6                  ; NOTE(review): 6 is an unnamed constant
        mov     dl_addr_pbrush_off_l,ecx ; save -> last byte brush index
        sub     esi,eax                   ; rewind fetch code start ->
@@:

; Patch the pixel count for the last byte.  DL is first computed as
; 8 - (xSrc AND 7) for stepping left; when not stepping left it is
; recomputed as ((xSrc + cxExt - 1) AND 7) + 1.
        mov     byte ptr[esi][MONO_8BPP_NORMALIZE_OFF],0
        mov     dl,byte ptr xSrc        ; assume stepping left
        and     dl,7                    ; get pixels in left end
        neg     dl
        add     dl,8
        cmp     iStepDir,STEPLEFT; step left correct ?
        je      @F                      ; yes...
        mov     dl,byte ptr xSrc        ; get pixels in right end byte
        add     dl,byte ptr cxExt       ; now last pixel addr + 1
        dec     dl                      ; now just last pixel addr
        and     dl,7                    ; keep mod 8
        inc     dl                      ; absolute # pixels
@@:     mov     byte ptr[esi][MONO_8BPP_LOOP_COUNT_OFF],dl

        jmp     short cblt_last_done    ; skip remainder...
cblt_last_not_mono_color:



;/*
;** If color converting from color to mono we have some fixups to do.
;**
;** NOTE: BL (fbMore) is overwritten in this section and used as a pixel /
;** rotate count; it is not restored here.
;*/

        test    bl,F1_COLOR_MONO        ; converting color -> mono ?
        jz      cblt_last_not_color_mono; no...



;/*
;** get bit # in last byte and convert to # pixels in the last byte
;*/

        mov     bl,gl_last_bit          ; last bit #
        inc     bl                      ; assume stepping right
        cmp     iStepDir,STEPRIGHT
        je      @F                      ; we assumed correctly...
        neg     bl                      ; else adjust for stepping left
        add     bl,8+1                  ; the +1 to adj for the
@@:                                     ;       STEPRIGHT assume

; EAX is forced to zero, so the sub/add pair around the store leaves EDI
; unchanged; it appears to be left over from an adjustment no longer made.
        sub     eax,eax                   ; assume src is bitmap

@@:     sub     edi,eax                   ; adjust for XCHG SI,DX ?
        mov     [edi][CBLT_FETCH_8BPP_TO_MONO_COUNT],bl ; patch pixel count in emitted code
        add     edi,eax                   ; reverse adj. for XCHG SI,DX ?
cblt_last_color_mono:

; Emit the normalizing rotate: opcode word chosen by step direction,
; followed by the rotate count byte (8 - pixel count).
        neg     bl                      ; 8 - bit # = normalization rot
        add     bl,8
        mov     ax,I_ROL_AL_N           ; assume norm. to left end byte
        cmp     iStepDir,STEPRIGHT
        je      @F                      ; stepping right...
        mov     ah,HIGH I_ROR_AL_N      ; must norm. to right end byte
        .errnz  LOW I_ROR_AL_N - LOW I_ROL_AL_N
@@:     stosw
        mov     al,bl                   ; normalization rotation count
        stosb
cblt_last_not_color_mono:


cblt_was_no_fetch:

; Copy the remaining logic/store part of the template, i.e. everything
; after the first cFetchCode bytes, from ESI to EDI, then patch the last
; byte mask (DX = mLast) into the emitted code at MASKED_STORE_MASK_MONO
; relative to the new EDI.
; NOTE(review): this reads npFetchEnds, while the inner loop code uses
; npFetchEnd for the end of the same template -- confirm both symbols
; exist and that this one is intended.
        mov     ecx,npFetchEnds          ; calculate bytes in logic,store code
        sub     ecx,npFetchStart
        sub     ecx,cFetchCode
        rep     movs byte ptr[edi],byte ptr[esi]; copy logic,store code
        mov     MASKED_STORE_MASK_MONO[edi],dx; stuff last byte mask
cblt_8bpp_to_8bpp:           ;!!!GS   Do we want the above instruction if
cblt_last_done:              ;!!!GS   the dest is color? (but src mono)


        subttl  Compile - Looping Logic
        page

;/*
;**       Looping logic.
;**
;**       The looping logic must handle monochrome bitmaps, color bitmaps,
;**       huge bitmaps, the device, the presence or absence of a source
;**       or pattern, and mono <==> color interactions.
;**
;**       The type of looping logic is always based on the destination.
;*/

;/*
;**       Get saved parameters off of the stack.
;**
;**       <       pop     si            > ;Get source pointer
;**               pop     di              ;Get destination pointer
;**               pop     cx              ;Get loop count
;*/

cblt_no_last_byte:

; Emit the pops that recover the per-scan saved values: I_POP_ESI only
; when a source is present, then I_POP_EDI_POP_ECX.  BH = fbF0 here.

        test    bh,F0_SRC_PRESENT       ;Is a source needed?
        jz      @F                      ;  No

        mov     al,I_POP_ESI             ;  Memory src, get source pointer
        stosb
@@:

        mov     ax,I_POP_EDI_POP_ECX      ;Get destination pointer
        stosw                           ;Get loop count


        subttl  Looping Logic - Brush Update
        page
;/*
;** If a pattern was involved in the blt, two actions must be accounted
;** for at the end of each scanline depending on whether or not we had a
;** mono or color pattern fetch.
;**
;** Mono pattern fetches require that we update the index into the pattern
;** at the end of each scanline. This is done here very simply.
;**
;** Color pattern fetches are a bit more complicated. We advanced the pattern
;** index for each byte we processed in the blt along the scanline. The
;** starting index is the same for each scanline. HOWEVER, the scan of the
;** pattern that we index into must be updated. This is what we do at this
;** time.
;*/

        test    bh,F0_PAT_PRESENT       ; is a pattern involved?
        jnz     @F
        jmp     cblt_6300               ; no...
@@:

        test    bh,F0_COLOR_PAT         ; is it a color pattern fetch ?
        jnz     @F                      ; yes... (next @@ is the color brush code)

;/*
;** mono or mask brushes just need to be updated to the next byte in the brush:
;**
;** Emitted:  I_MOV_AL_MEM npbPatRow / I_ADD_AL_BYTE_I iDir /
;**           I_AND_AL_BYTE_I 7 / I_MOV_MEM_AL npbPatRow
;*/

        mov     al,I_MOV_AL_MEM
        stosb                           ;mov al,ss:[xxxx]
        mov     edx,npbPatRow           ; keep the address for the store below
        mov     eax,edx
        stosd
        mov     al,I_ADD_AL_BYTE_I
        mov     ah,iDir         ; add al,bias
        .errnz   INCREASE-1              ; must be a 1
        .errnz   DECREASE+1              ; must be a -1
        stosw
        mov     ax,0700h+I_AND_AL_BYTE_I;and al,00000111b
        stosw
        mov     al,I_MOV_MEM_AL
        stosb                           ;mov ss:[xxxx],al
        mov     eax,edx
        stosd
        jmp     cblt_6300
;/*
;** color brushes are more complicated. We must update to the next row of the
;** brush (ie: change the offset in the MOV BX,YYYY instructions) in each of the
;** possible three places in the compiled code where the color pattern fetch may
;** occur. The first fetch is always present, while the inner loop fetch and the
;** last byte fetch may not.
;**
;** In addition, we may possibly have to reset the indices for the first,middle,
;** and last brush fetches back to their initial values. (This is because the
;** middle brush fetch is within a loop that advances the index to the next
;** brush byte; this may also be the case for the first and last brush fetches
;** if we are doing mono to color conversion).
;*/


@@:

; Color brush end-of-scan update.  Register use while generating:
;   ESI = address of the brush offset being patched (first, middle, last)
;   DH  = iDir (its sign selects add/sub and the wrap direction)
;   ECX = pBrush

;/*
;** Adjust initial brush fetch starting offset:
;**
;** Emitted sequence:
;**       I_MOV_EAX_MEM        <dl_addr_pbrush_off_f>
;**       I_ADD_EAX_DWORD_I or I_SUB_EAX_DWORD_I   SIZE_PATTERN
;**       I_CMP_EAX_DWORD_I    <limit>
;**       I_JC_P7H or I_JNC_P7H
;**       I_MOV_EAX_DWORD_I    <wrap value>
;**       I_MOV_MEM_AX         <dl_addr_pbrush_off_f>
;** where, for a non-negative iDir, limit = pBrush + SIZE_PATTERN *
;** SIZE_PATTERN and wrap value = pBrush; for a negative iDir,
;** limit = pBrush and wrap value = pBrush + SIZE_PATTERN *
;** SIZE_PATTERN - SIZE_PATTERN.
;*/

        mov     al,I_MOV_EAX_MEM
        stosb
        mov     esi,dl_addr_pbrush_off_f ; -> first brush offset
        mov     eax,esi
        stosd
        mov     al,I_ADD_EAX_DWORD_I
        mov     dh,iDir
        or      dh,dh
        jns     @F
        mov     al,I_SUB_EAX_DWORD_I
@@:     stosb
        mov     eax,SIZE_PATTERN
        stosd
        mov     al,I_CMP_EAX_DWORD_I
        stosb
        mov     ecx,pBrush
        mov     eax,ecx
        or      dh,dh
        js      @F
        add     eax,SIZE_PATTERN * SIZE_PATTERN
@@:     stosd
        mov     ax,I_JC_P7H
        or      dh,dh
        jns     @F
        mov     ax,I_JNC_P7H
@@:     stosw
        mov     al,I_MOV_EAX_DWORD_I
        stosb
        mov     eax,ecx
        or      dh,dh
        jns     @F
        add     eax,(SIZE_PATTERN * SIZE_PATTERN) - SIZE_PATTERN
@@:     stosd
        mov     al,I_MOV_MEM_AX
        stosb
        mov     eax,esi
        stosd

;/*
;** Reinitialize first brush fetch starting index:
;**
;** The generated code is bracketed by I_PUSH_EAX / I_POP_EAX because the
;** new brush offset in EAX is stored again for the middle and last
;** fetches below.
;*/
        mov     al,I_PUSH_EAX
        stosb
        push    esi
        lea     esi,get_brush_index_f
        mov     ecx,brush_index_f_len
        rep     movsb                   ; copy the get_brush_index_f code
        pop     esi
        mov     al,I_MOV_MEM_AL
        stosb
        add     esi,BRUSH_INDEX_LESS_BRUSH_OFF
        mov     eax,esi
        stosd
        mov     al,I_POP_EAX
        stosb
        mov     esi,dl_addr_pbrush_off_m
        or      esi,esi
        jz      @F                      ; no inner loop pattern fetch...

;/*
;** Adjust middle brush fetch starting offset:
;*/

        mov     al,I_MOV_MEM_AX
        stosb
        mov     eax,esi
        stosd

;/*
;** Reinitialize middle brush fetch starting index:
;*/
        mov     al,I_PUSH_EAX
        stosb
        push    esi
        lea     esi,get_brush_index_m
        mov     ecx,brush_index_m_len
        rep     movsb                   ; copy the get_brush_index_m code
        pop     esi
        mov     al,I_MOV_MEM_AL
        stosb
        add     esi,BRUSH_INDEX_LESS_BRUSH_OFF
        mov     eax,esi
        stosd
        mov     al,I_POP_EAX
        stosb

@@:

        mov     esi,dl_addr_pbrush_off_l
        or      esi,esi
        jz      @F                      ; no last byte pattern fetch...

;/*
;** Adjust last brush fetch starting offset:
;*/

        mov     al,I_MOV_MEM_AX
        stosb
        mov     eax,esi
        stosd

;/*
;** Reinitialize last brush fetch starting index:
;**
;** Unlike the first and middle cases, no I_PUSH_EAX / I_POP_EAX pair is
;** emitted around this one; no further store of the brush offset is
;** emitted after it in this section.
;*/
        push    esi
        lea     esi,get_brush_index_l
        mov     ecx,brush_index_l_len
        rep     movsb                   ; copy the get_brush_index_l code
        pop     esi
        mov     al,I_MOV_MEM_AL
        stosb
        add     esi,BRUSH_INDEX_LESS_BRUSH_OFF
        mov     eax,esi
        stosd

@@:
cblt_6300:


        subttl  Looping Logic - Scan Line Update
        page
;/*
;**       Generate the next scanline code.  The next scan line code must
;**       handle monochrome bitmaps, the device, huge bitmaps, the presence
;**       or absence of a source.
;**
;**       Also color bitmaps, and mono <==> color interactions.
;**
;**       <       add si,devSrc.next_scan> ;Normal source scan line update
;**       <       Huge Bitmap Update     > ;>64K source update code
;**               add di,devDst.next_scan ;Normal destination scan line update
;**       <       Huge Bitmap Update     > ;>64K destination update code
;**
;*/

        ; determine if this is a screen to screen blit

src_move2:
; Screen-to-screen case: copy the usl_template end-of-line code to EDI
; and patch its fields.  The scrn_to_scrn_test macro is passed not_scrn2;
; presumably it branches there when the blt is not screen to screen
; (macro body not visible here -- verify).
        scrn_to_scrn_test <not_scrn2>

        ; NewCon 8-13-89
        ; Emit the code for the end of a line

        mov     eax, edi                ; remember where the copy starts
        mov     esi, offset usl_template
        mov     cx, USL_LEN
        movzx   ecx,cx
        rep     movsb

        ; now do all the fixups in the template

        mov     esi, eax                        ; si = beginning of template on stack
        mov     ax, I_ADD_SI_AX         ; next source scan line fixup
        cmp     iDir, INCREASE
        je      @f
        mov     ax, I_SUB_SI_AX
@@:
        mov     [esi+USL_SRC_ADDSUB], ax

        mov     ax, I_ADD_DI_AX         ; next dest scan line fixup
        cmp     iDir, INCREASE
        je      @f
        mov     ax, I_SUB_DI_AX
@@:
        mov     [esi+USL_DST_ADDSUB], ax

; Only the high word of ySorc * SCREEN_CBSCAN (DX after the 16-bit mul)
; is kept; AX is overwritten immediately afterwards.
        mov     ax, ySorc               ; ax = initial source line

        mov     dx, SCREEN_CBSCAN       ; dx = bytes per scan line
        mul     dx                      ; ax = scan line offset, dx = bank
        mov     cblt_src_current_bank,dx

        mov     ax, 1                   ; set the next bank bump factor
        cmp     iDir, INCREASE
        je      @f
        neg     ax                      ; -1 when not INCREASE
@@:
        mov     [esi+USL_SRC_NBANK], ax   ; set the source bank
        mov     [esi+USL_DST_NBANK], ax   ; set the dest bank

; USL_SEG1 = instance_line_buffer, plus cxExt when stepping left.
; NOTE: EBX (BH = fbF0, BL = fbMore) is overwritten on the STEPLEFT path.
        mov     eax, offset instance_line_buffer
        cmp     iStepDir, STEPLEFT
        jne     @f
        mov     ebx, cxExt
        add     eax, ebx
@@:
        mov     [esi+USL_SEG1], eax

        mov     eax, cxExt
        mov     [esi+USL_COUNT], eax

; As for the source: keep only the bank (high word) of the product.
        mov     ax, yDest               ; ax = initial destination line

        mov     dx, SCREEN_CBSCAN       ; dx = bytes per scan line
        mul     dx                      ; ax = scan line offset, dx = bank
        mov     cblt_dst_current_bank,dx

; EAX = offset from the current code address back to pfnBlt; this path
; always uses the long loop form at cblt_6400.
        mov     eax, pfnBlt
        sub     eax, edi
        jmp     cblt_6400

not_scrn2:

        mov     ch,iDir                 ;Load this for YUpdate code

        mov     bl, fbF0                ; cache the flags
        test    bh,F0_SRC_PRESENT       ;Is there a source?
        jz      cblt_6340               ;  No, skip source processing

        mov     dx,I_ADD_ESI_DWORD_I      ;add si,increment
        push    ebp
        lea     ebp,devSrc
        call    y_update                ;Generate the Y scan line update code
        pop     ebp                      ;Restore frame pointer

cblt_6340:
        mov     dx,I_ADD_EDI_DWORD_I      ;add reg,increment
        push    ebp
        lea     ebp,devDst               ;--> destination data
        call    y_update                ;Generate the Y scan line update code
        pop     ebp                      ;Restore frame pointer
;/*
;**       Compile the scan line loop.  The code simply jumps to the start
;**       of the outer loop if more scans exist to be processed.
;*/

cblt_6380:
        mov     eax,pfnBlt   ;Compute relative offset of
        sub     eax,edi                   ;  start of loop
        cmp     eax,-125                 ;Can this be a short label?
        jc      cblt_6400               ;  No, must make it a near jmp
        sub     al,2                    ;Bias offset by length of LOOP inst.
        mov     ah,al
        mov     al,I_LOOP
        stosw                           ;Set the loop instruction
        jmp     short cblt_6420

cblt_6400:
        mov     esi,OFFSET jmp_cx_nz ;Move in the dec CX jnz code
        movsd
        movsb
        .errnz   JMP_CX_NZ_LEN-5         ;Must be four bytes long
        sub     eax,JMP_CX_NZ_LEN + 4                    ; adjust jump bias
        stosd                           ;  and store it into jump

cblt_6420:
        mov     al,I_RET            ;Stuff the far return instruction
        stosb

        ret

CBLT ENDP


        subttl  Phase Align Code Generation
        page
;/***************************************************************************
;*
;* FUNCTION NAME = phase_align_generate
;*
;* DESCRIPTION   = Generate the phase alignment if any.
;*
;*                 It is assumed that AL contains the source byte
;*
;*                 Registers Destroyed:
;*                       AX,CX,SI
;*
;* INPUT         = iHorzPhase (BLT frame variable) = phase alignment
;*                 EDI --> where to generate the code
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        public  phase_align_generate
phase_align_generate PROC SYSCALL,
 pddcDst  :DWORD, ;Destination ddc
 xDst     :DWORD, ;Destination x origin
 yDst     :DWORD, ;Destination y origin
 psdSrc   :DWORD, ;Source surface definition
 xSrc     :DWORD, ;Source x origin
 ySrc     :DWORD, ;Source y origin
 cxExt    :DWORD, ;x extent of the BLT
 cyExt    :DWORD, ;y extent of the BLT
 usMix    :DWORD, ;Mix mode
 ipcBkgnd :DWORD, ;Color to mono background match color
 fsBlt    :DWORD

        include frame.blt

;/*
;**       Emit the phase alignment code at EDI (EDI is advanced past it).
;**
;**       Entry:      iHorzPhase = horizontal phase, read from the BLT frame
;**                   fbFetch    = fetch flags, read from the BLT frame
;**                   EDI       --> where to generate the code
;**       Exit:       EDI       --> first byte past the generated code
;**       Destroyed:  AX,ECX,ESI,flags
;**
;**       Nothing at all is generated when the phase is zero.
;*/

        mov     cl,iHorzPhase           ; fetch horizontal phase
        or      cl,cl                   ; any phase alignment ?
        jz      cblt_phase0             ; no, so skip alignment...

;/*
;**       A phase of 1..4 becomes a rotate left by that count; a larger
;**       phase becomes a rotate right by (8 - phase).  The two opcodes
;**       share their low byte (see the .errnz), so only AH is replaced.
;*/

        mov     ax,I_ROL_AL_N           ; assume rotate left n times
        cmp     cl,5                    ; 4 or less rotates?
        jc      @F                      ;  yes...
        neg     cl                      ;  no, compute ROR count
        add     cl,8                    ;  cl = 8 - phase
        mov     ah,HIGH I_ROR_AL_N
        .errnz   (LOW I_ROL_AL_N) - (LOW I_ROR_AL_N)
@@:     stosw                           ;Stuff the phase alignment rotates
        mov     al,cl                   ;  then the rotate count
        stosb
;/*
;** Do not generate phase masking if there is only 1 src AND only 1 dest byte.
;** This is not just an optimization, see comments where these flags are set.
;*/

        mov     al,fbFetch
        and     al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE
        xor     al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE
        jz      @F                      ; both flags set, no masking code

;/*
;**       Copy the phase_align template.  In a 32-bit segment REP counts
;**       with the whole of ECX, so the full register is loaded; setting
;**       only CL/CH would leave whatever the caller had in the upper
;**       half of ECX as part of the count.
;*/

        mov     esi,OFFSET phase_align
        mov     ecx,(PHASE_ALIGN_LEN SHR 1) ; word count, upper ECX zeroed
        rep     movs    word ptr [edi],word ptr [esi]
if      PHASE_ALIGN_LEN AND 1
        movsb                           ; odd length, move the last byte
endif
@@:

cblt_phase0:
        ret
phase_align_generate        ENDP


        subttl  Scan Line Update Generation
        page
;/***************************************************************************
;*
;* FUNCTION NAME = y_update
;*
;* DESCRIPTION   =
;*
;*     Generate Y update code.
;*
;*
;*     The Y update code is generated as follows:
;*
;*     For the display, small bitmaps, and huge bitmaps where the BLT
;*     doesn't span a segment boundary, all that need be done is add
;*     next_scan to the offset portion of the bits pointer. next_scan
;*     is a 2's complement if the BLT is Y-, so an addition can always
;*     be done.
;*
;*         < add   si,next_scan >
;*           add   di,next_scan
;*
;*
;*     For huge bitmaps where the BLT spans a segment boundary, the
;*     above update must be performed, and the overflow/underflow
;*     detected.  This isn't too hard to detect.
;*
;*     For any huge bitmap, there can be a maximum of Planes*bmWidthBytes-1
;*     unused bytes in a 64K segment.  The minimum is 0.  The scan line
;*     update always updates to the first plane of the next (previous) scan.
;*
;*
;*     When the BLT is Y+, if the new offset is anywhere within the
;*     unused bytes of a segment, or in the first scan of a segment,
;*     then overflow must have occurred:
;*
;*           -bmFillBytes <= offset < Planes*bmWidthBytes
;*
;*     Since the update is always made to the first plane of a scan,
;*     Planes in the above equation can be thrown out.  Also, if
;*     bmFillBytes is added to both sides of the equation:
;*
;*           0 <= offset < bmWidthBytes+bmFillBytes   (unsigned compare)
;*
;*     will be true if overflow occurs.  The Y+ overflow check will
;*     look like:
;*
;*
;*         lea ax,bmFillBytes[si]                      ;Adjust for fill bytes now
;*         cmp ax,bmWidthBytes+bmFillBytes            ;Overflow occur?
;*         jnc NoOverflow                              ;  No
;*         cmp cx,2                                    ;Any more scans?
;*         jc  NoOverflow                              ;  No, don't update selector
;*         add si,bmFillBytes                          ;Step over fill bytes
;*         mov ax,ds                                   ;Compute new selector
;*         add ax,bmSegmentIndex
;*         mov ds,ax
;*
;*       NoOverflow:
;*
;*
;*
;*     For Y- BLTs, the test is almost the same.  The equation becomes
;*
;*        -(Planes*bmWidthBytes) > offset             (unsigned compare)
;*
;*     then underflow occurs.  Planes in the above equation cannot be
;*     thrown out.  The Y- underflow check will look like:
;*
;*         mov ax,si
;*         cmp ax,-(Planes*bmWidthBytes)              ;Overflow occur?
;*         jc  NoOverflow                              ;  No
;*         cmp cx,2                                    ;Any more scans?
;*         jc  NoOverflow                              ;  No, don't update selector
;*         add si,bmFillBytes                          ;Step over fill bytes
;*         mov ax,ds                                   ;Compute new selector
;*         add ax,bmSegmentIndex
;*         mov ds,ax
;*
;*     bmFillBytes and bmSegment index will be the 2's complement by
;*     now if the BLT is Y-.
;*
;*                  Registers Preserved:
;*                        DX,SI
;*                  Registers Destroyed:
;*                        AX,DI,flags
;*
;* INPUT         = SS:BP --> source or destination data
;*                 SS:DI --> where to generate the code
;*                 DX     =  update register (add si,wordI & mov ax,si)
;*                 BL     =  lea register (SI or DI)
;*                 BH     =  mov si,ax   or   mov di,ax register
;*                 CL     =  segment register (DS or ES)
;*                 CH     =  Direction
;*
;* OUTPUT        = SS:BP --> source or destination data
;*                 SS:DI --> where to generate the code
;*                 BL     =  lea register (SI or DI)
;*                 BH     =  mov si,ax   or   mov di,ax register
;*                 CL     =  segment register (DS or ES)
;*                 CH     =  Direction
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


public  y_update
y_update PROC SYSCALL

;/*
;**       Register usage of this 32-bit implementation:
;**
;**       Entry:      EBP --> DEV structure (source or destination)
;**                   EDI --> where to generate the code
;**                   DX   =  two-byte opcode that is stored ahead of a
;**                           dword immediate; the callers in CBLT pass
;**                           I_ADD_ESI_DWORD_I or I_ADD_EDI_DWORD_I
;**                   BL   =  blt flags (tested for F0_DEST_IS_DEV)
;**                   CH   =  direction (compared with DECREASE)
;**       Exit:       EDI --> first byte past the generated code
;**       Preserved:  EBX,ECX,EDX,ESI,EBP
;**       Destroyed:  EAX,flags
;**
;**       BL and DX must survive this routine: CBLT loads BL once and then
;**       calls y_update twice, so no scratch use is made of EBX or EDX.
;*/

;/*
;**       Stuff the scan line increment for the source or destination
;**
;**       <   add     esi,12345678h   >   ;Update source
;**       <   add     edi,9ABCDEF0h   >   ;Update destination
;*/

        mov     eax,[ebp].DEV.next_scan ;Get the increment
        or      eax,eax                 ;If zero, don't generate the code
        jz      y_update_10
        xchg    eax,edx                 ;Set opcode
        stosw
        xchg    eax,edx                 ;Set increment (EDX = opcode again)
        stosd

y_update_10:
        test    [ebp].DEV.dev_flags,DEV_IS_DEVICE
        jz      @f                      ;Check spans segment on device only
        test    [ebp].DEV.dev_flags,DEV_SPANS_SEG
        jnz     spans_a_segment
@@:
        ret
;/*
;**       The BLT spans a segment.  The code to detect when the segment is
;**       crossed must be generated.  The generated sequence is laid out as
;**       follows (mnemonics for the symbolic I_xxx opcodes are inferred
;**       from their names; the byte counts are exact):
;**
;**                 push  ebp
;**                 mov   ebp,<address of this DEV structure>
;**                 dec   DEV.line_left[ebp]
;**                 cmp   DEV.line_left[ebp],0
;**                 jnz   done                  ;lines remain in this bank
;**                 mov   al,DEV.line_per_bank[ebp]
;**                 mov   DEV.line_left[ebp],al
;**                 sub|add esi|edi,BANK_SIZE
;**                 cmp   ecx,2
;**                 jc    done
;**                 add   esi|edi,fill_bytes
;**                 jmp   short over            ;EB 04
;**         vars    dd    current_bank + 256*next_bank
;**         over:   push  edx
;**                 push  eax
;**                 mov   ebp,<address of vars>
;**                 mov   eax,[ebp]
;**                 add   al,ah
;**                 mov   [ebp],eax
;**                 mov   dl,al
;**                 <bank_select_logic template>
;**                 pop   eax
;**                 pop   edx
;**         done:   pop   ebp
;*/

spans_a_segment:
        mov     al,55h                  ; push ebp
        stosb

        mov     al,0BDh                 ; load ebp immediate
        stosb                           ; emit it
        mov     eax,ebp                 ; immediate = address of the DEV data
        stosd

        mov     ax,I_DEC_EBP            ; count down the lines left in bank
        stosw
        mov     al,DEV.line_left        ; displacement byte = field offset
        stosb

        mov     ax,I_CMP_EBP_BYTE_I     ; compare line_left ...
        stosw
        mov     al,DEV.line_left
        stosb
        mov     al,0                    ; ... with zero
        stosb

;/*
;**       The jnz target is the final "pop ebp".  Between the end of the
;**       jnz and that pop lie 48 bytes emitted by this routine plus the
;**       bank select template.
;*/

        mov     al,I_JNZ
        mov     ah,BANK_SELECT_CODE_LEN + 48
        stosw

        mov     ax,I_MOV_AL_EBP         ; reload the line counter from
        stosw                           ;   line_per_bank
        mov     al,DEV.line_per_bank
        stosb

        mov     ax,I_MOV_EBP_AL
        stosw
        mov     al,DEV.line_left
        stosb

;/*
;**       Pull the offset back by one bank.  The register is EDI when the
;**       destination is the device, else ESI; the operation is a subtract
;**       unless the direction is DECREASE, in which case it is an add.
;*/

        cmp     ch,DECREASE
        je      y_neg
        mov     ax,I_SUB_ESI_DWORD_I
        test    bl,F0_DEST_IS_DEV       ; Is the dest the device?
        jz      src_is_dev
        mov     ax,I_SUB_EDI_DWORD_I
src_is_dev:
        stosw
        mov     eax,BANK_SIZE
        stosd
        jmp     short   y_set
y_neg:
        mov     ax,I_ADD_ESI_DWORD_I
        test    bl,F0_DEST_IS_DEV       ; Is the dest the device?
        jz      src_is_dev1
        mov     ax,I_ADD_EDI_DWORD_I
src_is_dev1:
        stosw
        mov     eax,BANK_SIZE
        stosd
y_set:

;/*
;**       Skip the bank select when fewer than two scans remain.  The word
;**       stored after I_CMP_ECX_2 is the byte 02h (presumably the immediate
;**       of the compare) followed by the low byte of I_JC_P0DH; the byte
;**       after that is the jc displacement, which also lands on the final
;**       "pop ebp" (31 emitted bytes plus the bank select template).
;*/

        mov     ax,I_CMP_ECX_2
        stosw

        mov     ax,2+((LOW I_JC_P0DH)*256)
        stosw

        mov     al,BANK_SELECT_CODE_LEN + 31
cblt_2303:

        stosb
        .errnz   (LOW I_MOV_ESI_EAX)-(LOW I_MOV_EDI_EAX)

        mov     ax,dx                   ;Get add si, or add di, (DX is kept)
        stosw
        mov     eax,[ebp].DEV.fill_bytes
        stosd

;/*
;** This is the code emitter for a bank select
;** It will copy a bank select routine into the stack.
;**---
;** The generated code needs a dword of local storage in line with the
;** code.  Jump around the dword that will be used as the storage area.
;*/

        mov     ax,04EBh                ; emit jmp short over the 4 bytes
        stosw

        push    edi                     ; save the variables addr; the stack
                                        ;   is used so that EBX stays intact

;/*
;** emit the current and next initial values
;** byte 0 = current bank, byte 1 = next bank, bytes 2-3 = 0
;*/

        mov     al,byte ptr [ebp].DEV.current_bank
        mov     ah,byte ptr [ebp].DEV.next_bank
        movzx   eax,ax
        stosd                           ; emit the values
;/*
;** this is the destination of the jump.
;** update current and put the new value into dl
;**---
;** emit pushes for the registers used.
;** ebp was already pushed at the top of the sequence, so only
;** edx and eax are pushed here
;*/

        mov     al,52h                  ; push edx
        stosb
        mov     al,50h                  ; push eax
        stosb
;/*
;**---
;** the variables address saved above is on top of the stack
;**---
;** emit --> mov ebp, <variables addr>
;*/

        mov     al,0BDh                 ; load ebp immediate
        stosb                           ; emit it
        pop     eax                     ; eax = addr of variables
        stosd                           ; emit it
;/*
;** emit --> mov eax, [ebp+0]
;** three byte instruction (8B 45 00)
;*/
        mov     al,8Bh
        stosb
        mov     ax,0045h
        stosw

;/*
;** emit --> add al, ah
;*/

        mov     ax,0C402h               ; add al, ah
        stosw

;/*
;** emit --> mov [ebp+0], eax
;** three byte instruction (89 45 00)
;*/
        mov     al,89h
        stosb
        mov     ax,0045h
        stosw

;/*
;** emit --> mov dl, al
;*/

        mov     ax,0D08Ah
        stosw

;/*
;** copy the bank select template behind the code emitted so far
;*/

        pushf                           ; save the flags and regs we will need
        push    ecx
        push    esi

        cld                             ; clear the direction flag
        mov     esi,offset bank_select_logic ; esi = bank select template
        mov     ecx,BANK_SELECT_CODE_LEN ; ecx = length of template

        test    ecx,1                   ; optimize for word moves
        jz      @f                      ; if an odd number of bytes must move
        movsb                           ; move the first byte
        dec     ecx                     ; update the count
@@:
        shr     ecx,1                   ; ecx = length of template in words
        rep     movsw                   ; move template to stack

        pop     esi                     ; restore the registers and flags
        pop     ecx
        popf

        ; emit --> pop eax, edx (reverse of the pushes above)

        mov     al,58h                  ; pop eax
        stosb
        mov     al,5Ah                  ; pop edx
        stosb


y_update_40:
        mov     al,5Dh                  ; pop ebp, target of the jnz and jc
        stosb
        ret
y_update        endp


END
