;*DDK*************************************************************************/
;
; COPYRIGHT    Copyright (C) 1995 IBM Corporation
;
;    The following IBM OS/2 WARP source code is provided to you solely for
;    the purpose of assisting you in your development of OS/2 WARP device
;    drivers. You may use this code in accordance with the IBM License
;    Agreement provided in the IBM Device Driver Source Kit for OS/2. This
;    Copyright statement may not be removed.;
;*****************************************************************************/
        PAGE     55,132
        TITLE    BitBlt
        SUBTITLE Header
;/*****************************************************************************
;*
;* SOURCE FILE NAME = BITBLT.ASM
;*
;* DESCRIPTIVE NAME = BitBLT at level of device driver.
;*
;*
;* VERSION      V2.0
;*
;* DATE
;*
;* DESCRIPTION
;*
;*       This is the main module of those comprising the source to BitBLT
;*       (Bit BLock Transfer) for display drivers. It
;*       defines the procedure, and performs general preprocessing for all BLT
;*       requests.
;*
;*       BitBLT  transfers a rectangle of bits from source to destination,
;*       doing some useful operations on the way, namely:
;*
;*       o     excluding the cursor;
;*
;*       o     performing a user-specified raster operation, out of
;*             a vast array of choices, which takes the form
;*
;*             D = f(S,D,P)
;*
;*             where S = source bit, D = destination bit, P = pattern
;*             bit, and  f  is a sequence of logical operations (AND, OR,
;*             XOR, NOT) on S, D, and P;
;*
;*       o     recognizing common special cases for accelerated processing.
;*
;*       For a detailed explanation of the contortions BitBLT goes through
;*       to put your bits in place, see the file COMMENT.BLT.
;*
;*
;*       BitBLT consists of the following files:
;*
;*             BITBLT.ASM        procedure definition
;*             CBLT.ASM          procedure to compile arbitrary BLT on stack
;*
;*             FRAME.BLT         function parameters and locals
;*             CONSTANT.BLT      constants
;*             DATADEFS.BLT      compiled code templates and data
;*             ROPTABLE.BLT      table of ROP templates and definitions
;*             SURFACE.BLT       Surface processing
;*             PATTERN.BLT       pattern preprocessing
;*
;*             COMPUTEY.BLT      compute y-related values
;*             EXIT.BLT          device-specific cleanup before exiting
;*
;*             SPECIAL.BLT       special case code
;*             COMMENT.BLT       overview of history and design
;*
;*
;* FUNCTIONS   OemBitblt
;*             phase_processing
;*             far_do_cblt
;*
;* NOTES       NONE
;*
;* STRUCTURES   NONE
;*
;* EXTERNAL REFERENCES
;*
;*              NONE
;*
;* EXTERNAL FUNCTIONS
;*
;*              NONE
;*
;* CHANGE ACTIVITY =
;*   DATE      FLAG        APAR   CHANGE DESCRIPTION
;*   --------  ----------  -----  --------------------------------------
;*   mm/dd/yy  @Vr.mpppxx  xxxxx  xxxxxxx
;*   08/27/90                     Viroon Touranachun [viroont] Subroutinized
;*                                phase_processing from OEMBitBlt
;*
;*****************************************************************************/

        .386                                      ;Assemble for the 80386 instruction set
;/*
;** Assembler environment for the module: flat memory model with SYSCALL as
;** the default language type, and every segment register assumed to address
;** the FLAT group.
;*/
        .MODEL FLAT,SYSCALL
        ASSUME  SS:FLAT, DS:FLAT, CS:FLAT, ES:FLAT

;/*
;** Declare the three code segments (empty at this point) and collect them
;** into BLGROUP.  The code that follows is placed in _TUNE.
;** NOTE(review): declaring them up front in this order presumably fixes
;** their relative ordering for the linker -- confirm against the build.
;*/
BLGROUP  group  _TUNE,_TUNEOS,_TEXT
_TUNE segment USE32 PUBLIC 'CODE'
_TUNE ENDS
_TUNEOS segment USE32 PUBLIC 'CODE'
_TUNEOS ENDS
_TEXT segment USE32 PUBLIC 'CODE'
_TEXT ENDS
;/*
;** Assembly-time check that BITS_PEL is 4.
;** NOTE(review): BITS_PEL is not defined anywhere above this line in this
;** file; presumably it comes from the build or from one of the includes
;** below -- confirm that this check is really evaluated.
;*/
        .errnz  BITS_PEL - 4                      ;Some uses of BITS_PEL expect it
                                                  ;  to be 4  -- should clean up
;/*
;** Driver-wide definitions, included with listing turned off.  The DINCL_*
;** equates are defined just before driver.inc is included; presumably they
;** select which optional sections of that file are assembled -- confirm.
;*/
        .xlist
        include pmgre.inc
DINCL_BB_ROPS   equ     1
DINCL_BITMAP    equ     1
DINCL_ENABLE    equ     1
        include driver.inc
        include extern.inc
        include protos.inc
        include egafam.inc
        include oemblt.inc
        include assert.mac
        .list

;/*
;** BitBLT-specific constants, followed by the data segment contents.
;** DEFINE_ROPTABLE is defined immediately before roptable.blt is included;
;** presumably it makes that file emit the table itself rather than only
;** reference it -- confirm.
;*/
        include constant.blt
.DATA
        include gendata.blt
DEFINE_ROPTABLE    equ     1
        include roptable.blt
.list

;/*
;** This is the BitBlt entry point.
;** Therefore we DO need to generate a stack frame, so the assembler's
;** default prologue and epilogue generation is enabled for the PROC that
;** follows.
;*/

.CODE
_TUNE segment  USE32 PUBLIC 'CODE'
OPTION PROLOGUE:PROLOGUEDEF
OPTION EPILOGUE:EPILOGUEDEF

;/***************************************************************************
;*
;* FUNCTION NAME = OemBitblt
;*
;* DESCRIPTION   = BitBlt entry point; generates the stack frame shared by
;*                 the BitBLT code.  In order, it:
;*
;*                 o  looks up the encoded raster operation for usMix in
;*                    roptable (ROPs 80h-FFh are mapped onto 00h-7Fh with
;*                    the "negate needed" flag toggled) and saves it in
;*                    usMixData;
;*                 o  calls pdevice_processing and pattern_preprocessing;
;*                 o  excludes the cursor if the source and/or the
;*                    destination is the display;
;*                 o  calls BitBlt_Dispatch to perform the BLT;
;*                 o  cleans up, removes the exclusion and returns.
;*
;* INPUT         = pddcDst  :DWORD
;*                 xDst     :DWORD
;*                 yDst     :DWORD
;*                 psdSrc   :DWORD
;*                 xSrc     :DWORD
;*                 ySrc     :DWORD
;*                 cxExt    :DWORD
;*                 cyExt    :DWORD
;*                 usMix    :DWORD
;*                 ipcBkgnd :DWORD
;*                 fsBlt    :DWORD
;*
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = EAX = BBRC_NORMAL
;* RETURN-ERROR  = No error return is generated within this procedure
;*
;* REGISTERS     = ESI and EDI are saved and restored through the USES list.
;*                 EAX, EBX, ECX and EDX are written by this procedure.
;*                 NOTE(review): EBX is overwritten but is not in the USES
;*                 list -- confirm that no caller expects it preserved.
;*
;**************************************************************************/

ALIGN 4
OemBitblt PROC SYSCALL USES ESI EDI,
 pddcDst  :DWORD, ;Destination ddc
 xDst     :DWORD, ;Destination x origin
 yDst     :DWORD, ;Destination y origin
 psdSrc   :DWORD, ;Source surface definition
 xSrc     :DWORD, ;Source x origin
 ySrc     :DWORD, ;Source y origin
 cxExt    :DWORD, ;x extent of the BLT
 cyExt    :DWORD, ;y extent of the BLT
 usMix    :DWORD, ;Mix mode
 ipcBkgnd :DWORD, ;Color to mono background match color
 fsBlt    :DWORD

;/*
;** frame.blt is described in the file header as "function parameters and
;** locals"; names used below that are not parameters (usMixData, devSrc,
;** selScreen, ...) are presumably declared there -- confirm.
;*/

include frame.blt

;/*
;** Debug-only (FIREWALLS) checking.  ddc? is a macro defined elsewhere;
;** presumably it validates the destination ddc -- confirm.
;*/

ifdef FIREWALLS
        mov     eax,pddcDst
        ddc?    eax,<SURFACE>
        mov     selScreen,INVALID_ADDRESS
endif

;/*
;** Get the encoded raster operation, and map the raster op if needed.
;**
;** To map the ROPS 80h through FFh to 00h through 7Fh, take the
;** 1's complement of the ROP, and invert the "negate needed" flag.
;**
;** EAX starts as 0, or as NEGATE_NEEDED when the ROP was complemented
;** (NEGATE_NEEDED has a zero low byte, so loading AH is sufficient).
;** XORing the roptable word into AX therefore loads the table entry and
;** toggles its negate flag in a single instruction.
;**
;** NOTE(review): only BL is sign-tested and complemented, but the whole of
;** EBX is doubled and used as the table index, so usMix is assumed to lie
;** in 0..FFh -- confirm that callers guarantee this.
;*/

        cld                                       ;Let's make no assumptions about this!
        mov     eax,0                             ;Assume not 80h : FFh
        mov     ebx,usMix
        or      bl,bl                             ;Is this in the second half (80-FF)?
        jns     @F                                ;  No, rop index is correct
        not     bl                                ;  Yes, want the inverse
        mov     ah,HIGH NEGATE_NEEDED            ;Want to invert the not flag
        .errnz  (LOW NEGATE_NEEDED)
@@:
        add     ebx,ebx                           ;Turn into a word index
        xor     ax,roptable[ebx]                 ;Get ROP, maybe toggle negate flag
        movzx   eax,ax
        mov     usMixData,eax                     ;Save data for compiling

;/*
;** Build the source/pattern bits of fbF0 in BL.  SOURCE_PRESENT and
;** PATTERN_PRESENT are bits 5 and 6 of the high byte of the encoded ROP;
;** rotating right by one moves them to F0_SRC_PRESENT (bit 4) and
;** F0_PAT_PRESENT (bit 5), as the .ERRNZ checks below verify.
;**
;** BL is not stored to memory here; presumably pdevice_processing takes
;** it as a register input -- confirm.
;*/

        mov     bl,ah                             ;Set fbF0 for source and pattern
        and     bl,high (SOURCE_PRESENT or PATTERN_PRESENT)
        ror     bl,1
        .ERRNZ  (SOURCE_PRESENT -  0010000000000000b)
        .ERRNZ  (PATTERN_PRESENT - 0100000000000000b)
        .ERRNZ  (F0_SRC_PRESENT -  00010000b)
        .ERRNZ  (F0_PAT_PRESENT -  00100000b)


;/*
;** We have picked up the raster op information.   Get the source, pattern,
;** and destination parameters as needed.
;*/

        call    pdevice_processing
        call    pattern_preprocessing
page

;/*
;** Cursor Exclusion
;**
;** If either device or both devices are for the display, then the cursor
;** must be excluded.  If both devices are the display, then a union of
;** both rectangles must be performed to determine the exclusion area.
;**
;** BH is read here as the flag byte carrying F0_SRC_IS_DEV and
;** F0_DEST_IS_DEV; presumably it is left there by the two preprocessing
;** calls above -- confirm.
;**
;** On reaching cur_exclusion_do_it:
;**       ECX = left x,  EDX = top y
;**       ESI = right x, EDI = bottom y   (both inclusive)
;** NOTE(review): these are presumably the register inputs of far_exclude
;** -- confirm against its definition.
;*/

        mov     al,bh
        and     al,F0_SRC_IS_DEV or F0_DEST_IS_DEV
        jz      cur_exclusion_end                ;Both are memory bitmaps
        mov     esi,cxExt
        mov     edi,cyExt
        dec     esi                               ;Make extents inclusive of last point
        dec     edi
        mov     ecx,xDst                          ;Assume only a destination on the
        mov     edx,yDst                          ;  display
        test    al,F0_SRC_IS_DEV                 ;Is the source a memory bitmap?
        jz      cur_exclusion_no_union           ;  Yes, go set right and bottom

;/*
;** The flags set by the TEST below are consumed by the JZ four instructions
;** later; the intervening MOVs do not alter flags.  Note that the first MOV
;** overwrites AL, so the flag byte is no longer available after it.
;*/

        test    al,F0_DEST_IS_DEV                ;  (set 'Z' if dest is memory)
        mov     eax,ecx                           ;  No, prepare for the union
        mov     ebx,edx
        mov     ecx,xSrc                          ;Set source org
        mov     edx,ySrc
        jz      cur_exclusion_no_union           ;Dest is memory. Set right and bottom

;/*
;** If the source starting Y is greater than surface allows, then we're
;** supporting a restore_screen_bitmap call.  In this case, we only want
;** to exclude the dest rectangle.
;**
;** At this point EAX/EBX = destination x/y and ECX/EDX = source x/y.  The
;** comparison against devSrc.height is signed.
;*/

        cmp     edx,devSrc.height                ;If source is off the screen
        jl      cur_exclusion_not_sbb            ; then only use dest rectangle.
        xchg    eax,ecx                           ;ECX = destination x again
        mov     edx,ebx                           ;EDX = destination y again
        jmp     cur_exclusion_no_union
ALIGN 4

;/*
;** The union of the two rectangles must be performed.  The top left corner
;** will be the smallest x and smallest y.  The bottom right corner will be
;** the largest x and the largest y added into the extents
;**
;** After the two compare/exchange steps ECX/EDX hold the smaller x/y and
;** EAX/EBX hold the larger x/y.
;*/

cur_exclusion_not_sbb::
        cmp     ecx,eax                           ;Get smallest x
        jle     cur_exclusion_y                  ;ECX already holds the smaller x
        xchg    eax,ecx                           ;Swap so ECX = smaller, EAX = larger

cur_exclusion_y::
        cmp     edx,ebx                           ;Get smallest y
        jle     cur_exclusion_union              ;EDX already holds the smaller y
        xchg    edx,ebx                           ;Swap so EDX = smaller, EBX = larger

cur_exclusion_union::
        add     esi,eax                           ;Set right
        add     edi,ebx                           ;Set bottom
        jmp     cur_exclusion_do_it;Go do exclusion
ALIGN 4

cur_exclusion_no_union::
        add     esi,ecx                           ;Set right
        add     edi,edx                           ;Set bottom

cur_exclusion_do_it::
        INVOKE  far_exclude                       ;Exclude the area from the screen

cur_exclusion_end::

;/*
;** Now we dispatch to an appropriate low-level BitBlt routine.
;**
;** When SLITE is defined and fStarlight bit 0 is set, the Starlight
;** special-case routine is tried first; it returns with carry set when it
;** has completed the BLT itself, in which case the dispatch is skipped.
;*/


ifdef SLITE
        test    [fStarlight],1          ;are we on a Starlight ?
        jz      @F                      ;no--continue
        call    check_starlight_special_cases   ;yes--check for Starlight
                                                ;speed ups
        jc      bitblt_exit             ;C ==> BLT done w/special case
@@:
endif

        call    BitBlt_Dispatch

;/*
;** Common exit.  far_unexclude is invoked on every path, including the one
;** that skipped far_exclude because both surfaces were memory bitmaps.
;** fw_zero is a macro defined elsewhere; presumably it zeroes ECX in
;** FIREWALLS builds only -- confirm.
;*/

bitblt_exit::
        call    clean_up_before_exit
        cld                                       ;Leave direction cleared
        INVOKE  far_unexclude                     ;Remove any exclusion area
        mov     eax,BBRC_NORMAL                   ;Want pass 2 if image data blt
        fw_zero <ecx>
        ret
OemBitblt ENDP
;_TUNE ENDS
page

;/***************************************************************************
;*
;* FUNCTION NAME = phase_processing
;*
;* DESCRIPTION   = The following routine determines the bltting phase and other
;*                 parameters for CBLT to correctly compile code on the stack.
;*
;*                 Registers Destroyed:
;*                       AX,BX,CX,DX,SI,DI,DS,ES,flags
;*                 Registers Preserved:
;*                       BP
;*
;* INPUT         = SS:BP --> BitBLT local variable frame
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE

ALIGN 4
phase_processing PROC SYSCALL,
 pddcDst  :DWORD, ;Destination ddc
 xDst     :DWORD, ;Destination x origin
 yDst     :DWORD, ;Destination y origin
 psdSrc   :DWORD, ;Source surface definition
 xSrc     :DWORD, ;Source x origin
 ySrc     :DWORD, ;Source y origin
 cxExt    :DWORD, ;x extent of the BLT
 cyExt    :DWORD, ;y extent of the BLT
 usMix    :DWORD, ;Mix mode
 ipcBkgnd :DWORD, ;Color to mono background match color
 fsBlt    :DWORD

        include frame.blt

;/*
;** Now the real work comes along:  In which direction will the copy be
;** done?  Refer to the 10 possible types of overlap that can occur (10
;** cases, 4 resulting types of action required).
;**
;** If there is no source bitmap involved in this particular BLT, then
;** the path followed must allow for this.  This is done by setting both
;** the destination and source parameters equal.
;*/

        mov     edx,cxExt                         ;Get X extent
        dec     edx                               ;Make X extent inclusive
        mov     ebx,xDst                          ;Get destination X origin
        mov     edi,ebx
        and     ebx,00000111b                     ;Get offset of destination within byte
                                                  ;   and set up BX for a base register

;/*
;** If there is no source, then just use the pointer to the destination
;** bitmap and load the same parameters, which will cause the "equality"
;** path to be followed in the set-up code.  This path is the favored
;** path for the case of no source bitmap.
;*/

        mov     eax,edi                           ;Assume no source needed
        test    fbF0,F0_SRC_PRESENT              ;Is a source needed?
        jz      phase_proc_10                     ;  No, just use destination parameters
        mov     eax,xSrc                          ;  Yes, get source origin X

;/*
;** Next: Assume two initial fetches (if no source, then it will be
;** set = 1 later) -- and zero the other flags.
;*/

        mov     fbFetch,FF_TWO_INIT_FETCHES

phase_proc_10::
        mov     esi,eax
        and     eax,00000111b                     ;Get offset of source within byte
        cmp     esi,edi                           ;Which direction will we be moving?
        jl      phase_proc_stepping_left;Move from right to left

phase_proc_stepping_right::

;/*
;** The starting X of the source rectangle is >= the starting X of
;** the destination rectangle, therefore we will be moving bytes
;** starting from the left and stepping right.
;**
;** Alternatively, this is the path taken if there is no source
;** bitmap for the current BLT.
;**
;** Rectangle cases: 3,4,5,6,8
;*/

        sub     al,bl                             ;Compute horiz. phase  (source-dest)
        mov     iStepDir,STEPRIGHT               ;Set direction of move
        mov     ah,bitmask_tbl1[ebx]             ;Get starting byte mask
        ja      phase_proc_two_fetches           ;Scan line case 2, everything is
                                                  ;  already set for this case.

;/*
;** Scan line cases 1 and 3:
;**
;** The correct first byte fetch needs to be set for the beginning of
;** the outer loop, and the phase must be made into a positive number.
;**
;** This is the path that will be followed if there is no source bitmap
;** for the current BLT.
;**
;** For speed use MOV 0 rather than AND NOT TWO_INIT_FETCHES
;** (The other flags in gl_fetch_flags have not yet been set)
;*/

        mov     fbFetch,FF_ONE_INIT_FETCH
        jmp     pp_only_one_init_fetch
ALIGN 4


;/*
;** If we get all the bits we need in the first fetch then a second
;** (unnecessary) fetch could cause a GP Fault.  So let's examine this:
;** The number of bits from (SI mod 8) to the end of the byte is the number
;** of available bits we get on the first fetch.   This is (8 - (SI mod 8)).
;** If this is greater than or equal to cxExt then we have all the bits we
;** need and we better not do the second fetch (even though the phase
;** relationship may suggest we need it).
;**
;** Conclusion: If (8 - (SI mod 8)) >= cxExt then DO NOT make second fetch.
;*/

phase_proc_two_fetches::

ifdef FIREWALLS
        push    ebx
        push    ecx                               ;Note that (SI mod 8) is currently
        mov     ecx,esi                           ;  (AL + BL)
        and     cl,7                              ;This gives (SI mod 8) in CL
        add     bl,al                             ;This gives AL+BL in BL
        assert  bl,E,cl                           ;Are they the same?
        pop     ecx
        pop     ebx
endif
        mov     ecx,8
        sub     cl,bl
        sub     cl,al

;/*
;** We can save a couple cycles here since cxExt - 1 is already in DX.
;** The condition CX >= cxExt is the same as CX > DX.
;*/

        cmp     ecx,edx                           ;CX = 8 - (SI mod 8), DX = (cxExt - 1)
        jle     pp_second_fetch_really_needed


;/*
;** We are here BECAUSE the cxExt is so small that we can get all the bits
;** on the scanline with a single lodsb (no byte boundary is crossed) AND
;** the phase relationship indicates that a second initial fetch is needed.
;**
;** We will override it and only do one fetch.  However, if we simply
;** fail to do the second fetch then the phase code will trip us up.
;** It will be expecting the bits to get fetched in the first fetch, saved
;** after the rotate, and mixed in in the second fetch's phase code.
;** So after the first fetch the bits have been saved in BH, and ANDed out
;** of the src data in AL.
;**
;** The solution is to set a flag here that tells the phase generation code
;** not to generate the usual masking part of the phase code.
;**
;** Short Bitblt Cases:                             (8 bits or less)
;**
;**       1) neither crosses byte boundary.
;**
;**          a) phase requires second initial fetch
;**
;**             Kill the phase masking.  It will trip us up.  There will
;**             be just one lodsb and one stosb and the first byte mask
;**             will protect the dest bits that should not get hit.
;**             Furthermore if a se... [the rest of this sentence is
;**             missing from the original comment]
;**
;**          b) phase requires only one initial fetch
;**
;**             Phase masking is irrelevant.  Removing it would
;**             be an optimization.
;**
;**       2) dest crosses byte boundary, but src does not
;**
;**          a) phase requires second initial fetch
;**
;**             impossible situation:  the way we determine that a 2nd fetch
;**             is necessary is if the first fetch does not get enough needed
;**             bits to satisfy the first dest byte.  Here the first fetch
;**             gets ALL the bits and the first dest byte needs less than
;**             ALL because it crosses a byte boundary.
;**
;**          b) phase requires only one initial fetch
;**
;**             Intervention would be bad.  None is necessary since the 2nd
;**             initial fetch will not be done.  If we do intervene we will
;**             cause trouble:  Killing the masking will prevent the
;**             "saved bits" from being saved.  The first byte masking
;**             can kill off these bits in AL and they will never
;**             make it to the second stosb.
;**
;**       3) src crosses byte boundary  (dest may or may not)
;**          (this is known to be untrue at this point)
;**
;**          There are bits we need in the second fetch, so a second
;**          initial fetch can not cause a GP fault.  Therefore do
;**          everything the same as we would have before.
;**
;**
;** Conclusion:  Intervention to kill the phase masking is
;**              necessary if
;**                 [src does not cross byte boundary] AND
;**                 dest does not cross byte boundary  AND
;**                 [phase requires second initial fetch].
;**              and must NOT be done if
;**                 dest crosses byte boundary, but [src does not]
;**
;** Statements in [] are known to be true at this point.
;**
;** Solution:
;**
;** If we always kill the phase-masking when neither crosses a byte
;** boundary and never kill it otherwise then everyone will be happy
;** (regardless of other conditions like whether phase requests a 2nd
;** initial fetch).
;*/

        mov     fbFetch,FF_ONLY_1_SRC_BYTE
        .ERRNZ  FF_ONE_INIT_FETCH

pp_second_fetch_really_needed::
pp_only_one_init_fetch::
        mov     ch,ah

;/*
;** We now have the correct phase and the correct first character fetch
;** routine set.  Save the phase and ...
;**
;** currently:
;**       AL = phase
;**       BL = dest start mod 8
;**       CH = first byte mask
;**       DX = inclusive X bit count
;**       SI = source X start (if there is a source)
;**       DI = destination X start
;*/

        add     al,8                              ;Phase must be positive
        and     al,00000111b


;/*
;** To calculate the last byte mask, the inclusive count can be added to
;** the start X MOD 8 value, and the result taken MOD 8. This is attractive
;** since this is what is needed later for calculating the inclusive byte
;** count, so save the result of the addition for later.
;*/

        add     ebx,edx                           ;Add inclusive extent to dest MOD 8
        mov     edx,ebx                           ;Save for innerloop count !!
        and     ebx,00000111b                     ;Set up bx for a base reg
        mov     cl,bitmask_tbl2[ebx]             ;Get last byte mask

;/*
;** To avoid GP faults must never do an extra fetch we don't need.
;** When we're ready for the last fetch there may already be enough bits
;** saved from the previous fetch (which we plan to combine with the bits
;** in the fetch we are about to do).  If so then we'd better not do this
;** last fetch (it could cause a GP fault).
;**
;** The number of bits we have left from the previous byte is (8 - AL)
;** AL is the phase.  (1 + BL) is the number of bits we actually need
;** to write to the final destination byte.
;**
;** So if  (8 - AL) >= (1 + BL)  then DO NOT do the last fetch.  This
;** simplifies:  if  (BL + AL) <= 7  then DO NOT do the last fetch.
;*/

        add     bl,al
        cmp     bl,7
        jg      phase_proc_last_fetch_needed
        or      fbFetch,FF_NO_LAST_FETCH
phase_proc_last_fetch_needed::

        mov     bl,al                             ;Compute offset into phase mask table
        add     ebx,ebx                           ;Note BH still = 0
        mov     bx,phase_tbl1[ebx]               ;Get the phase mask

;/*
;**  Currently:
;**       AL = phase
;**       BX = phase mask
;**       CL = last byte mask
;**       CH = first byte mask
;**       DX = inclusive bit count + dest start MOD 8
;**       SI = source X start (if there is a source)
;**       DI = destination starting X
;*/

        jmp     phase_proc_both_directions
ALIGN 4


;/*
;** The starting X of the source rectangle is < the X of the destination
;** rectangle, therefore we will be moving bytes starting from the right
;** and stepping left.
;**
;** This code should never be reached if there is no source bitmap
;** for the current BLT.
;**
;** Rectangle cases: 1,2,7
;*/

phase_proc_stepping_left::
        mov     iStepDir,ah                       ;Set direction of move
        .ERRNZ  STEPLEFT
        mov     cl,bitmask_tbl1[ebx]             ;Get last byte mask
        push    ebx
        add     eax,edx                           ;Find end of the source

;/*
;** To calculate the first byte mask, the inclusive count is added to the
;** start MOD 8 value, and the result taken MOD 8. This is attractive since
;** this is what is needed later for calculating the inclusive byte count,
;** so save the result of the addition for later.
;*/

        add     ebx,edx                           ;Find end of the destination
        add     edi,edx                           ;Will need to update dest start address
        add     esi,edx                           ;  and source's too
        mov     edx,ebx                           ;Save inclusive bit count + start MOD 8
        and     ebx,00000111b                     ;Get dest   offset within byte
        and     eax,00000111b                     ;Get source offset within byte
        mov     ch,bitmask_tbl2[ebx]             ;Get start byte mask
        cmp     al,bl                             ;Compute horiz. phase  (source - dest)
        jb      pp_double_fetch                  ;Scan line case 5, everything is

                                                  ;  already set for this case.
;/*
;** Scan line cases 4 and 6:
;**
;** The correct first byte fetch needs to be set for the beginning
;** of the outer loop
;*/

        mov     fbFetch,FF_ONE_INIT_FETCH
        jmp     pp_one_initial_fetch
ALIGN 4


;/*
;** If only-one-fetch is already set, then the following is a NOP.
;** It doesn't seem worth the effort to check and jmp around.
;**
;** If we get all the bits we need in the first fetch then a second
;** (unnecessary) fetch could cause a GP Fault.  So let's examine this:
;**
;** (DX + SI) points to the first pel (remember we're stepping left).
;** So the number of needed bits we get in the first fetch is
;** ((DX + SI + 1) mod 8).  This is currently equal to AX.
;** If AX >= cxExt then we'd better not do two init fetches.
;*/

pp_double_fetch::
        dec     cxExt
        cmp     eax,cxExt
        jl      pp_double_fetch_really_needed
        mov     fbFetch,FF_ONLY_1_SRC_BYTE
        .ERRNZ  FF_ONE_INIT_FETCH
pp_double_fetch_really_needed::
        inc     cxExt

pp_one_initial_fetch::
        sub     al,bl                             ;Compute horiz. phase  (source-dest)
        add     al,8                              ;Ensure phase positive
        and     al,00000111b

;/*
;** To avoid GP faults must never do an extra fetch we don't need.
;** The last byte fetch is unnecessary if Phase is greater than or equal
;** to 8 - BL.  Phase is the number of bits we still have from the previous
;** fetch. 8 - BL is the number of bits we actually need to write to the
;** final destination byte.  So if AL - (8 - BL) >= 0  skip the last fetch.
;*/

        pop     ebx
        add     bl,al
        sub     bl,8
        jl      pp_need_last_fetch
        or      fbFetch,FF_NO_LAST_FETCH
pp_need_last_fetch::

;/*
;** We now have the correct phase and the correct first character fetch
;** routine set.  Generate the phase mask and save it.
;**
;** currently:   AL = phase
;**              CH = first byte mask
;**              CL = last byte mask
;**              DX = inclusive bit count + start MOD 8
;*/

        mov     ah,cl                             ;Save last mask
        mov     cl,al                             ;Create the phase mask
        mov     ebx,00FFh                         ;  by shifting this
        shl     ebx,cl                            ;  according to the phase
        mov     cl,ah                             ;Restore last mask

phase_proc_both_directions::

;/*
;** Go compute # of bytes to BLT.
;** The different processing for the different X directions has been
;** completed, and the processing which is the same regardless of
;** the X direction is about to begin.
;**
;** The phase mask, the first/last byte masks, the X byte offsets,
;** and the number of innerloop bytes must be calculated.
;**
;**
;** Nasty stuff coming up here!  We now have to determine how
;** many bits will be BLTed and how they are aligned within the bytes.
;** Here's how we'll do it:
;**
;** The (inclusive) number of bits is added to the start MOD 8 value
;** (the left side of the rectangle, minimum X value),
;** then the result is divided by 8. Then:
;**
;**
;**    1) If the result is 0, then only one destination byte is being
;**       BLTed.  In this case, the start & ending masks will be ANDed
;**       together, the innerloop count (# of full bytes to BLT) will
;**       be zeroed, and the gl_last_mask set to all 0's (don't alter any
;**       bits in last byte which will be the byte following the first
;**       (and only) byte).
;**
;**               |      x x x x x|                 |
;**               |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                0 1 2 3 4 5 6 7
;**
;**               start MOD 8 = 3,  extent-1 = 4
;**               (3+4) DIV 8 = 0, only altering one byte
;**
;**
;**
;**    2) If the result is 1, then only two bytes will be BLTed.
;**       In this case, the start and ending masks are valid, and
;**       all that needs to be done is set the innerloop count to 0.
;**       (it is true that the last byte could have all bits affected
;**       the same as if the innerloop count was set to 1 and the
;**       last byte mask was set to 0, but I don't think there would be
;**       much time saved special casing this).
;**
;**               |  x x x x x x x|x x x x x x x|
;**               |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                0 1 2 3 4 5 6 7
;**
;**               start MOD 8 = 1,  extent-1 = 14
;**               (1+14) DIV 8 = 1.  There is a first and last
;**               byte but no innerloop count
;**
;**
;**
;**    3) If the result is >1, then there is some number of entire
;**       bytes to be BLted by the innerloop.  In this case the
;**       number of innerloop bytes will be the result - 1.
;**
;**               |              x|x x x x x x x x|x
;**               |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                0 1 2 3 4 5 6 7
;**
;**               start MOD 8 = 7,  extent-1 = 9
;**               7+9  DIV 8 = 2.  There is a first and last
;**               byte and an innerloop count of 1 (result - 1)
;**
;**       Currently:      AL = horizontal phase
;**                       BX = horizontal phase mask
;**                       CH = first byte mask
;**                       CL = last byte mask
;**                       DX = left side X MOD 8 + inclusive X count
;**                       SI = source start X
;**                       DI = dest   start X
;*/

        mov     iHorzPhase,al                     ;Save horizontal phase
        mov     mPhase,ebx                        ;Save phase mask
        shr     edx,3                             ;/8 to get full byte count
        jnz     phase_proc_60                     ;Result is >0, check it out

;/*
;** There will only be one byte affected.  Therefore the two byte masks
;** must be combined, the last byte mask cleared, and the innerloop
;** count set to zero.
;*/

        or      fbFetch,FF_ONLY_1_DEST_BYTE
        and     ch,cl                             ;Combine the two masks
        xor     cl,cl                             ;Clear out the last byte mask
        inc     edx                               ;Now just fall through to set
        errn$   phase_proc_60         ;  the innerloop count to 0!

phase_proc_60::
        dec     edx                               ;Dec count (might become 0 just like
        mov     cInnerByte,edx                    ;  we want), and save it
        mov     bl,ch
        mov     ch,cl                             ;Compute last byte mask
        not     cl                                ;  and save it
        mov     mLast,cx
        mov     bh,bl                             ;Compute start byte mask
        not     bl                                ;  and save it
        mov     mStart,bx



;/*
;** There may or may not be a source bitmap for the following address
;** computation.  If there is no source, then the vertical setup code
;** will be entered with both the source and destination Y's set to the
;** destination Y and the address calculation skipped.  If there is a
;** source, then the address calculation will be performed and the
;** vertical setup code entered with both the source and destination Y's.
;*/

phase_processing_y::
        shr     edi,3
        add     devDst.lp_bits,edi
        mov     edx,yDst                          ;Get destination Y origin
        mov     eax,edx                           ;Assume no source
        mov     cl,fbF0
        test    cl,F0_SRC_PRESENT                ;Is a source needed?
        jz      phase_proc_70                     ;  No, skip source set-up
        shr     esi,3                             ;Compute byte offset of source
        add     devSrc.lp_bits,esi               ;  and add to current source offset
        mov     eax,ySrc                          ;Get source Y origin


;/*
;** The horizontal parameters have been calculated.  Now the vertical
;** parameters must be calculated.
;**
;** Currently:
;**       EDX = destination Y origin
;**       EAX = source Y origin (destination origin if no source)
;**       CL  = fbF0
;*/

phase_proc_70::
        mov     ebx,cyExt                         ;Get the Y extent of the BLT
        dec     ebx                               ;Make it inclusive


;/*
;** The BLT will be Y+ if the top of the source is below or equal
;** to the top of the destination (cases: 1,4,5,7,8).  The BLT
;** will be Y- if the top of the source is above the top of the
;** destination (cases: 2,3,6)
;**
;**
;**           !...................!
;**           !D                                   !
;**       ____!             ..x                    !
;**      |S   !               :                    !     Start at top of S walking down
;**      |    !                                    !
;**      |    !...................!
;**      |                    :
;**      |____________________:
;**
;**
;**       __________________
;**      |S                 |
;**      |    .....................     Start at bottom of S walking up
;**      |    !D                                   !
;**      |    !             :                      !
;**      |____!           ..x                      !
;**           !                                    !
;**           !....................
;*/

        mov     ch,INCREASE                       ;Set Y direction for top to bottom
        cmp     eax,edx                           ;Which direction do we move?
        jge     phase_proc_80                     ;Step down screen (cases: 1,4,5,7,8)

;/*
;** Direction will be from bottom of the screen up (Y-)
;**
;** This code will not be executed if there is no source since
;** both Y's were set to the destination Y.
;*/

        add     edx,ebx                           ;Find bottom scan line index for
        add     eax,ebx                           ;  destination and source
        mov     ch,DECREASE                       ;Set pattern increment

phase_proc_80::
        add     yPatRow,dl                        ;Set pattern row and increment
        mov     iDir,ch
        sar     ch,1                              ;Map FF==>FF, 01==>00
        .ERRNZ  DECREASE - (-1)
        .ERRNZ  INCREASE -   1

;/*
;** The Y direction has been computed.  Compute the rest of the
;** Y parameters.  These include the actual starting address,
;** the scan line and plane increment values, and whether or not
;** the extents will cross a 64K boundary.
;**
;** Currently:
;**       EDX = Y of starting destination scan
;**       EAX = Y of starting source scan
;**       CH  = BLT direction
;**               00 = increasing BLT, Y+
;**               FF = decreasing BLT, Y-
;**       CL  = fbF0
;**       EBX = inclusive Y extent
;*/

phase_proc_90::
        test    cl,F0_SRC_PRESENT                ;Is a source needed?
        mov     cl,ch                             ;  (Want CX = +/- 1)
        jz      phase_proc_100                    ;  No, skip source set-up
        push    edx                               ;Save destination Y
        push    ebp                               ;Mustn't trash frame pointer
        lea     ebp,devSrc                        ;--> source data structure
        INVOKE  compute_y                         ;Process as needed
        pop     ebp
        pop     edx                               ;Restore destination Y

phase_proc_100::
        push    ebp                               ;Mustn't trash frame pointer
        mov     eax,edx                           ;Put destination Y in ax
        lea     ebp,devDst                        ;--> destination data structure
        INVOKE  compute_y
        pop     ebp                               ;Restore frame pointer
        ret
phase_processing ENDP

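;/*
;** This code is in a separate segment since it's relatively rare.
;**
;** Allow room for the BLT code.  The maximum size that can be generated
;** is given by MAX_BLT_SIZE.  This value must be an even number; note
;** that the FIREWALLS fill in far_do_cblt stores MAX_BLT_SIZE / 4 DWORDs,
;** so it only covers the whole area when the value is a multiple of 4.
;*/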

;_TUNE segment USE32 PUBLIC 'CODE'
;/***************************************************************************
;*
;* FUNCTION NAME = far_do_cblt
;*
;* DESCRIPTION   = Have CBLT generate the BLT code into the work area
;*                 proc_stack_area, load the registers the generated code
;*                 is entered with, and call it through pfnBlt.
;*
;*                 Registers set up for the generated code:
;*                   ESI = devSrc.lp_bits  (only if F0_SRC_PRESENT in fbF0)
;*                   EDI = devDst.lp_bits
;*                   ECX = cyExt           (count of lines to BLT)
;*                   DF  = clear if iStepDir == STEPRIGHT, otherwise set
;*
;*                 EBP is saved and restored around the call, and the
;*                 direction flag is cleared again before returning.
;*
;* INPUT         = pddcDst  :DWORD, ;Destination ddc
;*                 xDst     :DWORD, ;Destination x origin
;*                 yDst     :DWORD, ;Destination y origin
;*                 psdSrc   :DWORD, ;Source surface definition
;*                 xSrc     :DWORD, ;Source x origin
;*                 ySrc     :DWORD, ;Source y origin
;*                 cxExt    :DWORD, ;x extent of the BLT
;*                 cyExt    :DWORD, ;y extent of the BLT
;*                 usMix    :DWORD, ;Mix mode
;*                 ipcBkgnd :DWORD, ;Color to mono background match color
;*                 fsBlt    :DWORD
;*
;*                 The frame variables used below (fbF0, devSrc, devDst,
;*                 iStepDir, pfnBlt) come from frame.blt and are expected
;*                 to have been filled in before this routine is entered.
;*
;* OUTPUT        = The destination is modified by the generated BLT code.
;*
;* RETURN-NORMAL = No return value is set up by this routine.
;* RETURN-ERROR  = None detected here.
;*
;**************************************************************************/

ALIGN 4
far_do_cblt PROC SYSCALL,
 pddcDst  :DWORD, ;Destination ddc
 xDst     :DWORD, ;Destination x origin
 yDst     :DWORD, ;Destination y origin
 psdSrc   :DWORD, ;Source surface definition
 xSrc     :DWORD, ;Source x origin
 ySrc     :DWORD, ;Source y origin
 cxExt    :DWORD, ;x extent of the BLT
 cyExt    :DWORD, ;y extent of the BLT
 usMix    :DWORD, ;Mix mode
 ipcBkgnd :DWORD, ;Color to mono background match color
 fsBlt    :DWORD

       include frame.blt
        mov     edi,OFFSET proc_stack_area       ;EDI --> area that receives the BLT code
        mov     pfnBlt,edi                        ;This is also the address called below

ifdef FIREWALLS

;/*
;** Debug aid: clean the blackboard.  Every byte of the work area is set
;** to 0CCh, the one-byte INT 3 opcode, so that executing a part of the
;** area that was never written traps instead of running stale bytes.
;** The fill is done a DWORD at a time, so EAX must hold the byte in all
;** four positions.
;*/

        push    edi                               ;Keep the start of the work area
        mov     eax,0CCCCCCCCh                    ;Four INT 3 bytes per store
        mov     ecx,MAX_BLT_SIZE / 4              ;Size of the area in DWORDs
        rep     stosd
        pop     edi                               ;EDI --> start of work area again
endif

        xor     ecx,ecx                           ;Clear out count register for CBLT
        CALL    CBLT                              ;Generate the BLT code

;/*
;** The BLT has been created.  Set up the initial registers, set the
;** direction flag as needed, and execute the BLT.
;*/

        test    fbF0,F0_SRC_PRESENT              ;Is there a source?
        jz      call_blt_no_source               ;  No, don't load its pointer
        mov     esi,devSrc.lp_bits               ;--> source device's first byte

;/*
;** No register set-up is done here for a pattern; with or without a
;** source, all that remains is the destination pointer, the line count
;** and the direction flag.
;*/

call_blt_no_source::
call_blt_get_dest_bits::
        mov     edi,devDst.lp_bits               ;--> destination device's first byte
        mov     ecx,cyExt                         ;Get count of lines to BLT
        cld                                       ;Assume this is the direction
        cmp     iStepDir,STEPRIGHT               ;Stepping to the right?
        jz      call_stackblt                     ;  Yes
        std                                       ;  No, string ops must step downward

call_stackblt::
        push    ebp                               ;MUST SAVE THIS
        call    pfnBlt                            ;Run the generated BLT code
        pop     ebp
        cld                                       ;Never return with DF left set

call_blt_exit::
        ret                                       ;Hey, we're done!

far_do_cblt ENDP

;/*
;**       Subroutines.  These have been included with the aim of
;**       segregating device dependent code from independent code,
;**       while cleanly preserving the local variable frame.
;*/
        include surface.blt                       ;Surface preprocessing
        include pattern.blt                       ;Pattern preprocessing
        include computey.blt                      ;compute_y procedure
        include special.blt                       ;non-compiled blt subroutines

ifdef SLITE
        include starlite.blt            ;Starlight non-compiled blt subroutines
endif

        include exit.blt                          ;device-specific cleanup before exit

        public  cur_exclusion_not_sbb, cur_exclusion_y          ;cur_exclusion_* labels
        public  cur_exclusion_union, cur_exclusion_no_union
        public  cur_exclusion_do_it, cur_exclusion_end

        public  phase_proc_10                                   ;phase_proc_* / pp_* labels
        public  phase_proc_stepping_right, phase_proc_two_fetches
        public  pp_second_fetch_really_needed, pp_only_one_init_fetch
        public  phase_proc_last_fetch_needed, phase_proc_stepping_left
        public  pp_double_fetch, pp_double_fetch_really_needed
        public  pp_one_initial_fetch, pp_need_last_fetch
        public  phase_proc_both_directions, phase_proc_60
        public  phase_processing_y
        public  phase_proc_70, phase_proc_80
        public  phase_proc_90, phase_proc_100

        public  bitblt_exit

        public  far_do_cblt                                     ;far_do_cblt and its labels
        public  call_blt_no_source, call_blt_get_dest_bits
        public  call_stackblt, call_blt_exit

_TUNE ENDS
end
