;*DDK*************************************************************************/
;
; COPYRIGHT (C) Microsoft Corporation, 1989
; COPYRIGHT    Copyright (C) 1995 IBM Corporation
;
;    The following IBM OS/2 WARP source code is provided to you solely for
;    the purpose of assisting you in your development of OS/2 WARP device
;    drivers. You may use this code in accordance with the IBM License
;    Agreement provided in the IBM Device Driver Source Kit for OS/2. This
;    Copyright statement may not be removed.;
;*****************************************************************************/
        page    ,132
;/*****************************************************************************
;*
;* SOURCE FILE NAME = MEMBLT.ASM
;*
;* DESCRIPTIVE NAME = BitBLT at level of device driver. 
;*
;*
;* VERSION      V2.0
;*
;* DATE         1983
;*
;* DESCRIPTION  
;*
;*    This is the main module of those comprising the source to BitBLT
;*    (Bit BLock Transfer) for Microsoft display drivers. It
;*    defines the procedure, and performs general preprocessing for all BLT
;*    requests.
;*   
;*    BitBLT  transfers a rectangle of bits from source to destination,
;*    doing some useful operations on the way, namely:
;*   
;*    o     excluding the cursor;
;*   
;*    o     performing a user-specified raster operation, out of
;*          a vast array of choices, which takes the form
;*   
;*          D = f(S,D,P)
;*   
;*          where S = source bit, D = destination bit, P = pattern
;*          bit, and  f  is a sequence of logical operations (AND, OR,
;*          XOR, NOT) on S, D, and P;
;*                  
;*    o     recognizing common special cases for accelerated processing.
;*   
;*   
;*    For a detailed explanation of the contortions BitBLT goes through
;*    to put your bits in place, see the file COMMENT.BLT.
;*   
;*   
;*    BitBLT consists of the following files:
;*   
;*          BITBLT.ASM                       procedure definition
;*          CBLT.ASM                         procedure to compile arbitrary BLT on stack
;*   
;*          FRAME.BLT                        function parameters and locals
;*          CONSTANT.BLT                     constants
;*          DATADEFS.BLT                     compiled code templates and data
;*          ROPTABLE.BLT                     table of ROP templates and definitions
;*          SURFACE.BLT                      Surface processing
;*          PATTERN.BLT                      pattern preprocessing
;*   
;*          COMPUTEY.BLT                     compute y-related values
;*          SPECIAL.BLT                      special case code
;*   
;*          COMMENT.BLT                      overview of history and design
;*              
;* FUNCTIONS    MemBlt
;*              far_do_cblt
;*
;* NOTES        NONE
;*
;* STRUCTURES   NONE
;*
;* EXTERNAL REFERENCES
;*
;*              NONE
;*
;* EXTERNAL FUNCTIONS
;*
;*              NONE
;*
;* CHANGE ACTIVITY =
;*   DATE      FLAG        APAR   CHANGE DESCRIPTION
;*   --------  ----------  -----  --------------------------------------
;*   mm/dd/yy  @Vr.mpppxx  xxxxx  xxxxxxx
;*
;*****************************************************************************/

        ??_out  MemBlt

        .286p
        .xlist
        include cmacros.inc
        include pmgre.inc
DINCL_BB_ROPS   equ     1
        include driver.inc
        include display.inc
        include 8514.inc
        include 8514mem.inc
        include oemblt.inc
        include assert.mac
        include njmp.mac
        .list


;/*
;** Command definitions to send to the 8514/A hardware during inquiry blts
;** (hardware -> memory bitmap) performed by the compiled blt code.
;**
;** Both values are sums of CMD_* flags; the individual flag definitions are
;** not in this file (presumably 8514.inc -- confirm).  The two commands are
;** identical except for the data-transfer width:
;**
;**   CMD_CBLT_8BPP  uses CMD_WORD
;**   CMD_CBLT_4BPP  uses CMD_BYTE + CMD_BYTE_LO
;**
;** NOTE(review): neither symbol is referenced in the portion of MEMBLT.ASM
;** that follows; they are presumably consumed by one of the included .BLT
;** files -- verify before removing or renaming.
;*/

CMD_CBLT_8BPP   =       (\
                         CMD_C_HRECT+CMD_WORD+CMD_FV_VAR+\
                         CMD_DX+CMD_DY+CMD_MA_ACCESS+CMD_PA_ONE+CMD_RW_R\
                        )

CMD_CBLT_4BPP   =       (\
                         CMD_C_HRECT+CMD_BYTE+CMD_BYTE_LO+CMD_FV_VAR+\
                         CMD_DX+CMD_DY+CMD_MA_ACCESS+CMD_PA_ONE+CMD_RW_R\
                        )


        externA DOSHUGEINCR
        externFP far_exclude                      ; Exclude area from screen
        externFP far_unexclude                    ; Restore excluded area to screen

sBegin  Bitmap
        externNP CBLT                             ; (must be NP, even though defined
                                                  ;  as FAR -- see CBLT.ASM)
sEnd    Bitmap

sBegin  PtrData
        externB screen_busy                       ; cursor exclusion semaphore
sEnd    PtrData


sBegin  Data
        externW hwFlags                           ; hw flags
        externW ipc_index_mask                    ; mask to just bpp
        externW selCompileCode
sEnd    Data


sBegin  Code
        assumes cs,Code
        assumes ds,nothing
        assumes es,nothing

        include constant.blt
DEFINE_ROPTABLE    equ     1
        include roptable.blt

        externW CodeData
        externW MyPtrCodeData                     ; for access to pointer data segment
        externNP copy_dev                         ; included in boardblt.asm

;/*
;** MemBlt - BitBLT front end for memory-to-memory and board-to-memory blts.
;**
;** Processing order (all of it on the frame declared by FRAME.BLT):
;**
;**   1. Decode usMix through roptable into usMixData, folding ROPs 80h-FFh
;**      onto 00h-7Fh by complementing the index and toggling NEGATE_NEEDED.
;**   2. Call pdevice_processing and pattern_preprocessing.
;**   3. If fbF0 has F0_SRC_IS_DEV set, exclude the cursor from the source
;**      rectangle, grab the screen and write 0 / 1023 limits to the
;**      XMIN/YMIN/XMAX/YMAX registers.
;**   4. Try check_device_special_cases; if it does not handle the blt
;**      (carry clear), run the general compiled blt via far_do_cblt.
;**   5. Common exit: far_unexclude, ReleaseScreen, return.
;**
;** Entry:  parameters as declared in FRAME.BLT (not visible in this file)
;** Exit:   AX = BBRC_NORMAL
;**         NOTE(review): AX is reloaded with BBRC_NORMAL on every path just
;**         before cEnd, so neither the "AX = 1" set at bitblt_exit nor any
;**         error code present at bitblt_exit_fail reaches the caller --
;**         confirm that this is intended.
;** Saves:  SI, DI (cProc save list)
;*/
cProc   MemBlt,<FAR,PUBLIC,NODATA>,<si,di>
        include frame.blt

;/*
;** These are used by all or most of the low level routines
;*/

        localW  ScanOff                           ; offset into each dest scan
        localW  TwiceDestScan                     ; double(?) the # of bytes / dest scan
        localW  HalfDestScan                      ; half(?) the # of bytes / dest scan
        localW  SolidFillColor                    ; for rops DDx,DDno,P,Pn in special code

        localW  LeftEdge                          ; flag left edge or not
        localW  CenterWords                       ; # whole words in center of blt
        localW  RightEdge                         ; flag right edge or not


cBegin

ifdef FIREWALLS
        fw_zero <ds,es>
        mov     ax,pddcDst.off
        ddc?    ax,<SURFACE>
endif

        subttl  ROP Preprocessing
        page

;/*
;**       Get the encoded raster operation, and map the raster op if needed.
;**
;**       To map the ROPS 80h through FFh to 00h through 7Fh, take the
;**       1's complement of the ROP, and invert the "negate needed" flag.
;**
;**       AX starts at 0 (or NEGATE_NEEDED for a mapped ROP) and is XORed
;**       with the table entry, so the flag is toggled rather than forced.
;**
;**       NOTE(review): only BL is tested/complemented; BH is used as loaded
;**       from usMix when forming the table index -- presumably the high
;**       byte of usMix is always zero; verify against the caller.
;*/

        cld                                       ; Let's make no assumptions about this!
        xor     ax,ax                             ; Assume not 80h : FFh
        mov     bx,usMix
        or      bl,bl                             ; Is this in the second half (80-FF)?
        jns     parse_10                          ;   No, rop index is correct
        not     bl                                ;   Yes, want the inverse
        mov     ah,HIGH NEGATE_NEEDED             ; Want to invert the not flag
        .errnz  LOW NEGATE_NEEDED

parse_10:
        add     bx,bx                             ; Turn into a word index
        xor     ax,roptable[bx]                   ; Get ROP, maybe toggle negate flag
        mov     usMixData,ax                      ; Save the encoded raster operation

;/*
;** Build the source/pattern-present bits in BL.  The high byte of the
;** encoded ROP carries SOURCE_PRESENT and PATTERN_PRESENT one bit position
;** above F0_SRC_PRESENT / F0_PAT_PRESENT (enforced by the .errnz lines
;** below), so a rotate right by one lines them up.  BL is not stored here;
;** presumably pdevice_processing picks it up to form fbF0 -- confirm in
;** SURFACE.BLT.
;*/

        mov     bl,ah                             ; BL = high byte of encoded ROP
        and     bl,HIGH (SOURCE_PRESENT or PATTERN_PRESENT)
        ror     bl,1                              ; move into the F0_* bit positions

        .errnz  SOURCE_PRESENT                   - 0010000000000000b
        .errnz  PATTERN_PRESENT - 0100000000000000b
        .errnz  F0_SRC_PRESENT                   -   00010000b
        .errnz  F0_PAT_PRESENT                   -   00100000b


;/*
;** We have picked up the raster op information.   Get the source, pattern,
;** and destination parameters as needed.
;*/

        mov     ax,ss                             ;Leave ES set to frame segment
        mov     es,ax
        assumes es,nothing

        call    pdevice_processing
        assumes ds,Data                           ;Will be data from here on out
        call    pattern_preprocessing
page

;/*
;**  Cursor Exclusion
;**
;**  We only do memory to memory or board to memory blts here, so we
;**  only have to check whether or not the source bitmap is the
;**  device. If it is we should exclude the cursor from the source
;**  rectangle.
;**
;**  Registers passed to far_exclude:
;**      CX,DX = upper-left  x,y of the source rectangle
;**      SI,DI = lower-right x,y (inclusive) of the source rectangle
;*/

cursor_exclusion:

        mov     si,cxExt
        mov     di,cyExt
        test    fbF0,F0_SRC_IS_DEV; is the source on the device ?
        jz      @F                                ; no -- no exclusion needed...

        mov     cx,xSrc                          ; set exclusion rect ul x
        mov     dx,ySrc                          ; set exclusion rect ul y
        add     si,cx                             ; set right + 1
        dec     si                                ; set exclusion rect lr x
        add     di,dx                             ; set bottom + 1
        dec     di                                ; set exclusion rect lr y
        call    far_exclude                           ; exclude the area from the screen

        GrabScreen    memblt                      ; don't allow cursor to interfere

;/*
;**  Write 0 and 1023 as the min/max limits in both directions (presumably
;**  this opens the hardware clip rectangle to the full 1024x1024
;**  coordinate space -- confirm against the 8514/A register definitions).
;**  WaitQ 4 precedes the four register writes.
;*/

        WaitQ   4
        outwQ   XMIN,(XMIN_2DECODE+0)            ; min left side
        outwQ   YMIN,(YMIN_2DECODE+0)            ; min top side
        outwQ   XMAX,(XMAX_2DECODE+1023); max right side
        outwQ   YMAX,(YMAX_2DECODE+1023); max bottom side
@@:

;/*
;** Check for cases able to be done without full compilation of code.
;**
;** Flags returned by check_device_special_cases, as consumed here:
;**     CF = 0           blt not handled, fall through to the compiled blt
;**     CF = 1, ZF = 0   handled, take the normal exit (bitblt_exit)
;**     CF = 1, ZF = 1   take the failure exit (bitblt_exit_fail)
;*/

        call    check_device_special_cases
        jnc     @F                                ; blt not done with special case code...
        jnz     bitblt_exit                       ; done by special case code, normal exit...
        jmp     bitblt_exit_fail                 ; error from special case code...
@@:

;/*
;** Use the generic compiled blt to execute any blt cases not handled via
;** special case routines in check_device_special_cases (above):
;*/

        call    far ptr far_do_cblt

        page

;/*
;**     exit - leave BitBLT
;**
;**     Well, the BLT has been processed.  Restore the stack to its
;**     original status, restore the saved user registers, show no
;**     error, and return to the caller.
;**
;**     Entry:  None
;**
;**     Exit:   AX = 1 at this label, but see bitblt_exit_fail below:
;**             AX is overwritten with BBRC_NORMAL before returning.
;**
;**     Uses:   All
;*/

bitblt_exit:
        mov     ax,1                              ;Clear out error register (good exit)
        errn$   bitblt_exit_fail                  ;assert: falls straight into next label

;/*
;**     bitblt_exit_fail - exit because of failure
;**
;**     The BLT is exited.  This is also the tail of the normal exit.
;**
;**     Entry:  AX = error code (0 if error)
;**
;**     NOTE(review): far_unexclude and ReleaseScreen run on every path,
;**     including blts where the source was not the device and therefore
;**     far_exclude / GrabScreen were skipped -- presumably both are safe
;**     to invoke unpaired; confirm against their definitions.
;*/

bitblt_exit_fail:
        cld                                       ; leave direction cleared
        call    far_unexclude                     ; remove any exclusion area

        ReleaseScreen  memblt                     ; allow cursor interference now

        mov     ax,BBRC_NORMAL                    ;Want pass 2 if image data blt
        fw_zero <cx,ds,es>
cEnd

;/*
;**       Subroutines.  These have been included with the aim of
;**       segregating device dependent code from independent code,
;**       while cleanly preserving the local variable frame.
;**
;**       The three files are included after MemBlt's cEnd and before the
;**       Code segment is closed.  MemBlt calls pdevice_processing,
;**       pattern_preprocessing and check_device_special_cases, none of
;**       which is defined in this file; presumably they come from these
;**       includes -- confirm.
;*/

        include surface.blt                       ;Surface preprocessing
        include pattern.blt                       ;Pattern preprocessing
        include special.blt                       ;non-compiled blt subroutines

sEnd    Code

; NOTE(review): this second "sEnd Code" has no matching sBegin in this file.
; It looks redundant unless one of the included .BLT files re-opens the Code
; segment -- confirm against cmacros.inc and the includes before removing.
sEnd    Code

        subttl  Phase Processing (X)
        page

sBegin  Bitmap
        assumes cs,Bitmap

        externW BitmapData

;/*
;** This code is in a separate segment since it's relatively rare.
;*/

        include computey.blt                      ;compute_y procedure
        include gendata.blt

;/*
;** Now the real work comes along:  In which direction will the
;** copy be done?  Refer to the 10 possible types of overlap that
;** can occur (10 cases, 4 resulting types of action required).
;**
;** If there is no source bitmap involved in this particular BLT,
;** then the path followed must allow for this.  This is done by
;** setting both the destination and source parameters equal.
;*/


cProc   far_do_cblt,<PUBLIC,FAR,NODATA,NONWIN>
cBegin

phase_processing:
phase_processing_x:

        mov     cl,fbF0                          ; we'll use these flags a bit

        mov     dx,cxExt                          ;Get X extent
        dec     dx                                ;Make X extent inclusive

        mov     di,xDst                          ; get dest x origin

ifdef   4BPP_USED
        test    cl,F0_DEST_IS_COLOR              ; color destination ?
        jz      phase_dest_mono                   ; no, mask to bit within byte...

        sub     bx,bx                             ; assume 8 bpp, 0 phase
        test    dl_hwFlags,HW_8_BPP              ; correct ?
        jnz     @F                                ; yes...
        test    di,1                              ; no -- 4 bpp, phase is 0 or 4
        jz      @F                                ; was left 4 bpp pixel, 0 phase
        mov     bl,4                              ; was right 4 bpp pixel, 4 phase
        jmp     short @F
else
        sub     bx,bx                             ; assume 8 bpp, 0 phase
        test    cl,F0_DEST_IS_COLOR              ; color destination ?
        jnz     @F                                ; no, mask to bit within byte...
endif;  4BPP_USED

phase_dest_mono:
        mov     bx,di                             ; get dest x origin
        and     bx,00000111b                      ; mono dest, get bit within byte
@@:

;/*
;**  If there is no source, then just use the pointer to the destination
;**  bitmap and load the same parameters, which will cause the "equality"
;**  path to be followed in the set-up code.  This path is the favored
;**  path for the case of no source bitmap.
;*/

        mov     si,di                             ;Assume no source needed
        test    cl,F0_SRC_PRESENT                ;Is a source needed?
        jz      phase_proc_10                     ;  No, just use destination parameters
        mov     si,xSrc                          ;  Yes, get source origin X

;/*
;**  Next: Assume two initial fetches (if no source, then it will be
;**  set = 1 later) -- and zero the other flags.
;*/

        mov     fbFetch,FF_TWO_INIT_FETCHES
        shr     cl,2                              ; SRC_IS_COLOR flag to DEST_IS_COLOR pos

phase_proc_10:

ifdef   4BPP_USED
        test    cl,(F0_SRC_IS_COLOR shr 2); color source ?
        .errnz  ((F0_SRC_IS_COLOR shr 2) xor F0_DEST_IS_COLOR)
        jz      phase_src_mono                    ; no, mask to bit within byte...

        sub     ax,ax                             ; assume 8 bpp, 0 phase
        test    dl_hwFlags,HW_8_BPP              ; correct ?
        jnz     @F                                ; yes...
        test    si,1                              ; no -- 4 bpp, phase is 0 or 4
        jz      @F                                ; was left 4 bpp pixel, 0 phase
        mov     al,4                              ; was right 4 bpp pixel, 4 phase
        jmp     @F
else
        sub     ax,ax                             ; assume 8 bpp, 0 phase
        test    cl,(F0_SRC_IS_COLOR shr 2); color source ?
        .errnz  ((F0_SRC_IS_COLOR shr 2) xor F0_DEST_IS_COLOR)
        jnz     @F                                ; no, mask to bit within byte...
endif;  4BPP_USED

phase_src_mono:
        mov     ax,si                             ; get source x origin
        and     ax,00000111b                      ; mono src, get bit within byte
@@:

        cmp     si,di                             ; Which direction will we be moving?
        jge     @F                                ; move from left to right...
        jmp     phase_proc_stepping_left; Move from right to left
@@:

phase_proc_stepping_right:

;/*
;**  The starting X of the source rectangle is >= the starting X of
;**  the destination rectangle, therefore we will be moving bytes
;**  starting from the left and stepping right.
;**
;**  Alternatively, this is the path taken if there is no source
;**  bitmap for the current BLT.
;**
;**  Rectangle cases: 3,4,5,6,8
;*/

        mov     iStepDir,STEPRIGHT;Set direction of move
        mov     ah,bitmask_tbl1[bx]              ;Get starting byte mask
        mov     gl_start_bit,bl                   ; save starting byte bit #

        sub     al,bl                             ;Compute horiz. phase  (source-dest)
        ja      phase_proc_two_fetches           ;Scan line case 2, everything is
                                                  ;  already set for this case.

;/*
;**  Scan line cases 1 and 3:
;**  The correct first byte fetch needs to be set for the beginning
;**  of the outer loop, and the phase must be made into a positive
;**  number.
;**
;**  This is the path that will be followed if there is no source bitmap
;**  for the current BLT.
;**  For speed use MOV 0 rather than AND NOT TWO_INIT_FETCHES
;**  (The other flags in fbFetch have not yet been set)
;*/

        mov     fbFetch,FF_ONE_INIT_FETCH
        jmp     short pp_only_one_inifetch

phase_proc_two_fetches:

;/*
;**  If we get all the bits we need in the first fetch then a second
;**  (unnecessary) fetch could cause a GP Fault.   So let's examine this:
;**  The number of bits from (SI mod 8) to the end of the byte is the number
;**  of available bits we get on the first fetch.  This is (8 - (SI mod 8)).
;**  If this is greater than or equal to cxExt then we have all the bits we
;**  need and we better not do the second fetch (even tho the phase
;**  relationship may suggest we need it).
;**
;**  Conclusion:  if (8 - (SI mod 8)) >= cxExt then DO NOT make second fetch.
;*/

ifdef FIREWALLS
        push    bx
        push    cx        ; Note that (SI mod 8) is currently (AL + BL)
        mov     cx,si
        and     cl,7      ; this gives (SI mod 8) in CL
        add     bl,al     ; this gives AL+BL in BL
        assert  bl,E,cl   ; are they the same?
        pop     cx
        pop     bx
endif
        mov     cx,8
        sub     cl,bl
        sub     cl,al

;/*
;**  We can save a couple cycles here since cxExt - 1 is already in DX.
;**  The condition CX >= cxExt is the same as CX > DX.
;*/

        cmp     cx,dx                            ; CX = (SI mod 8),  DX = (cxExt - 1)
        jle     pp_second_fetch_really_needed

;/*
;**  We are here BECAUSE the cxExt is so small that we can get all the bits
;**  on the scanline with a single lodsb (no byte boundary is crossed) AND
;**  the phase relationship indicates that a second initial fetch is needed.
;**
;**  We will override it and only do one fetch.  However, if we simply
;**  fail to do the second fetch then the phase code will trip us up.
;**  It will be expecting the bits to get fetched in the first fetch, saved
;**  after the rotate, and mixed in in the second fetch's phase code.
;**  So after the first fetch the bits have been saved in BH, and ANDed out
;**  of the src data in AL.
;**
;**  The solution is to set a flag here that tells the phase generation code
;**  not to generate the usual masking part of the phase code.
;**
;** Short Bitblt Cases:                             (8 bits or less)
;**
;**       1) neither crosses byte boundary.
;**
;**          a) phase requires second initial fetch
;**
;**             Kill the phase masking.  It will trip us up.  There will
;**             be just one lodsb and one stosb and the first byte mask
;**             will protect the dest bits that should not get hit.
;**             (The original comment breaks off here, mid-sentence:
;**             "Furthermore if a se...")
;**
;**          b) phase requires only one initial fetch
;**
;**             Phase masking is irrelevant.  Removing it would
;**             be an optimization.
;**
;**       2) dest crosses byte boundary, but src does not
;**
;**          a) phase requires second initial fetch
;**
;**             impossible situation:  the way we determine that a 2nd fetch
;**             is necessary is if the first fetch does not get enough needed
;**             bits to satisfy the first dest byte.  Here the first fetch
;**             gets ALL the bits and the first dest byte needs less than
;**             ALL because it crosses a byte boundary.
;**
;**          b) phase requires only one initial fetch
;**
;**             Intervention would be bad.  None is necessary since the 2nd
;**             initial fetch will not be done.  If we do intervene we will
;**             cause trouble:  Killing the masking will prevent the
;**             "saved bits" from being saved.  The first byte masking
;**             can kill off these bits in AL and they will never
;**             make it to the second stosb.
;**
;**       3) src crosses byte boundary  (dest may or may not)
;**          (this is known to be untrue at this point)
;**
;**          There are bits we need in the second fetch, so a second
;**          initial fetch can not cause a GP fault.  Therefore do
;**          everything the same as we would have before.
;**
;**
;** Conclusion:  Intervention to kill the phase masking is
;**              necessary iff
;**                 [src does not cross byte boundary] AND
;**                 dest does not cross byte boundary  AND
;**                 [phase requires second initial fetch].
;**              and harmful if
;**                 dest crosses byte boundary, but [src does not]
;**
;** Statements in [] are known to be true at this point.
;**
;** Solution:
;**
;** If we always kill the phase-masking when neither crosses a byte boundary
;** and never kill it otherwise then everyone will be happy.  (regardless
;** of other conditions like whether phase requests a 2nd initial fetch).
;*/

;/*
;**  We know that fbFetch == FF_TWO_INIT_FETCHES
;**  But we want to clear this bit (i.e. we want it to do only one initial
;**  fetch so it wont GP) and we want to set FF_ONLY_1_SRC_BYTE so if the
;**  dest bits don't cross a byte boundary we will know to kill the
;**  phase-masking code.  We can skip the ANDs and ORs and just move:
;*/

        mov     fbFetch,FF_ONLY_1_SRC_BYTE
        .errnz  FF_ONE_INIT_FETCH
pp_second_fetch_really_needed:
pp_only_one_inifetch:
        mov     ch,ah

;/*
;**     We now have the correct phase and the correct first character fetch
;**     routine set.  Save the phase and ...
;**
;**     currently:   AL = phase
;**                  BL = dest start mod 8
;**                  CH = first byte mask
;**                  DX = inclusive X bit count
;**                  SI = source X start (if there is a source)
;**                  DI = destination X start
;*/

        add     al,8                              ;Phase must be positive
        and     al,00000111b

;/*
;**  To calculate the last byte mask, the inclusive count can be
;**  added to the start X MOD 8 value, and the result taken MOD 8.
;**  This is attractive since this is what is needed later for
;**  calculating the inclusive byte count, so save the result
;**  of the addition for later.
;*/

ifdef   4BPP_USED
        test    fbF0,F0_DEST_IS_COLOR
        jnz     @F

geoff_src_mono:
        add     bx,dx                             ;Add inclusive extent to dest MOD 8
        mov     dx,bx                             ;Save for innerloop count !!
        jmp     short wes_r

@@:     test    fbF0,F0_GAG_CHOKE; source must be mono if this set
        jnz     geoff_src_mono
        test    dl_hwFlags,HW_8_BPP              ; 8 bpp destination?
        jz      @F                                ; no...

        sub     cx,cx                             ; no last byte ever
        jmp     short geoff_r

@@:     shr     bx,2                              ; 4->1, 0->0
        add     bx,dx
        and     bx,00000001b
        shl     bx,2                              ; 0->0, 1->4

else
        test    fbF0,F0_DEST_IS_COLOR
        jz      geoff_src_mono
        test    fbF0,F0_GAG_CHOKE; source must be mono if this set
        jnz     geoff_src_mono
        sub     cx,cx                             ; no last byte ever
        jmp     short geoff_r

geoff_src_mono:
        add     bx,dx                             ;Add inclusive extent to dest MOD 8
        mov     dx,bx                             ;Save for innerloop count !!
endif;  4BPP_USED

        public  wes_r
wes_r:

        and     bx,00000111b                      ;Set up bx for a base reg
        mov     cl,bitmask_tbl2[bx]              ;Get last byte mask
        mov     gl_last_bit,bl                    ; save last byte bit #

geoff_r:

;/*
;**  To avoid GP faults must never do an extra fetch we don't need.
;**  When we're ready for the last fetch there may already be enough bits
;**  saved from the previous fetch (which we plan to combine with the bits
;**  in the fetch we are about to do).  If so then we'd better not do this
;**  last fetch (it could cause a GP fault).
;**
;**  The number of bits we have left from the previous byte is (8 - AL)
;**  AL is the phase.  (1 + BL) is the number of bits we actually need
;**  to write to the final destination byte.
;**
;**  So if  (8 - AL) >= (1 + BL)                   then DO NOT do the last fetch. This
;**  simplifies:  if  (BL + AL) <= 7  then DO NOT do the last fetch.
;*/

        add     bl,al
        cmp     bl,7
        jg      phase_proc_last_fetch_needed
        or      fbFetch,FF_NO_LAST_FETCH
phase_proc_last_fetch_needed:

        mov     bl,al                             ; Compute offset into phase mask table
        add     bx,bx                             ; Note BH still = 0
        mov     bx,phase_tbl1[bx]                 ; Get the phase mask

;/*
;**  Currently:
;**          AL = phase
;**          BX = phase mask
;**          CL = last byte mask
;**          CH = first byte mask
;**          DX = inclusive bit count + dest start MOD 8
;**          SI = source X start (if there is a source)
;**          DI = destination starting X
;*/

        jmp     short phase_proc_both_directions


phase_proc_stepping_left:

;/*
;**       The starting X of the source rectangle is < the X of the destination
;**       rectangle, therefore we will be moving bytes starting from the right
;**       and stepping left.
;**
;**       This code should never be reached if there is no source bitmap
;**       for the current BLT.
;**
;**       Rectangle cases: 1,2,7
;*/

        mov     iStepDir,ah                      ;Set direction of move
        .errnz  STEPLEFT

        mov     cl,bitmask_tbl1[bx]              ;Get last byte mask
        mov     gl_last_bit,bl                    ; save last byte bit #
        push    bx                                ;!!! can this be avoided?
        add     ax,dx                             ;Find end of the source

;/*
;**  To calculate the first byte mask, the inclusive count is
;**  added to the start MOD 8 value, and the result taken MOD 8.
;**  This is attractive since this is what is needed later for
;**  calculating the inclusive byte count, so save the result
;**  of the addition for later.
;*/

        add     bx,dx                             ;Find end of the destination
        add     di,dx                             ;Will need to update dest start address
        add     si,dx                             ;  and source's too
        mov     dx,bx                             ;Save inclusive bit count + start MOD 8
        mov     ch,fbF0                 ;but if src is mono and dst is color,
        xor     ch,F0_DEST_IS_COLOR     ;it is in AX
        test    ch,F0_SRC_IS_COLOR or F0_DEST_IS_COLOR
        jnz     @F
        mov     dx,ax                   ;Save inclusive bit count + start MOD 8
@@:
        and     bx,00000111b                      ;Get dest   offset within byte
        and     ax,00000111b                      ;Get source offset within byte
        mov     ch,bitmask_tbl2[bx]              ;Get start byte mask
        mov     gl_start_bit,bl                   ; save starting byte bit #
        cmp     al,bl                             ;Compute horiz. phase  (source - dest)
        jb      pp_double_fetch                  ;Scan line case 5, everything is
                                                  ;  already set for this case.
                                                  ;/*
;**  Scan line cases 4 and 6:
;**  The correct first byte fetch needs to be set for the beginning
;**  of the outer loop
;*/

        mov     fbFetch,FF_ONE_INIT_FETCH
        jmp     short pp_one_inifetch
pp_double_fetch:

;/*
;**  If only-one-fetch is already set, then the following is a NOP.
;**  It doesn't seem worth the effort to check and jmp around.
;**
;**  If we get all the bits we need in the first fetch then a second
;**  (unnecessary) fetch could cause a GP Fault.   So let's examine this:
;**
;**  (DX + SI) points to the first pel (remember we're stepping left).
;**  So the number of needed bits we get in the first fetch is
;**  ((DX + SI + 1) mod 8).  This is currently equal to AX.
;**  If AX >= cxExt then we'd better not do two init fetches.
;*/

        cmp     ax,cxExt
        jl      pp_double_fetch_really_needed
        mov     fbFetch,FF_ONLY_1_SRC_BYTE
        .errnz  FF_ONE_INIT_FETCH
pp_double_fetch_really_needed:
pp_one_inifetch:

        sub     al,bl                             ;Compute horiz. phase  (source-dest)
        add     al,8                              ;Ensure phase positive
        and     al,00000111b

;/*
;**  To avoid GP faults must never do an extra fetch we don't need.
;**  The last byte fetch is unnecessary if Phase is greater than or equal to
;**  8 - BL.  Phase is the number of bits we still have from the previous fetch.
;**  8 - BL is the number of bits we actually need to write to the final
;**  destination byte.  So if AL - (8 - BL) >= 0   skip the last fetch.
;*/

        pop     bx
        add     bl,al
        sub     bl,8
        jl      pp_need_last_fetch
        or      fbFetch,FF_NO_LAST_FETCH
pp_need_last_fetch:

;/*
;**       We now have the correct phase and the correct first character fetch
;**       routine set.  Generate the phase mask and save it.
;**
;**       currently:   AL = phase
;**                    CH = first byte mask
;**                    CL = last byte mask
;**                    DX = inclusive bit count + start MOD 8
;*/

        mov     ah,cl                             ;Save last mask
        mov     cl,al                             ;Create the phase mask
        mov     bx,00FFh                          ;  by shifting this
        shl     bx,cl                             ;  according to the phase
        mov     cl,ah                             ;Restore last mask

phase_proc_both_directions:

;/*
;**  Go compute # of bytes to BLT.
;**  The different processing for the different X directions has been
;**  completed, and the processing which is the same regardless of
;**  the X direction is about to begin.
;**
;**  The phase mask, the first/last byte masks, the X byte offsets,
;**  and the number of innerloop bytes must be calculated.
;**
;**
;**  Nasty stuff coming up here!                   We now have to determine how
;**  many bits will be BLTed and how they are aligned within the bytes.
;**  Here's how we'll do it:
;**
;**  The (inclusive) number of bits is added to the start MOD 8 value
;**  (the left side of the rectangle, minimum X value),
;**  then the result is divided by 8. Then:
;**
;**
;**     1) If the result is 0, then only one destination byte is being
;**        BLTed.  In this case, the start & ending masks will be ANDed
;**        together, the innerloop count (# of full bytes to BLT) will
;**        be zeroed, and the mLast set to all 0's (don't alter any
;**        bits in last byte which will be the byte following the first
;**        (and only) byte).
;**
;**                |      x x x x x|                 |
;**                |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                 0 1 2 3 4 5 6 7
;**
;**                start MOD 8 = 3,  extent-1 = 4
;**                (3+4) DIV 8 = 0, only altering one byte
;**
;**
;**
;**     2) If the result is 1, then only two bytes will be BLTed.
;**        In this case, the start and ending masks are valid, and
;**        all that needs to be done is set the innerloop count to 0.
;**        (it is true that the last byte could have all bits affected
;**        the same as if the innerloop count was set to 1 and the
;**        last byte mask was set to 0, but I don't think there would be
;**        much time saved special casing this).
;**
;**                |  x x x x x x x|x x x x x x x|
;**                |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                 0 1 2 3 4 5 6 7
;**
;**                start MOD 8 = 1,  extent-1 = 14
;**                (1+14) DIV 8 = 1.  There is a first and last
;**                byte but no innerloop count
;**
;**
;**
;**     3) If the result is >1, then there is some number of entire
;**        bytes to be BLted by the innerloop.  In this case the
;**        number of innerloop bytes will be the result - 1.
;**
;**                |                               x|x x x x x x x x|x
;**                |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                 0 1 2 3 4 5 6 7
;**
;**                start MOD 8 = 7,  extent-1 = 9
;**                7+9  DIV 8 = 2.  There is a first and last
;**                byte and an innerloop count of 1 (result - 1)
;**
;**        Currently:      AL = horizontal phase
;**                        BX = horizontal phase mask
;**                        CH = first byte mask
;**                        CL = last byte mask
;**                        DX = left side X MOD 8 + inclusive X count
;**                        SI = source start X
;**                        DI = dest   start X
;*/

        mov     iHorzPhase,al                     ;Save horizontal phase
        mov     mPhase,bx                         ;Save phase mask

        test    fbF0,F0_DEST_IS_COLOR
        jz      @F
        test    fbF0,F0_GAG_CHOKE; is there a source and is it mono?
        jnz     @F                                ; yes -- treat as mono...

        or      dx,dx
        jz      phase_proc_55
        inc     dx
        jmp     short phase_proc_60

@@:     shr     dx,3                              ;/8 to get full byte count
        jnz     phase_proc_60                     ;Result is >0, check it out

phase_proc_55:

;/*
;**       There will only be one byte affected.  Therefore the two byte masks
;**       must be combined, the last byte mask cleared, and the innerloop
;**       count set to zero.
;*/

        or      fbFetch,FF_ONLY_1_DEST_BYTE
        and     ch,cl                             ;Combine the two masks
        xor     cl,cl                             ;Clear out the last byte mask
        inc     dx                                ;Now just fall through to set
        errn$   phase_proc_60                     ;  the innerloop count to 0!


phase_proc_60:
        dec     dx                                ;Dec count (might become 0 just like
        mov     cInnerByte,dx                    ;  we want), and save it
        mov     bl,ch
        mov     ch,cl                             ;Compute last byte mask
        not     ch                                ;  and save it
        mov     mLast,cx
        mov     bh,bl                             ;Compute start byte mask
        not     bl                                ;  and save it
        mov     mStart,bx

;/*
;**  There may or may not be a source bitmap for the following address
;**  computation.  If there is no source, then the vertical setup code
;**  will be entered with both the source and destination Y's set to the
;**  destination Y and the address calculation skipped.  If there is a
;**  source, then the address calculation will be performed and the
;**  vertical setup code entered with both the source and destination Y's.
;*/

phase_processing_y:
        mov     cl,fbF0
        test    cl,F0_DEST_IS_COLOR
        jnz     @F
        shr     di,3                              ;Compute byte offset of destination
                                                  ;  and add to current destination
                                                  ;  offset
@@:     add     devDst.lp_bits.lo,di

        mov     dx,yDst                          ;Get destination Y origin
        mov     ax,dx                             ;Assume no source

        test    cl,F0_SRC_PRESENT                ;Is a source needed?
        jz      phase_proc_70                     ;  No, skip source set-up

        test    cl,F0_SRC_IS_COLOR
        jnz     @F
        shr     si,3                              ;Compute byte offset of source
                                                  ;  and add to current source offset
@@:     add     devSrc.lp_bits.lo,si
        mov     ax,ySrc                          ;Get source Y origin


        subttl  Phase Processing (Y)
        page

;/*
;**       The horizontal parameters have been calculated.  Now the vertical
;**       parameters must be calculated.
;**
;**       Currently:
;**               DX = destination Y origin
;**               AX = source Y origin (destination origin if no source)
;**               CL = fbF0
;*/

phase_proc_70:
        mov     bx,cyExt                          ;Get the Y extent of the BLT
        dec     bx                                ;Make it inclusive


;/*
;**     The BLT will be Y+ if the top of the source is below or equal
;**     to the top of the destination (cases: 1,4,5,7,8).  The BLT
;**     will be Y- if the top of the source is above the top of the
;**     destination (cases: 2,3,6)
;**
;**
;**               !...................!
;**               !D                  !
;**           ____!             ..x   !
;**          |S   !               :   !     Start at top of S walking down
;**          |    !                   !
;**          |    !...................!
;**          |                    :
;**          |____________________:
;**
;**
;**           __________________
;**          |S                 |
;**          |    .....................     Start at bottom of S walking up
;**          |    !D                  !
;**          |    !             :     !
;**          |____!           ..x     !
;**               !                   !
;**               !....................
;*/


        mov     ch,INCREASE                       ;Set Y direction for top to bottom
        cmp     ax,dx                             ;Which direction do we move?
        jge     phase_proc_80                     ;Step down screen (cases: 1,4,5,7,8)


;/*
;**       Direction will be from bottom of the screen up (Y-)
;**
;**       This code will not be executed if there is no source since
;**       both Y's were set to the destination Y.
;*/


        add     dx,bx                             ;Find bottom scan line index for
        add     ax,bx                             ;  destination and source
        mov     ch,DECREASE                       ;Set pattern increment

phase_proc_80:
        add     yPatRow,dl                        ;Set pattern row and increment
        mov     iDir,ch
        sar     ch,1                              ;Map FF==>FF, 01==>00
        .errnz  DECREASE - (-1)
        .errnz  INCREASE -   1

;/*
;**     The Y direction has been computed.  Compute the rest of the
;**     Y parameters.  These include the actual starting address,
;**     the scan line and plane increment values, and whether or not
;**     the extents will cross a 64K boundary.
;**
;**     Currently:
;**             DX = Y of starting destination scan
;**             AX = Y of starting source scan
;**             CH = BLT direction
;**                    00 = increasing BLT, Y+
;**                    FF = decreasing BLT, Y-
;**             CL = fbF0
;**             BX = inclusive Y extent
;*/

phase_proc_90:
        test    cl,F0_SRC_PRESENT                ; is a source needed?
        jz      phase_proc_100                    ; no, skip source set-up...
        test    cl,F0_SRC_IS_DEV                 ; is the source the device?
        jnz     phase_proc_100                    ; yes -- no source set-up needed...

        mov     cl,ch                             ; want CX = +/- 1
        push    dx                                ; save destination Y
        push    bp                                ; mustn't trash frame pointer
        lea     bp,devSrc                         ; --> source data structure
        call    compute_y                         ; process as needed
        pop     bp                                ; restore frame pointer
        pop     dx                                ; restore destination Y

phase_proc_100:
        mov     cl,ch                             ; want CX = +/- 1
        push    bp                                ; mustn't trash frame pointer
        mov     ax,dx                             ; put destination Y in ax
        lea     bp,devDst                         ; --> destination data structure
        call    compute_y                         ; process as needed
        pop     bp                                ; restore frame pointer


        subttl  Memory allocation for BLT compilation
        page

;/*
;**     Allow room for the BLT code.  The maximum that can be generated
;**     is defined by the variable MAX_BLT_SIZE.  This variable must be
;**     an even number.
;*/

cblt_allocate:

        xor     ax,ax                             ;Error code !!!!
        cmp     sp,MAX_BLT_SIZE+80h              ; is there room for the blt ?
        jnb     @F                                ; yes...
        jmp     do_cblt_exit                      ; no: return 0.
@@:     sub     sp,MAX_BLT_SIZE                  ; take off the slop

        mov     di,sp
        mov     pfnBlt.lo,di                     ;Save the address for later
        mov     ax,ss                             ;Set the segment for the BLT
        mov     es,ax
        assumes es,nothing

ifdef FIREWALLS
        push    di                                ; Clean the blackboard
        mov     ax,0CCCCh
        mov     cx,MAX_BLT_SIZE / 2
        rep     stosw
        pop     di
endif

        mov     ds,BitmapData
        assumes ds,Data
        mov     ax,selCompileCode
        mov     pfnBlt.hi,ax                      ;Save the address for later
        mov     ax,cs                             ;Set data seg to CS so we can access
        mov     ds,ax                             ;  code without overrides
        assumes ds,Code
        xor     cx,cx                             ;Clear out count register

        cCall   CBLT                              ;compile the BLT onto the stack
        assumes ds,nothing
        assumes es,nothing

        page

;/*
;**       The BLT has been created on the stack.   Set up the initial registers,
;**       set the direction flag as needed, and execute the BLT.
;*/

        test    fbF0,F0_SRC_PRESENT;Is there a source?
        jz      call_blt_no_source               ; no -- don't load its pointer...
        test    fbF0,F0_SRC_IS_DEV; is the source the device ?
        jnz     call_blt_src_is_dev              ; yes -- don't load the pointer...

        lds     si,devSrc.lp_bits                ; --> source device's first byte
        assumes ds,nothing
        jmp     short   call_blt_get_dest_bits

call_blt_src_is_dev:

        WaitQ   7
        outbQ   READ_ENABLE,READ_ALL_PLANES ; read from all planes
        outwQ   MODE,(MODE_2DECODE+MD_PS_ONES+MD_AF_R_THRU)
        mov     cx,CMD_CBLT_8BPP                 ; assume running 8 bpp

ifdef   4BPP_USED
        test    dl_hwFlags,HW_8_BPP              ; assumption correct ?
        jnz     @F                                ; yes...
        mov     cx,CMD_CBLT_4BPP                 ; no -- load 4bpp command word
endif;  4BPP_USED

@@:     mov     si,cxExt                           ; 1-based width of blt
        dec     si                                ; 0-based width of blt
        outwQ   LX,si                             ; set width of blt
        mov     di,cyExt                           ; 1-based height of blt
        dec     di                                ; 0-based height of blt
        outwQ   LY,di                             ; set height of blt
        mov     ax,ySrc                          ; preload y origin
        cmp     iDir,DECREASE                    ; moving down in y ?
        jne     @F                                ; no...
        and     cx,NOT CMD_DY                     ; yes -- set for moving down in y
        add     ax,di                             ; and point to greatest y in blt
@@:     mov     bx,xSrc                          ; preload x origin
        cmp     iStepDir,STEPLEFT; moving left in x ?
        jne     @F                                ; no...
        and     cx,NOT CMD_DX                     ; yes -- set for moving left in x
        add     bx,si                             ; and point to greatest x in blt
@@:     outwQ   Y0,ax                             ; source ul y
        outwQ   X0,bx                             ; source ul x
        outwQ   CMD_FLAGS,cx

        WaitQIN                                   ; wait for available queued data
        mov     si,COLOR_1                        ; point SI at variable data port

call_blt_no_source:
        test    fbF0,F0_PAT_PRESENT              ;Is there a pattern?
        jz      call_blt_get_dest_bits           ;  No, we won't be using DS
        mov     ds,pBrush.sel                    ; If DS is not used for the src then
        assumes ds,nothing                        ; we can leave it pointing at the pat

call_blt_get_dest_bits:
 
        les     di,devDst.lp_bits                ;--> destination device's first byte
        assumes es,nothing
        mov     cx,cyExt                          ;Get count of lines to BLT
        cld                                       ;Assume this is the direction
        cmp     iStepDir,STEPRIGHT ;Stepping to the right?
        jz      call_stackblt                     ;  Yes
        std

        public  call_stackblt
call_stackblt:
        push    bp                                ;MUST SAVE THIS
        call    pfnBlt                           ;Call the FAR process
        pop     bp
        cld
        add     sp,MAX_BLT_SIZE                   ;Return BLT space

do_cblt_exit:

cEnd

sEnd    Bitmap

        ; Declare the internal labels of the BLT set-up code as public symbols.
        ; NOTE(review): presumably exported so they appear in the map/symbol
        ; file for debugging -- confirm against the build.
        ; call_stackblt is also declared public at its definition, so its
        ; entry in this list repeats that declaration.
        public  parse_10
        public  cursor_exclusion
        public  phase_processing
        public  phase_processing_x
        public  phase_dest_mono
        public  phase_proc_10
        public  phase_src_mono
        public  phase_proc_stepping_right
        public  phase_proc_two_fetches
        public  pp_second_fetch_really_needed
        public  pp_only_one_inifetch
        public  geoff_src_mono
        public  geoff_r
        public  phase_proc_last_fetch_needed
        public  phase_proc_stepping_left
        public  pp_double_fetch
        public  pp_double_fetch_really_needed
        public  pp_one_inifetch
        public  pp_need_last_fetch
        public  phase_proc_both_directions
        public  phase_proc_55
        public  phase_proc_60
        public  phase_processing_y
        public  phase_proc_70
        public  phase_proc_80
        public  phase_proc_90
        public  phase_proc_100
        public  cblt_allocate
        public  call_blt_src_is_dev
        public  call_blt_no_source
        public  call_blt_get_dest_bits
        public  call_stackblt
        public  bitblt_exit
        public  bitblt_exit_fail


        end
