;*DDK*************************************************************************/
;
; COPYRIGHT    Copyright (C) 1995 IBM Corporation
;
;    The following IBM OS/2 WARP source code is provided to you solely for
;    the purpose of assisting you in your development of OS/2 WARP device
;    drivers. You may use this code in accordance with the IBM License
;    Agreement provided in the IBM Device Driver Source Kit for OS/2. This
;    Copyright statement may not be removed.;
;*****************************************************************************/
        page    ,132

;/*****************************************************************************
;*
;* SOURCE FILE NAME = STRCHBLT.ASM
;*
;* DESCRIPTIVE NAME = StretchBlt at level of device driver.
;*
;*
;* VERSION      V2.0
;*
;* DATE         04/03/90
;*
;* DESCRIPTION  StretchBlt at level of device driver.     
;*              
;*              
;* FUNCTIONS    OEMStretchBlt
;*
;* NOTES        NONE
;*
;* STRUCTURES   NONE
;*
;* EXTERNAL REFERENCES
;*
;*              NONE
;*
;* EXTERNAL FUNCTIONS
;*
;*              NONE
;*
;* CHANGE ACTIVITY =
;*   DATE      FLAG        APAR   CHANGE DESCRIPTION
;*   --------  ----------  -----  --------------------------------------
;*   mm/dd/yy  @Vr.mpppxx  xxxxx  xxxxxxx
;*   07/03/90                     Written by Viroon Touranachun 
;*
;*****************************************************************************/

        .386

        OPTION  OLDSTRUCTS

        .xlist

INCL_DDICOMFLAGS    EQU         1
INCL_DOSERRORS      EQU         1
INCL_FONTFILEFORMAT EQU         1
INCL_GRE_CLIP           equ     1
INCL_GRE_SCANS          equ     1
INCL_GPIREGIONS         equ     1
INCL_GPIERRORS          equ     1
INCL_GPIPRIMITIVES  EQU 1
INCL_GRE_DCS        EQU 1
INCL_SAADEFS        EQU 1
INCL_GPIBITMAPS         equ     1       ; for bitmap info structure
INCL_DDIMISC            equ     1       ; for BBP structure
DINCL_ENABLE            equ     1
DINCL_BITMAP            equ     1

        include pmgre.inc
        include driver.inc
        include display.inc
        include egafam.inc
        include oemblt.inc
        include assert.mac
        include extern.inc
        include protos.inc

        .list

        .MODEL FLAT
    
        ASSUME  CS:FLAT,SS:FLAT,DS:FLAT,ES:FLAT

        .DATA

        EXTERN  asDDARun        :DWORD         ; CMPLCODE.ASM
        EXTERN  asPlaneBuff     :BYTE         ; CMPLCODE.ASM

OSB_Y_SHRINK            equ     01h     ; compression in vertical direction
OSB_HUGE_BITMAP         equ     02h     ; huge source bitmap
XOS_NEXT_MONO_PLANE     equ     2       ; the mono=>color munge entry size

        .CODE

        EXTERN  clean_up_before_exit_no_test:NEAR       ; EXIT.BLT


;/***************************************************************************
;*
;* FUNCTION NAME = OEMStretchBlt 
;*
;* DESCRIPTION   = Working routine that stretch-blts a memory bitmap
;*                 directly (ROP_SRCCOPY) to the screen.
;*
;*                 Steps performed here:
;*                   1. Derive the source/destination extents from the
;*                      BITBLTPARAMETERS rectangles reached through lpParm.
;*                   2. Compute the initial Y and X DDA error terms and
;*                      select SMajorDDARun or DMajorDDARun.
;*                   3. Add the four DWORD deltas that start at
;*                      STBLT.stb_xSrcDel to the source/destination origins
;*                      and build the horizontal run record in asDDARun.
;*                   4. Compute the address of the first source word
;*                      (lpBits) and the first screen byte (lpScreen), and
;*                      the destination byte masks (ComputeInterval).
;*                   5. Exclude the cursor, then walk the scans with a
;*                      vertical DDA, calling through pfnXDDAScan and
;*                      pfnCopyScan.
;*                   6. Call clean_up_before_exit_no_test and re-enable
;*                      the cursor.
;*
;*                 The helper routines that follow the RET address this
;*                 procedure's arguments and locals by name, so they must
;*                 only be called while this frame is active.
;*
;* INPUT         = pddcDst  destination DDC
;*                 xDst     first visible destination pel
;*                 yDst     first visible destination scan
;*                 psdSrc   source SURFACE
;*                 spacer   not referenced in the visible code
;*                 lpParm   pointer to the STBLT parameter block
;*                 cxExt    number of destination pels to blt per scan
;*                 cyExt    height used for the cursor exclusion rectangle
;* OUTPUT        = NONE (no return value is set explicitly here)
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        ALIGN   4

OEMStretchBlt   PROC SYSCALL USES edi esi, pddcDst :DWORD,
                                        xDst    :DWORD,
                                        yDst    :DWORD,
                                        psdSrc  :DWORD,
                                        spacer  :DWORD,
                                        lpParm  :DWORD,
                                        cxExt   :DWORD,
                                        cyExt   :DWORD  
                LOCAL   xSrcOrg         :DWORD    ; source rect left   + delta
                LOCAL   ySrcOrg                  :DWORD ; source rect bottom + delta
                LOCAL   cxSrcExt                 :DWORD ; source width
                LOCAL   cySrcExt                 :DWORD ; source height
                LOCAL   xDstOrg                  :DWORD ; target rect left   + delta
                LOCAL   yDstOrg                  :DWORD ; target rect bottom + delta
                LOCAL   cxDstExt                 :DWORD ; target width
                LOCAL   cyDstExt                 :DWORD ; target height
                LOCAL   cyErrorTerm     :DWORD    ; initial Y DDA error term
                LOCAL   lpBits          :DWORD    ; first source word to blt
                LOCAL   selFirstSeg     :DWORD    ; not referenced in the visible code
                LOCAL   npSrcLastScan   :DWORD    ; only adjusted below, see note there
                LOCAL   lpScreen        :DWORD    ; first destination byte on screen
                LOCAL   pfnXDDAScan     :DWORD    ; routine that blts one source scan
                LOCAL   pfnCopyScan     :DWORD    ; routine that repeats the previous scan
                LOCAL   cbSrcPlane      :DWORD    ; loaded from sd_cbScan
                LOCAL   cbSrcScan       :DWORD    ; loaded from sd_dScan
                LOCAL   cbDstScan       :DWORD    ; SCREEN_DSCAN
                LOCAL   cxSrcVisPel     :DWORD    ; first visible source pel (set by DDA run)
                LOCAL   cbDstMidByte    :DWORD    ; set by ComputeInterval
                LOCAL   XSI             :DWORD    ; holds the plane buffer address
                LOCAL   XDI             :DWORD    ; not referenced in the visible code
                LOCAL   XDX             :DWORD    ; not referenced in the visible code
                LOCAL   cxDstFirstShft  :BYTE     ; set by ComputeInterval
                LOCAL   cxDstLastShft   :BYTE     ; set by ComputeInterval
                LOCAL   bDstFirstMask   :BYTE     ; set by ComputeInterval
                LOCAL   bDstLastMask    :BYTE     ; set by ComputeInterval
                LOCAL   fbStBlt         :BYTE     ; OSB_* flags
                LOCAL   bForeClr        :BYTE     ; mono source: image foreground colour
                LOCAL   bBackClr        :BYTE     ; mono source: image background colour

        DebugMsg <OEMStrectchBlt STRCHBLT CLIFFL>

        cld                             ; string ops below must run forward
        xor     eax,eax
        mov     fbStBlt,al              ; initialize the flag


;/*
;** Get the source/destination bltting area's vertical/horizontal extents
;*/

        mov     esi,lpParm               ; ptr to stretchblt parameters

        ASSUME  esi:PTR STBLT

        mov     edi,[esi].stb_lpRectl  ; bltting area

        ASSUME  edi:PTR BITBLTPARAMETERS

        mov     eax,[edi].bbp_rclSrc.RECTL.rcl_xLeft
        mov     xSrcOrg,eax
        mov     ebx,[edi].bbp_rclSrc.RECTL.rcl_xRight
        sub     ebx,eax                   ; EBX = cxSrcExt = SrcRight - SrcLeft
        mov     eax,[edi].bbp_rclSrc.RECTL.rcl_yBottom
        mov     ySrcOrg,eax
        mov     ecx,[edi].bbp_rclSrc.RECTL.rcl_yTop
        sub     ecx,eax                   ; ECX = cySrcExt = SrcTop - SrcBottom

        mov     eax,[edi].bbp_rclTarg.RECTL.rcl_xLeft
        mov     xDstOrg,eax
        mov     edx,[edi].bbp_rclTarg.RECTL.rcl_xRight
        sub     edx,eax                   ; EDX = cxDstExt = DstRight - DstLeft
        mov     eax,[edi].bbp_rclTarg.RECTL.rcl_yBottom
        mov     yDstOrg,eax
        mov     eax,[edi].bbp_rclTarg.RECTL.rcl_yTop
        sub     eax,yDstOrg              ; EAX = cyDstExt = DstTop - DstBottom

osb_have_extent::

;/*
;** All four extents are expected to be strictly positive.
;*/

        assert  eax,G,0

        assert  ebx,G,0

        assert  ecx,G,0

        assert  edx,G,0

        mov     cyDstExt,eax
        mov     cxSrcExt,ebx
        mov     cySrcExt,ecx
        mov     cxDstExt,edx

;/*
;** Calculate the initial X/Y error terms and the horizontal DDA Runlength.
;** EBX = cxSrcExt, EDX = cxDstExt
;** ECX = cySrcExt, EAX = cyDstExt
;*/

osb_test_yshrink::

        cmp     ecx,eax
        jbe     osb_calc_yerr           ;source not taller: dest is the major axis
        or      fbStBlt,OSB_Y_SHRINK    ;flag if we are vertically compressing
        xchg    ecx,eax                 ;EAX = major extent, ECX = minor extent

osb_calc_yerr::

        shr     ecx,1                    ;Initial error = MajExt - MinExt/2
        sub     eax,ecx
        mov     cyErrorTerm,eax

osb_test_xshrink::

        mov     ecx,OFFSET SMajorDDARun  ;assume source is the major X axis
        cmp     ebx,edx
        jae     osb_calc_xerr
        xchg    ebx,edx                  ;EBX = major extent, EDX = minor extent
        mov     ecx,OFFSET DMajorDDARun  ;destination is the major X axis

osb_calc_xerr::

        shr     edx,1                    ;Initial error = MajExt - MinExt/2
        sub     ebx,edx                   ;EBX = initial X error term

;/*
;** Before we calculate the DDA run, we need to transform the starting
;** coordinates of the bltting area from device to screen coordinate.
;** The four consecutive DWORDs starting at STBLT.stb_xSrcDel are added to
;** xSrcOrg, ySrcOrg, xDstOrg and yDstOrg, in that order.
;*/

osb_xform_origin::

        add     esi,OFFSET STBLT.stb_xSrcDel    ;ESI => STBLT.stb_xSrcDel
        lodsd   
        add     xSrcOrg,eax
        lodsd
        add     ySrcOrg,eax
        lodsd
        add     xDstOrg,eax
        lodsd
        add     yDstOrg,eax

osb_calc_xdda::

        lea     edi,asDDARun             ;EDI => XDDA run array
        call    ecx                      ;SMajorDDARun or DMajorDDARun; EBX = X error
                                         ;term; the routine stores cxSrcVisPel

;/*
;** Calculate the offset to the start byte in the source bitmap that corresponds
;** to the origin of the clipped source bltting area and the offset to the first
;** pel
;*/

osb_calc_src_offset::

        mov     esi,psdSrc               ;the source surface

        ASSUME  esi:PTR SURFACE

        mov     eax,[esi].sd_pBits    ;the source bitmap
        mov     lpBits,eax

        mov     ebx,[esi].sd_dScan     ;the source scansize
        mov     cbSrcScan,ebx
        mov     eax,[esi].sd_cbScan
        mov     cbSrcPlane,eax           ;the source plane size (mono unused)
        mov     eax,[esi].sd_cy
        sub     eax,ySrcOrg
        dec     eax                      ;flip to HW coordinate

        mul     ebx                      ;EAX = offset to start of the scan

        assert  edx,E,0                  ;the product must fit in 32 bits

        add     lpBits,eax
        mov     eax,cxSrcVisPel          ;EAX = x of the 1st visible src pel
        shr     eax,4                    ;pel index -> word index (16 pels per word)
        shl     eax,1                    ;make it word offset
        add     lpBits,eax
        ; NOTE(review): npSrcLastScan is not initialised anywhere in the
        ; visible code before this add - confirm whether it is still used.
        add     npSrcLastScan,eax        ;start from this byte in last scan
        and     cxSrcVisPel,15          ;offset of the 1st pel within word

;/*
;** We now need to decide which routine to use to blt each src plane, that is,
;** to use routines for color or monochrome source.
;*/

osb_check_src_color::

        mov     eax,OFFSET XFerColorScan   ;assume color source
        test    [esi].sd_fb,SD_COLOR
        mov     esi,pddcDst                    ;ESI => the dest. ddc (MOV keeps the flags)
        jnz     osb_have_routine        ;check if source is color

;/*
;**  We have a monochrome source, therefore we need to know the destination's
;**  image foreground and background colors for color mapping.
;*/

        ASSUME  esi:PTR DDC

        movzx   eax,[esi].ddc_ia.ia_ba.ba_ipc
        mov     ah,BYTE PTR [esi].ddc_ia.ia_ba.ba_ipcBack.ipc_bClr
        mov     bForeClr,al
        mov     bBackClr,ah
        mov     eax,OFFSET XFerMonoScan

osb_have_routine::

        mov     pfnXDDAScan,eax          ;get the right routine
        mov     pfnCopyScan,eax          ;we cannot copy screen-screen until a
                                        ;scan is drawn

;/*
;** lpBits now points to the word that contains the 1st visible src pel      
;** now calculate the address of the start byte in the screen blt area.     
;*/

osb_calc_dst_offset::

        mov     esi,[esi].ddc_npsd     ; ESI => the dest. surface

        ASSUME  esi:PTR SURFACE

        mov     cbDstScan,SCREEN_DSCAN  ; destination scan width in bytes
        mov     eax,[esi].sd_cy        ; flip screen's y-coordinate to the
        sub     eax,yDst                 ; hardware convention
        dec     eax                      ; make extents inclusive
        mov     edi,eax                   ; save it for cursor exclusion
        mul     cbDstScan                        ; no of bytes in a scan

        assert  edx,E,0                  ; the product must fit in 32 bits

        mov     ebx,xDst                          ; get the x origin
        shr     ebx,3                             ; get the no of bytes
        add     eax,ebx                           ; this is the start offset 
        add     eax,[esi].sd_pBits               ; add the screen base address
        mov     lpScreen,eax

        call    ComputeInterval         ; compute interval parameters
                                        ; (EDI is not modified by it)

;/*
;** Cursor Exclusion for a device destination.
;** Registers handed to far_exclude:
;**   ECX = xDst, ESI = xDst + cxExt - 1,
;**   EDI = hardware y of scan yDst, EDX = EDI - cyExt + 1
;** NOTE(review): far_exclude's register contract is not visible here.
;*/

osb_cursor_exclude::

        mov     esi,cxExt
        dec     esi                               ;Make extents inclusive of last point
        mov     ecx,xDst                          ;Set left
        add     esi,ecx                           ;Set right

        mov     edx,cyExt
        neg     edx
        add     edx,edi                   ;EDX = EDI - cyExt
        inc     edx                      ;EDX = EDI - cyExt + 1

        INVOKE  far_exclude                               ;Exclude the area from the screen

;/*
;** Setup pointers to all the essential data structures.
;*/

osb_set_blt_param::

        lea     esi,asPlaneBuff          ;ESI => Plane Buffer
        mov     XSI,esi                  ;keep the plane buffer address in XSI
        mov     esi,lpBits               ;the starting pel to blt
        mov     edi,lpScreen             ;the starting pel on screen
        mov     ebx,cyErrorTerm          ;initial y error term

;/*
;** Now we have all the essential information to perform stretchblt. So start
;** munging the bits. Do the DDA in the y direction. For each scan, call
;** through pfnXDDAScan to do the DDA in the X direction.
;** Throughout the loops: ESI => current source scan, EDI => current
;** destination scan, EBX = Y error term.
;*/

osb_start_blt::

        test    fbStBlt,OSB_Y_SHRINK    ;we treat stretching and compressing
        jnz     osb_y_shrink      ;separately

;/*
;** We are stretching the bitmap vertically. This means, in terms of Y
;** coordinate, destination is the major axis and source is minor. therefore,
;** every source scan will be output to screen. Note that since we may have
;** some destination scan clipped off, we will let the XDDA function decide
;** whether it is going to process that scan.
;*/

osb_stretch_next_scan::

        call    [pfnXDDAScan]           ;walk along the current src scan
        jz      osb_exit                ;ZF set if all dest scans are processed

osb_stretch_scan_done::

        sub     ebx,cySrcExt             ;subtract minor axis extent
        jge     osb_stretch_copy_scan    ;are we repeating the same scan?
        add     ebx,cyDstExt             ;no! prepare to walk the next scan
        sub     esi,cbSrcScan            ;proceed to the next scan
        jmp     osb_stretch_next_scan

        ALIGN   4



osb_stretch_copy_scan::

        call    [pfnCopyScan]           ;copy the previous scan
        jnz     osb_stretch_scan_done   ;ZF clear: more dest scans to do
        jmp     osb_exit

        ALIGN   4


;/*
;** We are compressing the bitmap vertically. This means, in terms of Y
;** coordinate, source is the major axis and destination is minor. therefore,
;** some source scans will be ignored. Note that since we may have
;** some destination scan clipped off, we will let the XDDA function decide
;** whether it is going to process that scan.
;*/


osb_y_shrink::

        assert  ebx,NE,0                 ;EBX = YErrorTerm should not be 0

        call    [pfnXDDAScan]           ;walk along the current src scan
        jz      osb_exit          ;ZF set if all dest scans are processed

osb_elim_next_scan::

        sub     esi,cbSrcScan            ;step to the next source scan

osb_elim_this_scan::

        sub     ebx,cyDstExt             ;subtract minor axis extent
        jge     osb_elim_next_scan       ;error still >= 0: skip this source scan
        add     ebx,cySrcExt             ;add major axis extent
        call    [pfnXDDAScan]            ;output this source scan
        jnz     osb_elim_next_scan       ;ZF clear: more dest scans to do


;/*
;** now reset the EGA/VGA parameters and bring back the cursor
;*/

osb_exit::

        call    clean_up_before_exit_no_test; restore EGA registers

        INVOKE  far_unexclude                     ; re-draw the cursor

osb_end::

        RET

;/*
;** The routines below end in a plain RET and use the frame set up above,
;** so automatic prologue/epilogue generation is switched off from here on.
;*/

        OPTION  PROLOGUE:None
        OPTION  EPILOGUE:None


;/***************************************************************************
;*
;* PUBLIC ROUTINE  SMajorDDARun
;*
;* DESCRIPTION   = Builds the horizontal DDA run record for the case where
;*                 the source extent is the major axis (cxSrcExt >=
;*                 cxDstExt).  It first walks the DDA from the unclipped
;*                 origin (xSrcOrg, xDstOrg) until the destination x
;*                 reaches xDst and stores the matching source x in
;*                 cxSrcVisPel.  It then stores cxExt-1 DWORD records at
;*                 [EDI]; each record is the number (>= 1) of source pels
;*                 to advance from one visible destination pel to the next.
;*
;*                 A cxExt that is not greater than 1 produces no records.
;*
;*                 Uses the arguments and locals of OEMStretchBlt, so it
;*                 may only be called while that frame is active.
;*
;*                 Registers Destroyed:
;*                       EAX,EBX,ECX,EDX,ESI,EDI,flags
;*
;* INPUT         = EBX = initial X error term
;*                 EDI = address of the DDA run record (direction flag clear)
;* OUTPUT        = cxSrcVisPel = source x of the first visible pel
;*                 EDI = address just past the last record written
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

SMajorDDARun::

;/*
;** First, DDA walk until we find the first source pel corresponds to the
;** first visible pel on the destination.
;*/

        mov     eax,xSrcOrg              ;EAX = source x
        mov     ecx,xDstOrg              ;ECX = destination x
        mov     edx,cxSrcExt             ;EDX = major axis extent
        mov     esi,cxDstExt             ;ESI = minor axis extent
        cmp     ecx,xDst                 ;enter the visible area?
        je      sa_have_first_src

ifdef   FIREWALLS

        jl      sa_find_first_src

        rip     text,<OEMStretchBlt - xincorrect dst starting coordinates>

endif;  FIREWALLS
;/*
;** In the DDA loop:
;** EAX = source x, ECX = destination x, EBX = error term
;** EDX = major axis extent, ESI = minor axis extent
;*/

sa_find_first_src::

        inc     eax                      ;step major axis
        sub     ebx,esi                  ;subtract minor axis extent
        jge     sa_find_first_src        ;step minor axis?
        add     ebx,edx                  ;yes, add major extent
        inc     ecx                      ;step minor axis
        cmp     ecx,xDst                 ;enter the visible area?
        jl      sa_find_first_src
;/*
;** We now have the x of the first visible source pel, so remember this
;** for address calculation.
;*/

sa_have_first_src::

        mov     cxSrcVisPel,eax          ;x of the 1st visible source pel
        mov     ecx,cxExt

        assert  ecx,G,0

        dec     ecx                      ;we need this # of run records
        jle     sa_exit                  ;none (also guards a non-positive
                                         ;extent, which would otherwise
                                         ;overrun the run record array)
        xor     eax,eax
;/*
;** In the DDA loop:
;** EAX = interval from the last visible source pel
;** ECX = # run records left
;** EBX = Error term
;** EDX = major axis extent, ESI = minor axis extent
;*/

sa_record_run::

        inc     eax                      ;step major axis
        sub     ebx,esi                  ;subtract minor axis extent
        jge     sa_record_run            ;step minor axis?
        add     ebx,edx                  ;yes, add major extent
        stosd                            ;EAX = interval between visible pels
        xor     eax,eax                  ;reset the interval
        dec     ecx                      ;one record done
        jnz     sa_record_run

sa_exit:

        ret


;/***************************************************************************
;*
;* PUBLIC ROUTINE  DMajorDDARun    
;*
;* DESCRIPTION   = Builds the horizontal DDA run record for the case where
;*                 the destination extent is the major axis (cxDstExt >
;*                 cxSrcExt).  It first walks the DDA from the unclipped
;*                 origin (xSrcOrg, xDstOrg) until the destination x
;*                 equals xDst and stores the matching source x in
;*                 cxSrcVisPel.  It then stores cxExt-1 DWORD records at
;*                 [EDI]; each record is 0 when the next destination pel
;*                 reuses the same source pel and 1 when it moves on to
;*                 the next source pel.
;*
;*                 A cxExt that is not greater than 1 produces no records.
;*
;*                 Uses the arguments and locals of OEMStretchBlt, so it
;*                 may only be called while that frame is active.
;*
;*                 Registers Destroyed:
;*                       EAX,EBX,ECX,EDX,ESI,EDI,flags
;*
;* INPUT         = EBX = initial X error term
;*                 EDI = address of the DDA run record (direction flag clear)
;*
;* OUTPUT        = cxSrcVisPel = source x of the first visible pel
;*                 EDI = address just past the last record written
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

DMajorDDARun::

;/*
;** First, DDA walk until we find the first source pel corresponds to the
;** first visible pel on the destination.
;*/

        mov     eax,xSrcOrg              ;EAX = source x
        mov     ecx,xDstOrg              ;ECX = destination x
        mov     edx,cxSrcExt             ;EDX = minor axis extent
        mov     esi,cxDstExt             ;ESI = major axis extent

ifdef   FIREWALLS

        cmp     ecx,xDst                 ;enter the visible area?
        jle     da_find_first_src

        rip     text,<OEMStretchBlt - yincorrect dst starting coordinates>

endif;  FIREWALLS

;/*
;** In the DDA loop:
;** EAX = source x, ECX = destination x, EBX = error term
;** EDX = minor axis extent, ESI = major axis extent
;*/

da_find_first_src::

        cmp     ecx,xDst                 ;enter the visible area?
        je      da_have_first_src
        inc     ecx                      ;step major axis
        sub     ebx,edx                  ;subtract minor axis extent
        jge     da_find_first_src        ;step minor axis?
        add     ebx,esi                  ;yes, add major extent
        inc     eax                      ;step minor axis
        jmp     da_find_first_src

        ALIGN   4

;/*
;** We now have the x of the first visible source pel, so remember this
;** for address calculation.
;*/

da_have_first_src::

        mov     cxSrcVisPel,eax          ;x of the 1st visible source pel
        mov     ecx,cxExt                ;we need this # of dst pels
        xor     eax,eax                  ;EAX = 0, the value STOSD writes
;/*
;** In the DDA loop:
;** EAX = 0 (record value before correction)
;** ECX = # visible dst pels left
;** EBX = Error term
;** EDX = minor axis extent, ESI = major axis extent
;*/

        assert  ecx,G,0

da_record_run::

        dec     ecx                      ;step major axis
        jle     da_exit                  ;done (also guards a non-positive
                                         ;extent, which would otherwise
                                         ;overrun the run record array)

da_dup_src_pel::

        stosd                           ;assume using the current src pel
        sub     ebx,edx                 ;subtract minor axis extent
        jge     da_record_run           ;step minor axis?
        add     ebx,esi                 ;yes, add major extent
        inc     DWORD PTR [edi][-4]     ;correct assumption--move to next src pel
        dec     ecx                     ;step major axis
        jnz     da_dup_src_pel

da_exit:

        ret


;/***************************************************************************
;*
;* PUBLIC ROUTINE  ComputeInterval 
;*
;* DESCRIPTION   = The bltting interval parameters--first/last byte        
;*                 masks, their shift counts and the middle bytes count--
;*                 are computed from xDst and cxExt.
;*                 A first mask and a last mask will be calculated, and    
;*                 possibly combined into the inner loop count: a mask     
;*                 that covers a whole byte is cleared and that byte is    
;*                 counted as a middle byte instead.                       
;*                                                                         
;*                 Uses the arguments and locals of OEMStretchBlt, so it
;*                 may only be called while that frame is active.
;*                                                                         
;*                 Registers Preserved:                                                                   
;*                       EDI,DS,ES,EBP                                                                    
;*                 Registers Destroyed:                                                                   
;*                       EAX,EBX,ECX,EDX,ESI,FLAGS                                                        
;*                                                                         
;* INPUT         = xDst  = first visible destination pel
;*                 cxExt = number of destination pels
;* OUTPUT        = cbDstMidByte   = number of whole middle bytes
;*                 bDstFirstMask  = altered-bits mask of the first byte
;*                 cxDstFirstShft = index of the lowest set bit of the
;*                                  first mask (8 if the mask is 0)
;*                 bDstLastMask   = altered-bits mask of the last byte
;*                 cxDstLastShft  = index of the lowest set bit of the
;*                                  last mask (8 if the mask is 0)
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

PSL_BYTE_FETCH  equ     0703h                     ;high byte = modulus mask (7),
                                                  ;low byte  = shift count (3)


        ALIGN   4

ComputeInterval::

        mov     edx,xDst                 ;the first visible destination pel
        mov     ebx,cxExt                ;the bltting x-extent
        dec     ebx                      ;make interval inclusive
        mov     ecx,PSL_BYTE_FETCH       ;CH = mask (7), CL = shift count (3)

;/*
;** We now have to determine how many bits will be affected,
;** and how they are aligned within the bytes.
;**
;** (left_x MOD byte_size) will give us the starting pixel
;** within the left byte.  Adding the inclusive extent
;** of the interval to left_x MOD byte_size and taking the
;** result MOD byte_size will give us the last pixel affected
;** in the last byte.     These pixel indexes (0:7 for bytes)
;** can be used to create the first and last altered bits mask.
;**
;**
;** To compute the number of bytes in the inner loop,
;** use the second calculation above
;**
;**       (left_x MOD byte_size) + inclusive_extent
;**
;** and divide it by the byte size (8).  This gives you
;** the following:
;**
;**
;**           1)  If the result is 0, then only one destination
;**               byte is being altered.  In this case, the
;**               start & ending masks should be ANDed together,
;**               the innerloop count set to zero, and last_mask
;**               set to all 0's (don't alter any bits).
;**
;**                       |      x x x x x|         |
;**                       |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                        0 1 2 3 4 5 6 7
;**
;**                       start MOD 8 = 3,  extent-1 = 4
;**                       3+4 DIV 8 = 0, only altering one byte
;**
;**
;**
;**           2)  If the result is 1, then only two bytes
;**               will be altered.  In this case, the start and
;**               ending masks are valid, and all that needs to
;**               be done is set the innerloop count to 0.
;**
;**                       |  x x x x x x x|x x x x x x x|
;**                       |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                        0 1 2 3 4 5 6 7
;**
;**                       start MOD 8 = 1,  extent-1 = 14
;**                       1+14 DIV 8 = 1.  There is a first and last
;**                       byte but no innerloop count
;**
;**
;**
;**           3)  If the result is > 1, then some number of entire
;**               bytes will be altered by the innerloop.  In
;**               this case the number of innerloop bytes will
;**               be the result - 1.
;**
;**                       |                               x|x x x x x x x x|x
;**                       |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                        0 1 2 3 4 5 6 7
;**
;**                       start MOD 8 = 7,  extent-1 = 9
;**                       7+9  DIV 8 = 2.  There is a first and last
;**                       byte and an innerloop count of 1 (result - 1)
;**
;*/

;/*
;** Compute the starting bit position on the left and the ending
;** bit position on the right
;*/


        and     dl,ch                             ;Compute bit index for left side
        movzx   edx,dl
        add     ebx,edx                           ;EBX = left index + inclusive extent
        mov     esi,ebx                           ;(save for inner loop count)
        and     bl,ch                             ;BL = bit index for right side
;/*
;** Turn the bit index within the byte into a mask.
;** We are basically performing a table lookup into the two
;** tables bit_mask_tbl_right and bit_mask_tbl_left.  However,
;** by doing either arithmetic shifts or logical shifts, we
;** can create the masks and save a lot of table space.
;*/


        mov     ch,cl                             ;Save  byte shift count
        mov     cl,dl                             ;CL = left bit index
        xor     eax,eax
        dec     eax                               ;EAX = 0FFFFFFFFh
        mov     edx,eax                           ;EDX = -1, DL = 0FFh; needed later
        shr     al,cl                             ;AL = left side altered bits mask
        mov     cl,bl                             ;CL = right bit index
        mov     bl,80h
        sar     bl,cl                             ;BL = right side altered bits mask
        mov     cl,ch                             ;Restore byte shift count
        shr     esi,cl                            ;Compute inner byte count
        jnz     ci_not_one_byte                 ;loop count + 1 > 0, check it out

;/*
;** Only one byte will be affected.  Combine the first and
;** last byte masks, and set the loop count to 0.
;*/


        and     al,bl                             ;AL = left, BL = right
        xor     ebx,ebx                           ;Want the entire mask to be 0
        inc     esi                               ;Fall through to set 0

ci_not_one_byte:

        dec     esi                               ;Dec inner loop count (might become 0)
;/*
;** If all pixels in the first byte are altered, combine the
;** first byte into the inner loop and clear the first byte
;** mask.  Ditto for the last byte.
;**
;** DL = 0FFh and EDX = -1 here.  CMP mask,DL sets carry exactly when the
;** mask is not 0FFh, so
;**       SBB ESI,EDX   gives  ESI = ESI + 1 - carry
;**       SBB mask,DL   gives  mask = 0 when it was 0FFh, else unchanged
;*/

        cmp     al,dl                             ;Set 'C' if not all pixels 1
        sbb     esi,edx                           ;If no 'C', sub -1 (add 1), else sub 0
        cmp     al,dl                             ;Set 'C' if not all pixels 1
        sbb     al,dl                             ;If no 'C', AL becomes 0, else unchanged

        cmp     bl,dl                             ;Set 'C' if not all pixels 1
        sbb     esi,edx                           ;If no 'C', sub -1 (add 1), else sub 0
        cmp     bl,dl                             ;Set 'C' if not all pixels 1
        sbb     bl,dl                             ;If no 'C', BL becomes 0, else unchanged

;/*
;** Save the first and last byte masks and the loop count, together with
;** the bit index of the lowest set bit of each mask.
;**
;** NOTE(review): an earlier version of this comment described ANDing the
;** masks with a transparency mask and adjusting the start address; no
;** such code exists in this routine - confirm that the description was
;** simply inherited.
;*/

ci_interval_save:

        mov     cbDstMidByte,esi         ;the inner loop count
        mov     bDstFirstMask,al        ;the first byte mask
        movzx   eax,al
        dec     ah                      ;AH = 0FFh so BSF never sees a zero source
        bsf     cx,ax                   ;CX <= index of the first mask bit from
        mov     cxDstFirstShft,cl       ;the right end
        mov     bDstLastMask,bl         ;the last byte mask
        movzx   ebx,bl
        dec     bh                      ;BH = 0FFh so BSF never sees a zero source
        bsf     cx,bx                   ;CX <= index of the first mask bit
        mov     cxDstLastShft,cl

ci_exit:

        ret


;/***************************************************************************
;*
;* PUBLIC ROUTINE  XFerColorScan 
;*
;* DESCRIPTION   = This function performs the stretching/compressing blt     
;*                 within one scan, using the information from the           
;*                 pre-calculated DDA run record.                            
;*                 
;*                 Registers Preserved:          
;*                       EBX,SI,DS,ES,GS         
;*                 Registers Destroyed:          
;*                       AX,CX,DX,flags          
;*
;* INPUT         = DS:SI = the 1st visible source WORD     
;*                 ES:DI = the 1st visible destination byte
;*                 GS:[ESI.hi] = bitmap buffer             
;* OUTPUT        = ES:DI = start of the next destination scan         
;*                 ZF      set if no more destination scan to output. 
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

XFerColorScan::

;/*
;** Entry used until the first visible destination scan has been reached.
;** yDstOrg counts destination scans; it is advanced on every call, whether
;** or not the scan is drawn.
;**
;** Check if the current destination scan is clipped off. If so, we can simply
;** ignore this source scan
;*/

        mov     eax,yDstOrg                  ; the current dest. scan
        inc     yDstOrg                      ; update to the next scan
        cmp     eax,yDst                     ; signed compare with 1st visible scan
        jge     @F                           ; scan >= yDst: visible, go draw it
        ret                                  ; clipped: nothing drawn, cyExt untouched
@@:

;/*
;** After we have found the first visible scan of the blt, the test above is
;** no longer needed, so the scan function pointer is redirected to the
;** _Notest entry below for all later calls.  The copy-scan pointer is set
;** to CopyPrevScan at the same time.
;*/

        mov     pfnXDDAScan,OFFSET XFerColorScan_Notest
        mov     pfnCopyScan,OFFSET CopyPrevScan


XFerColorScan_Notest::

;/*
;** Initialize transfer loop
;*/

        push    esi                          ; ESI = ptrs to src and plane buffer
        push    ebx                          ; BX  = cyErrorTerm
        push    XSI                          ; XSI = companion pointer slot (plane buffer)
        lea     ebx,asDDARun                 ; GS:BX => DDA run records

;/*
;** Transfer one plane at a time.  For each plane the write plane is selected
;** through the SEQ_DATA port (MM_Cn) and the read plane through GRAF_ADDR
;** (GRAF_READ_MAP index, RM_Cn data).
;** NOTE(review): the byte-wide OUT to SEQ_DATA assumes the sequencer index
;** already points at the map mask register -- confirm against the caller.
;*/

xcs_c0:
        mov     edx,EGA_BASE+SEQ_DATA        ; plane selection register
        mov     al,MM_C0
        out     dx,al
        mov     dl,GRAF_ADDR                ; only DL changes; DH kept from above
        mov     ax,(RM_C0 shl 8)+GRAF_READ_MAP
        out     dx,ax
        call    XFerOneColorPlane           ; blt C0 plane, fill buffer for C1-C3
        xchg    XSI,esi                     ; DS:SI => plane buffer, XSI = source

xcs_c1:
        mov     dx,EGA_BASE+SEQ_DATA        ; plane selection register
        mov     al,MM_C1
        out     dx,al
        mov     dl,GRAF_ADDR                ; only DL changes; DH kept from above
        mov     ax,(RM_C1 shl 8)+GRAF_READ_MAP
        out     dx,ax
        call    CopyColorFromBuffer         ; blt C1 plane; ESI advances to next plane row

xcs_c2:
        mov     dx,EGA_BASE+SEQ_DATA        ; plane selection register
        mov     al,MM_C2
        out     dx,al
        mov     dl,GRAF_ADDR                ; only DL changes; DH kept from above
        mov     ax,(RM_C2 shl 8)+GRAF_READ_MAP
        out     dx,ax
        call    CopyColorFromBuffer         ; blt C2 plane; ESI advances to next plane row

xcs_c3:
        mov     dx,EGA_BASE+SEQ_DATA        ; plane selection register
        mov     al,MM_C3
        out     dx,al
        mov     dl,GRAF_ADDR                ; only DL changes; DH kept from above
        mov     ax,(RM_C3 shl 8)+GRAF_READ_MAP
        out     dx,ax
        call    CopyColorFromBuffer         ; blt C3 plane

xcs_update_scan:

        pop     XSI                         ; undo the three pushes in reverse order
        pop     ebx
        pop     esi                         ; restore src + buffer ptr
        sub     edi,cbDstScan               ; proceed to the next dst scan
        dec     cyExt                       ; one scan was processed; ZF = no scans left

xcs_exit:

        ret                                 ; flags still those of DEC cyExt

XOP_BYTE_SIZE           equ     8           ; # of bits in one byte
XOP_MOD_WORD_SIZE       equ     15          ; AND mask giving a bit index modulo 16 (one word)
XOP_INNER_LOOP_COUNT    equ     8           ; # of pels gathered per full destination byte


;/***************************************************************************
;*
;* PUBLIC ROUTINE  XFerOneColorPlane 
;*
;* DESCRIPTION   = This function performs the stretching/compressing blt    
;*                 within each plane, using the information from the        
;*                 pre-calculated DDA run record.  The processed byte for   
;*                 plane 0 are written directly to screen (plane 0), and the
;*                 rest are stored in buffer.                               
;*                                                                          
;*                 Registers Preserved:                      
;*                       ESI,EDI,DS,ES,GS                    
;*                 Registers Destroyed:                      
;*                       AX,BX,CX,DX,flags                   
;*
;* INPUT         = DS:SI = the 1st visible source WORD            
;*                 ES:DI = the 1st visible destination byte       
;*                 GS:[ESI.hi] = bitmap buffer                    
;*                 GS:[BX]     = DDA run record                   
;*
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

XFerOneColorPlane::

;/*
;** Register roles inside this routine:
;**   ESI  = current source word of plane 0 (the other planes are reached by
;**          adding cbSrcPlane); swapped with XSI to address the plane buffer
;**   EDI  = destination byte; swapped with XDI while DI is used as scratch
;**          for the fetched source word
;**   EBX  = DDA run records, one DWORD interval consumed per destination pel
;**   CH   = offset of the current source pel from the MSB of the source word
;**   DX   = the same position expressed as a BT bit index:
;**          (XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE - CH) and XOP_MOD_WORD_SIZE,
;**          i.e. offset 0 -> bit 7, offset 7 -> bit 0, offset 8 -> bit 15
;**   EAX  = four destination bytes being assembled, one per plane; AL is the
;**          plane being fed and EAX is rotated by 8 after each plane
;*/

        push    esi
        push    edi
        push    XSI
        mov     ch,byte ptr cxSrcVisPel
        mov     edx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ch                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE

;/*
;** Start from the first destination byte
;*/

xocp_xfer_first_byte::

        movzx   eax,al                      ; clear AH so the OR below loads the mask
        or      ah,bDstFirstMask            ; the 1st visible dest. byte mask
        jz      xocp_xfer_full_byte         ; no partial first byte
        mov     cl,cxDstFirstShft
        inc     cl                          ; we will always do the first pel
        shr     ah,cl                       ; shift mask to the right end
        mov     cl,ah                       ; CL = one set bit per remaining pel
        xchg    edi,XDI                     ; park EDI in XDI; DI becomes scratch
        xor     eax,eax                     ; start with four empty plane bytes

xocp_xfer_first_loop::

        push    esi
        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 0
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 1
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 1
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 2
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 2
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 3
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 3
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; back to plane 0
        pop     esi

        and     ch,XOP_MOD_WORD_SIZE        ; CH = offset from MSB
        mov     dl,ch
        add     edx,DWORD PTR [ebx]         ; add interval to the next offset
        add     ebx,4                       ; next DDA run record
        mov     ch,dl                       ; save the word offset
        shr     edx,4                       ; offset to the next src word
        shl     edx,1                       ; words -> bytes
        add     esi,edx                     ; advance src pointer
        mov     edx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ch                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE
        shr     cl,1                        ; anymore src pel for 1st byte
        jc      xocp_xfer_first_loop

        xchg    XDI,edi                     ; restore DI
        mov     cl,cxDstFirstShft
        shl     eax,cl                      ; align all four plane bytes with the mask

        mov     cl,bDstFirstMask
        not     cl                          ; CL = bits that must stay unaltered
        and     cl,BYTE PTR [edi]                    ; get the unaltered dest. pel
        or      al,cl
        stosb                               ; write plane 0 to the destination
        xchg    esi,XSI                     ; get GS:SI => Plane 1 Buffer
        push    esi                         ; remember this address
        mov     BYTE PTR [esi],ah                  ; plane 1 byte in AH
        add     esi,SCREEN_DSCAN             ; GS:SI => Plane 2 Buffer
        ror     eax,16                       ; AL = plane 2 byte, AH = plane 3 byte
        mov     BYTE PTR [esi],al                  ; plane 2 byte in AL
        add     esi,SCREEN_DSCAN             ; GS:SI => Plane 3 Buffer
        mov     BYTE PTR [esi],ah                  ; plane 3 byte in AH
        pop     esi                          ; get GS:SI => Plane 1 Buffer
        inc     esi                          ; go to next byte
        xchg    XSI,esi                      ; DS:SI => source bitmap
;/*
;** Transfer a sequence of full middle bytes. (The inner loop has been unwound
;** to optimize the execution speed and codesize.)
;*/

xocp_xfer_full_byte::

        mov     ah,ch                       ; keep the pel offset: ECX is reloaded below
        mov     ecx,cbDstMidByte             ; number of full middle bytes
        or      ecx,ecx
;/*
;** FIX: with no middle bytes the load above has zeroed CH, yet the last-byte
;** loop needs CH = pel offset.  Branch to the restore instruction that the
;** loop exit path already uses, instead of straight to xocp_xfer_last_byte.
;*/
        jz      xocp_restore_offset

xocp_xfer_outer_mid_loop::

        push    ecx                         ; save the middle byte count
        mov     ch,ah                       ; CH = offset from MSB
        mov     cl,XOP_INNER_LOOP_COUNT     ; need this # of bits before output
        xchg    edi,XDI                     ; park EDI in XDI; DI becomes scratch

xocp_xfer_inner_mid_loop::

        push    esi
        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 0
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 1
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 1
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 2
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 2
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 3
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 3
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; back to plane 0
        pop     esi

        and     ch,XOP_MOD_WORD_SIZE        ; CH = offset from MSB
        mov     dl,ch
        add     edx,DWORD PTR [ebx]         ; add interval to the next offset
        add     ebx,4                       ; next DDA run record
        mov     ch,dl                       ; save the word offset
        shr     edx,4                       ; offset to the next src word
        shl     edx,1                       ; words -> bytes
        add     esi,edx                      ; advance src pointer
        mov     edx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ch                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE

        dec     cl                          ; 8 pels shift out any stale EAX contents
        jnz     xocp_xfer_inner_mid_loop

        xchg    edi,XDI                     ; restore DI
        stosb                               ; write plane 0 to the destination
        xchg    esi,XSI                     ; get GS:SI => Plane 1 Buffer
        push    esi                         ; remember this address
        mov     BYTE PTR [esi],ah           ; plane 1 byte in AH
        add     esi,SCREEN_DSCAN            ; GS:SI => Plane 2 Buffer
        ror     eax,16                      ; AL = plane 2 byte, AH = plane 3 byte
        mov     BYTE PTR [esi],al                  ; plane 2 byte in AL
        add     esi,SCREEN_DSCAN            ; GS:SI => Plane 3 Buffer
        mov     BYTE PTR [esi],ah           ; plane 3 byte in AH
        pop     esi                         ; get GS:SI => Plane 1 Buffer
        inc     esi                         ; go to next byte
        xchg    esi,XSI                     ; DS:SI => source bitmap

        mov     ah,ch                       ; AH = offset from MSB
        pop     ecx                         ; ECX = remaining middle byte count
        dec     ecx
        jnz     xocp_xfer_outer_mid_loop

xocp_restore_offset:

        mov     ch,ah                       ; CH = offset from MSB again

;/*
;** Transfer the partial byte left to the last destination byte
;*/

xocp_xfer_last_byte::

        xor     cl,cl
        or      cl,bDstLastMask             ; the last byte mask
        jz      xocp_exit                   ; no partial last byte
        shl     cl,1                        ; the first src pel is always done
        xor     eax,eax                     ; start with four empty plane bytes
        xchg    edi,XDI                     ; park EDI in XDI; DI becomes scratch

xocp_xfer_last_loop::

        push    esi
        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 0
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 1
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 1
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 2
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 2
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; move to plane 3
        add     esi,cbSrcPlane

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel in plane 3
        adc     al,al                       ; use it for the next destination pel
        ror     eax,8                       ; back to plane 0
        pop     esi

        and     ch,XOP_MOD_WORD_SIZE        ; CH = offset from MSB
        mov     dl,ch
        add     edx,DWORD PTR [ebx]                  ; add interval to the next offset
        add     ebx,4                       ; next DDA run record
        mov     ch,dl                       ; save the word offset
        shr     edx,4                       ; offset to the next src word
        shl     edx,1                       ; words -> bytes
        add     esi,edx                     ; advance src pointer
        mov     edx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ch                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE
        shl     cl,1                        ; anymore src pel for last byte
        jc      xocp_xfer_last_loop

        xchg    edi,XDI                     ; restore DI
        mov     cl,cxDstLastShft
        shl     eax,cl                      ; align all four plane bytes with the mask

        mov     cl,bDstLastMask
        not     cl                          ; CL = bits that must stay unaltered
        and     cl,BYTE PTR [edi]           ; get the unaltered dest. pel
        or      al,cl
        stosb                               ; write plane 0 to the destination
        xchg    esi,XSI                     ; get GS:SI => Plane 1 Buffer
        mov     BYTE PTR [esi],ah           ; plane 1 byte in AH
        add     esi,SCREEN_DSCAN            ; GS:SI => Plane 2 Buffer
        ror     eax,16                      ; AL = plane 2 byte, AH = plane 3 byte
        mov     BYTE PTR [esi],al            ; plane 2 byte in AL
        add     esi,SCREEN_DSCAN             ; GS:SI => Plane 3 Buffer
        mov     BYTE PTR [esi],ah            ; plane 3 byte in AH

xocp_exit:

        pop     XSI                         ; XSI, EDI and ESI return as on entry
        pop     edi
        pop     esi

        ret


;/***************************************************************************
;*
;* PUBLIC ROUTINE  CopyColorFromBuffer 
;*
;* DESCRIPTION   = This function copies the contents of one plane from the     
;*                 bitmap buffer onto the same plane of the screen.            
;*
;*                 Registers Preserved:                                                              
;*                       BX,DI,DS,ES,GS                                                              
;*                 Registers Destroyed:  
;*                       AX,CX,DX,flags  
;*
;* INPUT         = DS:SI = the bitmap buffer of the current plane 
;*                 ES:DI = the 1st visible destination byte       
;*                 
;*                 
;*
;* OUTPUT        = ESI = entry ESI + SCREEN_DSCAN (buffer row of the next plane)
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

CopyColorFromBuffer::

;/*
;** Copies one buffered plane row (partial first byte, full middle bytes,
;** partial last byte) from [ESI] to [EDI].  EDI is returned unchanged; ESI is
;** returned advanced by SCREEN_DSCAN so that consecutive calls walk through
;** consecutive plane rows of the buffer.
;*/

        push    esi
        push    edi

;/*
;** Copy the partial first byte
;*/

cpb_first_byte:

        movzx   ecx,bDstFirstMask
        jecxz    cpb_copy_full_byte         ; mask 0: no partial first byte
        lodsb                               ; AL = buffered byte
        not     cl                          ; CL = bits that must stay unaltered
        and     cl,BYTE PTR [edi]           ; keep those bits from the destination
        or      al,cl                       ; assumes AL is 0 outside the mask -- TODO confirm
        stosb

;/*
;** Copy a sequence of full middle bytes
;*/

cpb_copy_full_byte:

        mov     ecx,cbDstMidByte             ; number of full middle bytes
        mov     edx,ecx                      ; keep the byte count for the remainder
        shr     ecx,2                        ; whole DWORDs first
        rep     movsd
        mov     ecx,edx
        and     ecx,3                        ; then the 0-3 leftover bytes
        rep     movsb                        ; leaves ECX = 0

;/*
;** Copy the partial byte left to the last destination byte
;*/

cpb_copy_last_byte:

        mov     cl,bDstLastMask              ; ECX is 0 here, so ECX = mask
        jecxz    cpb_exit                    ; mask 0: no partial last byte
        lodsb                               ; AL = buffered byte
        not     cl                          ; CL = bits that must stay unaltered
        and     cl,BYTE PTR [edi]           ; keep those bits from the destination
        or      al,cl                       ; assumes AL is 0 outside the mask -- TODO confirm
        stosb

cpb_exit:

        pop     edi                         ; EDI back to the start of the scan
        pop     esi
        add     esi,SCREEN_DSCAN            ; step to the next plane's buffer row

        ret


;/***************************************************************************
;*
;* PUBLIC ROUTINE   XFerMonoScan  
;*
;* DESCRIPTION   = This function performs the stretching/compressing blt     
;*                 within one scan, using the information from the           
;*                 pre-calculated DDA run record.                            
;*                 
;*                 Registers Preserved:                                                                     
;*                       EBX,SI,DS,ES,GS                                                                    
;*                 Registers Destroyed:           
;*                       AX,CX,DX,flags           
;*                 
;* INPUT         = DS:SI = the 1st visible source WORD        
;*                 ES:DI = the 1st visible destination byte   
;* OUTPUT        = ES:DI = start of the next destination scan         
;*                 ZF      set if no more destination scan to output. 
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

XFerMonoScan::

;/*
;** Entry used until the first visible destination scan has been reached.
;** yDstOrg counts destination scans; it is advanced on every call, whether
;** or not the scan is drawn.
;**
;** Check if the current destination scan is clipped off. If so, we can simply
;** ignore this source scan
;*/

        mov     eax,yDstOrg                  ; the current dest. scan
        inc     yDstOrg                     ; update to the next scan
        cmp     eax,yDst                     ; signed compare with 1st visible scan
        jge     @F                    ; scan >= yDst: visible, go draw it
        ret                           ; clipped: nothing drawn, cyExt untouched
@@:
;/*
;** After we have found the first visible scan of the blt, the test above is
;** no longer needed, so the scan function pointer is redirected to the
;** _Notest entry below for all later calls.  The copy-scan pointer is set
;** to CopyPrevScan at the same time.
;*/

        mov     pfnXDDAScan,OFFSET XFerMonoScan_Notest
        mov     pfnCopyScan,OFFSET CopyPrevScan

XFerMonoScan_Notest::

;/*
;** Initialize transfer loop
;*/

        push    esi                          ; SI = ptrs to src
        push    ebx                          ; BX = cyErrorTerm
        lea     ebx,asDDARun                 ; GS:BX => DDA run records

;/*
;** Setup the EGA/VGA registers to handle monochrome to color blt.
;*/

xms_setup::

        mov     edx,EGA_BASE+SEQ_DATA       ; plane selection register
        mov     al,MM_ALL                   ; select all plane
        out     dx,al
        mov     dl,GRAF_ADDR                ; only DL changes; DH kept from above

;/*
;** Now, do the stretching/compressing blt directly to screen.
;**
;** From here on EDX and XDX trade places: while pels are being gathered EDX
;** is the BT bit index and XDX holds the port address; around each group of
;** port writes they are exchanged.  AH carries the offset of the current
;** source pel from the MSB of the source word, AL collects destination pels.
;*/

        push    edi
        xchg    edx,XDX                     ; park the port address in XDX
        mov     ah,byte ptr cxSrcVisPel
        mov     edx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ah                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE

;/*
;** Start from the first destination byte
;*/

xms_xfer_first_byte::

        xor     ecx,ecx
        or      ch,bDstFirstMask            ; the 1st visible dest. byte mask
        jz      xms_xfer_full_byte          ; no partial first byte
        mov     cl,ch                       ; CL = first byte mask
        xchg    ah,cl                       ; get bitmask in AH, pel offset in CL
        xchg    edx,XDX                     ; DX = EGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK            ; set first byte mask
        out     dx,ax
        call    xms_setup_HW_no_xor          ; setup color munging scheme
        mov     ah,cl                       ; AH = offset from MSB
        xchg    edx,XDX                     ; EDX = bit index again

        mov     cl,cxDstFirstShft
        shr     ch,cl                       ; shift mask to the right end
        shr     ch,1                        ; we will always do the first pel
        xor     al,al
        xchg    edi,XDI                     ; park EDI in XDI; DI becomes scratch

xms_xfer_first_loop::

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel
        adc     al,al                       ; use it for the next destination pel
        and     ah,XOP_MOD_WORD_SIZE        ; AH = offset from MSB
        mov     dl,ah
        add     edx,DWORD PTR [ebx]         ; add interval to the next offset
        add     ebx,4                       ; next DDA run record
        mov     ah,dl                       ; save the word offset
        shr     edx,4                       ; offset to the next src word
        shl     edx,1                       ; words -> bytes
        add     esi,edx                     ; advance src pointer
        mov     edx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ah                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE
        shr     ch,1                        ; anymore src pel for 1st byte
        jc      xms_xfer_first_loop
        xchg    edi,XDI                     ; restore DI
        shl     al,cl                       ; CL = cxDstFirstShft: align pels with the mask
        stosb                               ; write to screen

;/*
;** Transfer a sequence of full middle bytes. (The inner loop has been unwound
;** to optimize the execution speed and codesize.)
;*/


xms_xfer_full_byte::

        mov     ecx,cbDstMidByte             ; number of full middle bytes
        jecxz    xms_xfer_last_byte
        mov     dh,ah                       ; save offset from MSB (AX is needed for OUT)
        xchg    edx,XDX                     ; DX = EGA_BASE + GRAF_ADDR
        mov     ax,0FF00h + GRAF_BIT_MASK   ; no mask for entire byte
        out     dx,ax
        call    xms_setup_HW
        xchg    edx,XDX                     ; EDX = bit index, DH = saved offset
        mov     ah,dh                       ; AH = offset from MSB
        movzx   edx,dl                       ; DH is always 0
        .errnz  XOP_MOD_WORD_SIZE and 0FF00h

xms_xfer_outer_mid_loop::

        push    ecx                         ; save the middle byte count
        mov     ecx,((XOP_MOD_WORD_SIZE shl 8) + XOP_INNER_LOOP_COUNT) ; CH = mask, CL = pel count
        xchg    edi,XDI                      ; park EDI in XDI; DI becomes scratch

xms_xfer_inner_mid_loop::

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel
        adc     al,al                       ; use it for the next destination pel
        and     ah,ch                       ; AH = offset from MSB
        mov     dl,ah
        add     edx,DWORD PTR [ebx]         ; add interval to the next offset
        add     ebx,4                       ; next DDA run record
        mov     ah,dl                       ; save the word offset
        shr     edx,4                       ; offset to the next src word
        shl     edx,1                       ; words -> bytes
        add     esi,edx                     ; advance src pointer
        mov     edx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ah                       ; DX = offset from LSB
        and     dl,ch
        dec     cl                          ; the next bit ?
        jnz     xms_xfer_inner_mid_loop

        xchg    edi,XDI                     ; restore DI
        stosb                               ; write to the destination
        pop     ecx                         ; ECX = remaining middle byte count
        dec     ecx
        jnz     xms_xfer_outer_mid_loop

;/*
;** Transfer the partial byte left to the last destination byte
;*/

xms_xfer_last_byte::

        mov     ch,bDstLastMask             ; the last byte mask (rest of ECX is 0 here)
        jecxz   xms_update_scan             ; mask 0: no partial last byte
        mov     cl,ch                       ; CL = the last byte mask
        xchg    ah,cl                       ; get bitmask in AH, pel offset in CL
        xchg    edx,XDX                     ; DX = EGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK            ; last byte mask
        out     dx,ax
        call    xms_setup_HW
        mov     ah,cl                       ; AH = offset from MSB
        xchg    edx,XDX                     ; EDX = bit index again
        shl     ch,1                        ; the first src pel is always done
        xor     al,al
        xchg    edi,XDI                     ; park EDI in XDI; DI becomes scratch

xms_xfer_last_loop::

        mov     di,WORD PTR [esi]
        bt      di,dx                       ; get the next src pel
        adc     al,al                       ; use it for the next destination pel
        and     ah,XOP_MOD_WORD_SIZE        ; AH = offset from MSB
        mov     dl,ah
        add     edx,DWORD PTR [ebx]         ; add interval to the next offset
        add     ebx,4                       ; next DDA run record
        mov     ah,dl                       ; save the word offset
        shr     edx,4                       ; offset to the next src word
        shl     edx,1                       ; words -> bytes
        add     esi,edx                     ; advance src pointer
        mov     edx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ah                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE
        shl     ch,1                        ; anymore src pel for last byte
        jc      xms_xfer_last_loop
        xchg    edi,XDI                     ; restore DI

        mov     cl,cxDstLastShft
        shl     al,cl                       ; shift to the left end
        stosb                               ; write to destination

xms_update_scan::

        xchg    edx,XDX                     ; DX = port address again
        pop     edi                         ; EDI back to the start of this scan

        mov     ax,(DR_SET shl 8) + GRAF_DATA_ROT
        out     dx,ax                       ; DX = EGA_BASE + GRAF_ADDR

        pop     ebx                          ; restore cyErrorTerm
        pop     esi                          ; restore src
        sub     edi,cbDstScan                ; proceed to the next dst scan
        dec     cyExt                       ; one scan was processed; ZF = no scans left

        ret                                 ; flags still those of DEC cyExt


;/***************************************************************************
;*
;* PUBLIC ROUTINE  xms_setup_HW(_no_xor)  
;*
;* DESCRIPTION   = This function sets up the EGA/VGA hardware for           
;*                 mono-to-color bitblt.  It assumes the BitMask register   
;*                 initialized properly.                                    
;*                 
;*                 Registers Preserved:                                                                      
;*                       BX,CX,DX,SI,DI,DS,ES,GS,BP 
;*                 Registers Destroyed:             
;*                       AX,flags                   
;*                 
;* INPUT         = NONE
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

xms_setup_HW::

;/*
;** Full entry: first put the data rotate register back to DR_SET, then fall
;** into the common setup below.  xms_setup_HW_no_xor skips this step.
;** Expects DX = graphics controller address port and EDI = destination byte.
;*/

        mov     ax,(DR_SET shl 8) + GRAF_DATA_ROT
        out     dx,ax                       ; DX = EGA_BASE + GRAF_ADDR
;/*
;** First we put the background color into the latches.  We do this
;** by putting this color into SET_RESET, writing it, then reading it.
;** The memory location we will use is the first byte where we will blt.
;** If we do not want to alter any destination pels, the BitMask register must
;** already be set properly.
;*/

        public  xms_setup_HW_no_xor

xms_setup_HW_no_xor::

        mov     ah,bBackClr
        mov     al,GRAF_SET_RESET
        out     dx,ax                       ; Set/Reset value = background color
        mov     ax,0F00h + GRAF_ENAB_SR
        out     dx,ax                       ; enable Set/Reset on all four planes

;/*
;** Fill the latches with the background color and unaltered pels.
;*/

        xchg    BYTE PTR [edi],al              ; color in SetReset is written, not AL
        mov     al,BYTE PTR [edi]              ; read to fill latches

;/*
;** Go to XOR mode.
;*/

        mov     ax,(DR_XOR shl 8) + GRAF_DATA_ROT
        out     dx,ax

;/*
;** Now setup SET_RESET: value = back XOR fore, enabled only on the planes
;** where the two colors are equal (the complement of that value).
;*/

        mov     ah,bBackClr
        xor     ah,bForeClr             ; gives 0 where colors match
        mov     al,GRAF_SET_RESET
        out     dx,ax
        not     ah                      ; 1 where colors match
        mov     al,GRAF_ENAB_SR
        out     dx,ax                   ; enable Set/Reset where colors match

        ret


;/***************************************************************************
;*
;* PUBLIC ROUTINE  CopyPrevScan 
;*
;* DESCRIPTION   = This function copies the contents of the previous scan onto 
;*                 the current scan.
;*
;*                 Registers Preserved:  
;*                       BX,SI,DS,ES,GS  
;*                 Registers Destroyed:  
;*                       AX,CX,DX,flags  
;*
;* INPUT         = DS:SI = the 1st visible source DWORD     
;*                 ES:DI = the 1st visible destination byte 
;*
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

CopyPrevScan::

;/*
;** Duplicates the scan at EDI + cbDstScan onto the scan at EDI, then moves
;** EDI on by -cbDstScan and counts the scan in cyExt.  EBX and ESI are saved
;** and restored; the caller's ESI is not used as a source.
;*/

        push    ebx
        push    esi                         ; copy from screen to screen
        mov     esi,edi                     ; DS:SI => current plane
        add     esi,cbDstScan               ; DS:SI => same plane last scan
        mov     dx,EGA_BASE                 ; board control register
        mov     bx,(SEQ_DATA shl 8) + GRAF_ADDR ; BH/BL = port low bytes moved into DL below
        push    edi

;/*
;** Copy the partial first byte. We have to do this one plane at a time.
;*/

xcp_first_byte:

        movzx   ecx,bDstFirstMask
        jecxz   xcp_copy_full_byte          ; mask 0: no partial first byte
        call    CopyPartialByte             ; does not advance ESI/EDI
        inc     edi                          ; to the next byte
        inc     esi
;/*
;** Copy a sequence of full middle bytes. Use WRITE MODE 1 to copy 4 planes
;** at a time
;*/

xcp_copy_full_byte:

        mov     ecx,cbDstMidByte             ; number of full middle bytes
        jecxz    xcp_copy_last_byte
        mov     dl,bh                       ; select all 4 planes
        mov     al,MM_ALL
        out     dx,al
        mov     dl,bl                       ; select write mode
        mov     ax,M_LATCH_WRITE shl 8 + GRAF_MODE
        mov     shadowed_graf_mode.vvr_value,ah ; Must shadow this for state code
        out     dx,ax
        rep     movsb
        mov     ah,M_DATA_READ              ; AL is still GRAF_MODE
        mov     shadowed_graf_mode.vvr_value,ah ; Must shadow this for state code
        out     dx,ax

;/*
;** Copy the partial byte left to the last destination byte
;*/

xcp_copy_last_byte:

        movzx   ecx,bDstLastMask
        jecxz    xcp_copy_done              ; mask 0: no partial last byte
        call   CopyPartialByte

xcp_copy_done:

        pop     edi                         ; EDI back to the start of this scan
        sub     edi,cbDstScan                ; proceed to the next dst scan
        dec     cyExt                       ; one scan was processed; ZF = no scans left
        mov     ax,0FF00h + GRAF_BIT_MASK   ; bit mask back to all bits enabled
        mov     dl,bl                       ; BL = low byte of GRAF_ADDR
        out     dx,ax

xcp_exit:

        pop     esi
        pop     ebx

        ret                                 ; flags still those of DEC cyExt


;/***************************************************************************
;*
;* PUBLIC ROUTINE  CopyPartialByte 
;*
;* DESCRIPTION   = This function copies the masked bits of one byte of the    
;*                 previous scan onto the corresponding byte of the current   
;*                 scan, one plane at a time.                                 
;*                                                                            
;*                 Registers Preserved:         
;*                       BX,CH,DS,ES,GS         
;*                 Registers Destroyed:         
;*                       AX,CL,DX,flags         
;*
;* INPUT         = BX    = (SEQ_DATA shl 8) + GRAF_ADDR     
;*                 CX    = Byte Mask                        
;*                 DX    = EGA_BASE                         
;*                 DS:SI = the 1st visible source DWORD     
;*                 ES:DI = the 1st visible destination byte 
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/


        ALIGN   4

CopyPartialByte::

;/*
;** Copies the byte at [ESI] to [EDI] through the bit mask in CL, once for
;** each of the four planes, going from C3 down to C0.  ESI and EDI are not
;** advanced.  EBP is used as a second accumulator and is saved/restored.
;*/

        push    ebp
        mov     al,GRAF_BIT_MASK
        mov     ah,cl                       ; mask out the unaltered bits
        mov     dl,bl                       ; BL = low byte of GRAF_ADDR
        out     dx,ax

        mov     al,MM_C3                    ; setup write plane selection
        mov     bp,(RM_C3 shl 8)+GRAF_READ_MAP ; BP = matching read plane selection

cpb_next_plane:

        mov     dl,bh                       ; write plane selection register
        out     dx,al
        mov     dl,bl                       ; read plane selection register
        xchg    ax,bp                       ; AX = read map index/data, BP = write plane
        out     dx,ax
        sub     ah,1                        ; read map for the next lower plane
        xchg    ax,bp                       ; AL = write plane again
;/*
;** This transfer cannot use movsb because we need the byte to be filtered
;** by the bit mask register.
;*/

        mov     cl,BYTE PTR [esi]                  ; the first byte last scan
        xchg    BYTE PTR [edi],cl                  ; write to destination

        shr     al,1                        ; AL = next writing plane
        jnc     cpb_next_plane              ; carry set once the MM_C3 bit is shifted out

        pop     ebp

        ret

OEMStretchBlt   ENDP

        public  osb_have_extent
        public  osb_test_yshrink
        public  osb_calc_yerr
        public  osb_test_xshrink
        public  osb_calc_xerr
        public  osb_xform_origin
        public  osb_calc_xdda
        public  osb_calc_src_offset
        public  osb_calc_dst_offset
        public  osb_cursor_exclude
        public  osb_set_blt_param
        public  osb_start_blt
        public  osb_stretch_next_scan
        public  osb_stretch_scan_done
        public  osb_stretch_copy_scan
        public  osb_y_shrink
        public  osb_elim_next_scan
        public  osb_elim_this_scan
        public  osb_exit
        public  sa_find_first_src
        public  sa_have_first_src
        public  sa_record_run
        public  da_find_first_src
        public  da_have_first_src
        public  da_record_run
        public  da_dup_src_pel
        public  xocp_xfer_first_byte
        public  xocp_xfer_full_byte
        public  xocp_xfer_last_byte
        public  xms_xfer_first_byte
        public  xms_xfer_full_byte
        public  xms_xfer_last_byte

        end
