;*DDK*************************************************************************/
;
; COPYRIGHT    Copyright (C) 1995 IBM Corporation
;
;    The following IBM OS/2 WARP source code is provided to you solely for
;    the purpose of assisting you in your development of OS/2 WARP device
;    drivers. You may use this code in accordance with the IBM License
;    Agreement provided in the IBM Device Driver Source Kit for OS/2. This
;    Copyright statement may not be removed.;
;*****************************************************************************/

        page    ,132
;/*****************************************************************************
;*
;* SOURCE FILE NAME = STRCHBLT.ASM
;*
;* DESCRIPTIVE NAME = StretchBlt at level of device driver.
;*
;*
;* VERSION      V2.0
;*
;* DATE         04/03/90
;*
;* DESCRIPTION  StretchBlt at level of device driver.     
;*
;*              
;* FUNCTIONS    OEMStretchBlt
;*              SMajorDDARun
;*              DMajorDDARun
;*              ComputeInterval
;*              XFerColorScan
;*              XFerColorScan_Notest
;*              XFerOneColorPlane
;*              CopyColorFromBuffer
;*              XFerMonoScan
;*              XFerMonoScan_Notest
;*              xms_setup_HW
;*              CopyPrevScan
;*              CopyPartialByte
;*                                   
;* NOTES        NONE
;*
;* STRUCTURES   NONE
;*
;* EXTERNAL REFERENCES
;*
;*              NONE
;*
;* EXTERNAL FUNCTIONS
;*
;*              NONE
;*
;* CHANGE ACTIVITY =
;*   DATE      FLAG        APAR   CHANGE DESCRIPTION
;*   --------  ----------  -----  --------------------------------------
;*   mm/dd/yy  @Vr.mpppxx  xxxxx  xxxxxxx
;*   07/03/90                     Written by Viroon Touranachun 
;*
;*****************************************************************************/

        .286P

        .xlist
        include cmacros.inc
INCL_GPIBITMAPS         equ                      1       ; for bitmap info structure
INCL_DDIMISC            equ                      1       ; for BBP structure
        include pmgre.inc
        include driver.inc
        include display.inc
        include egafam.inc
        include egamemf.inc
        include oemblt.inc
        include assert.mac
        .list

        CPUMode 386

        externFP    far_exclude         ; CURSORSC.ASM
        externFP    far_unexclude       ; CURSORSC.ASM
        externA     SCREEN_DSCAN        ; EGAMEMD.ASM
        externA     DOSHUGEINCR
        externA     DOSHUGESHIFT

sBegin  CompileCodeData
        externB     asDDARun            ; CMPLCODE.ASM
        externB     asPlaneBuff         ; CMPLCODE.ASM
sEnd    CompileCodeData

sBegin  PtrData
        externB     shadowed_graf_mode  ; EGAMEMD.ASM
sEnd    PtrData

OSB_Y_SHRINK            equ     01h     ; compression in vertical direction
OSB_HUGE_BITMAP         equ     02h     ; huge source bitmap
XOS_NEXT_MONO_PLANE     equ     2       ; the mono=>color munge entry size

sBegin  Code
        assumes cs,Code
        assumes ds,Data
        assumes es,nothing

        externW     MyCmplCodeData
        externW     MyPtrCodeData
        externNP    clean_up_before_exit_no_test; EXIT.BLT

;/***************************************************************************
;*
;* FUNCTION NAME = OEMStretchBlt 
;*
;* DESCRIPTION   = This function is a working routine to do the stretchblt  
;*                 from a memory bitmap directly (ROP_SRCCOPY) to the       
;*                 screen.                                                  
;*
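;* INPUT         = pddcDst  destination ddc
;*                 xDst     destination x origin
;*                 yDst     destination y origin
;*                 psdSrc   source surface definition
;*                 lpParm   StretchBlt parameters
;*                 cxExt    x extent of the BLT
;*                 cyExt    y extent of the BLT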
;*
;*                 Calls:
;*                       clean_up_before_exit_no_test
;*                       far_exclude
;*                       far_unexclude
;*
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

cProc   OEMStretchBlt,<FAR,PUBLIC,WIN,PASCAL>,<di>

;/*
;** Overview of the procedure body:
;**   1. Read the source/target rectangles reached through lpParm and derive
;**      the four extents.
;**   2. Compute the initial Y and X DDA error terms and pick SMajorDDARun or
;**      DMajorDDARun to fill the horizontal run table asDDARun.
;**   3. Compute lpBits (first source word to read) and lpScreen (first
;**      destination byte to write), and the destination masks via
;**      ComputeInterval.
;**   4. Exclude the cursor, then walk the source scans toward lower offsets,
;**      calling through pfnXDDAScan / pfnCopyScan for every destination scan.
;**   5. Restore the hardware state and the cursor.
;** The locals declared here are also read and written by the near helper
;** routines, which run on this procedure's BP frame.
;*/

        parmD   pddcDst                 ;Destination ddc
        parmW   xDst                    ;Destination x origin
        parmW   yDst                    ;Destination y origin
        parmD   psdSrc                  ;Source surface definition
        parmD   lpParm                  ;StretchBlt parameters
        parmW   cxExt                   ;x extent of the BLT
        parmW   cyExt                   ;y extent of the BLT

        localW  xSrcOrg                 ;source x-origin of the bltting area
        localW  ySrcOrg                 ;source y-origin of the bltting area
        localW  cxSrcExt                ;source bltting x-extent
        localW  cySrcExt                ;source bltting y-extent
        localW  xDstOrg                 ;destination x-origin of the bltting area
        localW  yDstOrg                 ;destination y-origin of the bltting area
        localW  cxDstExt                ;destination bltting x-extent
        localW  cyDstExt                ;destination bltting y-extent

        localW  cyErrorTerm             ;the initial Y error term

        localD  lpBits                  ;ptr to the source bitmap
        localW  selFirstSeg             ;selector of bitmap's 1st segment
        localW  npSrcLastScan           ;offset to the source last scan
        localW  cbSrcPlane              ;# of bytes in one src plane
        localW  cbSrcScan               ;# of bytes in one src scan
        localD  lpScreen                ;ptr to the screen bitmap
        localW  cbDstScan               ;# of bytes in one dst scan
        localW  cxSrcVisPel             ;offset to the first source visible pel
        localW  cbDstMidByte            ;clipped destination scansize (in bytes)
        localB  bDstFirstMask           ;mask of the first destination byte
        localB  bDstLastMask            ;mask of the last destination byte
        localB  cxDstFirstShft          ;the # of bit to shift in the first byte
        localB  cxDstLastShft           ;the # of bit to shift in the last byte
        localB  fbStBlt                 ;the stretchblt flag (OSB_* bits)

        localB  bForeClr                ;Screen Foreground color
        localB  bBackClr                ;Screen Background color

        localW  pfnXDDAScan             ;ptr to the current scan blt function
        localW  pfnCopyScan             ;ptr to the current scan copy function

cBegin

        push    esi                     ; save all 32 bits: the high word of ESI
                                        ; is used below to carry a second offset
        cld                             ; string instructions must count upward
        xor     ax,ax
        mov     fbStBlt,al              ; initialize the flag

;/*
;** Get the source/destination bltting area's vertical/horizontal extents
;*/

        lgs     si,lpParm               ; ptr to stretchblt parameters
        assumes gs,nothing
        les     di,gs:[si].stb_lpRectl  ; bltting area
        assumes es,nothing
        mov     ax,es:[di].bbp_rclSrc.rcl_xLeft.lo
        mov     xSrcOrg,ax
        mov     bx,es:[di].bbp_rclSrc.rcl_xRight.lo
        sub     bx,ax                   ; BX = cxSrcExt = SrcRight - SrcLeft
        mov     ax,es:[di].bbp_rclSrc.rcl_yBottom.lo
        mov     ySrcOrg,ax
        mov     cx,es:[di].bbp_rclSrc.rcl_yTop.lo
        sub     cx,ax                   ; CX = cySrcExt = SrcTop - SrcBottom

        mov     ax,es:[di].bbp_rclTarg.rcl_xLeft.lo
        mov     xDstOrg,ax
        mov     dx,es:[di].bbp_rclTarg.rcl_xRight.lo
        sub     dx,ax                   ; DX = cxDstExt = DstRight - DstLeft
        mov     ax,es:[di].bbp_rclTarg.rcl_yBottom.lo
        mov     yDstOrg,ax
        mov     ax,es:[di].bbp_rclTarg.rcl_yTop.lo
        sub     ax,yDstOrg              ; AX = cyDstExt = DstTop - DstBottom

osb_have_extent:
        assert  ax,G,0                  ; all four extents must be positive
        assert  bx,G,0
        assert  cx,G,0
        assert  dx,G,0
        mov     cyDstExt,ax
        mov     cxSrcExt,bx
        mov     cySrcExt,cx
        mov     cxDstExt,dx

;/*
;** Calculate the initial X/Y error terms and the horizontal DDA Runlength.
;** BX = cxSrcExt, DX = cxDstExt
;** CX = cySrcExt, AX = cyDstExt
;*/

osb_test_yshrink:
        cmp     cx,ax
        jbe     short osb_calc_yerr     ;src <= dst: stretching, dst is major
        or      fbStBlt,OSB_Y_SHRINK    ;flag if we are vertically compressing
        xchg    cx,ax                   ;AX = major extent, CX = minor extent

osb_calc_yerr:
        shr     cx,1                    ;Initial error = MajExt - MinExt/2
        sub     ax,cx
        mov     cyErrorTerm,ax

osb_test_xshrink:
        mov     cx,CodeOFFSET SMajorDDARun ;assume source is the major axis
        cmp     bx,dx
        jae     short osb_calc_xerr     ;src >= dst: source major
        xchg    bx,dx                   ;BX = major extent, DX = minor extent
        mov     cx,CodeOFFSET DMajorDDARun

osb_calc_xerr:
        shr     dx,1                    ;Initial error = MajExt - MinExt/2
        sub     bx,dx                   ;BX = initial X error term

;/*
;** Before we calculate the DDA run, we need to transform the starting
;** coordinates of the bltting area from device to screen coordinate.
;** The four consecutive words starting at stb_xSrcDel are added, in order,
;** to xSrcOrg, ySrcOrg, xDstOrg and yDstOrg.
;*/

osb_xform_origin:
        add     si,stb_xSrcDel          ;GS:SI => STBLT.stb_xSrcDel
        lods    word ptr gs:[si]
        add     xSrcOrg,ax
        lods    word ptr gs:[si]
        add     ySrcOrg,ax
        lods    word ptr gs:[si]
        add     xDstOrg,ax
        lods    word ptr gs:[si]
        add     yDstOrg,ax

osb_calc_xdda:
        mov     es,MyCmplCodeData
        assumes es,nothing
        lea     di,asDDARun             ;ES:DI => XDDA run array
        cCall   cx                      ;calculate the DDA run (CX = routine
                                        ;chosen above); it sets cxSrcVisPel

;/*
;** Calculate the offset to the start byte in the source bitmap that corresponds
;** to the origin of the clipped source bltting area and the offset to the first
;** pel
;*/

osb_calc_src_offset:
        les     si,psdSrc               ;the source surface
        assumes es,nothing
        mov     eax,es:[si].sd_pBits    ;the source bitmap
        mov     lpBits,eax
        shr     eax,16                  ;remember the first segment
        mov     selFirstSeg,ax

        mov     bx,es:[si].sd_dScan     ;the source scansize
        mov     cbSrcScan,bx
        mov     ax,es:[si].sd_cbScan
        mov     cbSrcPlane,ax           ;the source plane size (mono unused)
        mov     ax,es:[si].sd_cy
        sub     ax,ySrcOrg
        dec     ax                      ;flip to HW coordinate
        test    es:[si].sd_fb,SD_HUGE   ;if the bitmap is not huge
        jz      short @F                ;we already have the scan number
        or      fbStBlt,OSB_HUGE_BITMAP
        xor     dx,dx                   ;DX:AX = scan number for the divide
        div     es:[si].sd_cySeg        ;AX = segment index, DX = scan in segment
        mov     cx,DOSHUGESHIFT         ;NOTE: per the original author, using
        shl     ax,cl                   ;DOSHUGESHIFT as an immediate shift count
                                        ;overwrites the next instruction, so the
                                        ;count goes through CL instead
        add     lpBits.sel,ax           ;advance to the correct segment
        mov     ax,es:[si].sd_cbFill
        add     ax,bx                   ;BX = scansize
        neg     ax                      ;AX = 0 - (cbFill + scansize), mod 64K
        mov     npSrcLastScan,ax        ;offset to the last scan in a segment
        mov     ax,dx                   ;AX = scan index within the segment
@@:
        mul     bx                      ;AX = offset to start of the scan
        assert  dx,E,0                  ;the offset must fit in 16 bits
        add     lpBits.off,ax
        mov     ax,cxSrcVisPel          ;AX = offset to 1st visible src pel
        shr     ax,4                    ;pel index -> word index (16 pels/word)
        shl     ax,1                    ;make it word offset
        add     lpBits.off,ax
                                        ;NOTE(review): npSrcLastScan is stored
                                        ;only on the huge-bitmap path above; in
                                        ;this procedure it is read only at the
                                        ;osb_*_new_seg selector updates
        add     npSrcLastScan,ax        ;start from this byte in last scan
        and     cxSrcVisPel,15          ;offset of the 1st pel within word

;/*
;** We now need to decide which routine to use to blt each src plane, that is,
;** to use routines for color or monochrome source.
;*/

osb_check_src_color:
        mov     ax,CodeOFFSET XFerColorScan   ;assume color source
        test    es:[si].sd_fb,SD_COLOR
        les     si,pddcDst                    ;ES:SI => the dest. ddc
                                              ;(LES leaves the TEST flags intact)
        assumes es,nothing
        jnz     short osb_have_routine        ;check if source is color

;/*
;**  We have a monochrome source, therefore we need to know the destination's
;**  image foreground and background colors for color mapping.
;*/

        mov     ax,word ptr es:[si].ddc_ia.ia_ba.ba_ipc
        mov     ah,es:[si].ddc_ia.ia_ba.ba_ipcBack.ipc_bClr
        mov     bForeClr,al
        mov     bBackClr,ah
        mov     ax,CodeOFFSET XFerMonoScan

osb_have_routine:
        mov     pfnXDDAScan,ax          ;get the right routine
        mov     pfnCopyScan,ax          ;we cannot copy screen-screen until a
                                        ;scan is drawn

;/*
;** lpBits now points to the word that contains the 1st visible src pel.
;** Now calculate the address of the start byte in the screen blt area.
;*/

osb_calc_dst_offset:
        mov     si,es:[si].ddc_npsd     ; ES:SI => the dest. surface
        mov     cbDstScan,SCREEN_DSCAN  ; destination scan width in bytes
        mov     ax,es:[si].sd_cy        ; flip screen's y-coordinate to the
        sub     ax,yDst                 ; hardware convention
        dec     ax                      ; AX = sd_cy - yDst - 1
        mov     di,ax                   ; save it for cursor exclusion
        mul     cbDstScan               ; no of bytes in a scan
        assert  dx,E,0                  ; the offset must fit in 16 bits
        mov     bx,xDst                 ; get the x origin
        shr     bx,3                    ; get the no of bytes
        add     ax,bx                   ; this is the start offset
        mov     lpScreen.off,ax
        mov     ax,es:[si].sd_pBits.sel ; the screen segment
        mov     lpScreen.sel,ax
        cCall   ComputeInterval         ; compute interval parameters

;/*
;** Cursor Exclusion for a device destination.
;** Registers handed to far_exclude: CX = xDst, SI = xDst + cxExt - 1,
;** DI = flipped yDst, DX = DI - cyExt + 1.
;*/

osb_cursor_exclude:
        mov     si,cxExt
        dec     si                      ;Make extents inclusive of last point
        mov     cx,xDst                 ;Set left
        add     si,cx                   ;Set right

        mov     dx,cyExt
        neg     dx
        add     dx,di                   ;DI = top, DX = bottom
        inc     dx                      ;Make DX exclusive
        cCall   far_exclude                       ;Exclude the area from the screen

;/*
;** Setup pointers to all the essential data structures.
;** From here until osb_exit:
;**   GS          = MyCmplCodeData (DDA run table and plane buffer)
;**   ESI.hi      = offset of asPlaneBuff
;**   DS:SI       = current source scan position
;**   ES:DI       = current destination position
;**   BX          = running Y error term
;*/

osb_set_blt_param:
        mov     gs,MyCmplCodeData       ;for DDA run buffer
        assumes gs,nothing
        lea     si,asPlaneBuff          ;GS:SI => Plane Buffer
        ror     esi,16                  ;put it in the high word
        lds     si,lpBits               ;the starting pel to blt
        assumes ds,nothing
        les     di,lpScreen             ;the starting pel on screen
        assumes es,nothing
        mov     bx,cyErrorTerm          ;initial y error term

;/*
;** Now we have all the essential information to perform stretchblt. So start
;** munging the bits. Do the DDA in the y direction. For each scan, call
;** the routine in pfnXDDAScan to do the DDA in the X direction.
;*/

osb_start_blt:
        test    fbStBlt,OSB_Y_SHRINK    ;we treat stretching and compressing
        jnz     short osb_y_shrink      ;separately

;/*
;** We are stretching the bitmap vertically. This means, in terms of Y
;** coordinate, destination is the major axis and source is minor. therefore,
;** every source scan will be output to screen. Note that since we may have
;** some destination scan clipped off, we will let the XDDA function decide
;** whether it is going to process that scan.
;*/

osb_stretch_next_scan:
        cCall   [pfnXDDAScan]           ;walk along the current src scan
        jz      osb_exit                ;ZF set if all dest scans are processed

osb_stretch_scan_done:
        sub     bx,cySrcExt             ;subtract minor axis extent
        jge     short osb_stretch_copy_scan;are we repeating the same scan?
        add     bx,cyDstExt             ;no! prepare to walk the next scan
        sub     si,cbSrcScan            ;proceed to the next scan
        jnc     short osb_stretch_next_scan ;no borrow: still inside the segment

osb_stretch_new_seg:

ifdef   FIREWALLS
        test    fbStBlt,OSB_HUGE_BITMAP
        jnz     short @F
        rip     text,<OEMStretchBlt - updating selector of a small bitmap>
@@:
endif;  FIREWALLS

        mov     ax,ds
        sub     ax,DOSHUGEINCR          ;selector of the previous segment
        cmp     ax,selFirstSeg          ;this could happen only once
        jb      short osb_stretch_dup_last ;already in the first segment
        mov     ds,ax
        assumes ds,nothing
        mov     si,npSrcLastScan        ;offset to the last scan in segment
        jmp     short osb_stretch_next_scan

osb_stretch_dup_last:
        add     si,cbSrcScan            ;undo the step: reuse the same scan
        jmp     short osb_stretch_next_scan

osb_stretch_copy_scan:
        cCall   [pfnCopyScan]           ;copy the previous scan
        jnz     short osb_stretch_scan_done
        jmp     short osb_exit          ;ZF set: all dest scans are processed

;/*
;** We are compressing the bitmap vertically. This means, in terms of Y
;** coordinate, source is the major axis and destination is minor. therefore,
;** some source scans will be ignored. Note that since we may have
;** some destination scan clipped off, we will let the XDDA function decide
;** whether it is going to process that scan.
;** Execution enters this section at osb_y_shrink; osb_shrink_new_seg and
;** osb_shrink_dup_last are reached only from osb_elim_next_scan.
;*/

osb_shrink_new_seg:

ifdef   FIREWALLS
        test    fbStBlt,OSB_HUGE_BITMAP
        jnz     short @F
        rip     text,<OEMStretchBlt - updating selector of a small bitmap>
@@:
endif;  FIREWALLS

        mov     ax,ds
        sub     ax,DOSHUGEINCR          ;selector of the previous segment
        cmp     ax,selFirstSeg          ;this could happen only once
        jb      short osb_shrink_dup_last ;already in the first segment
        mov     ds,ax
        assumes ds,nothing
        mov     si,npSrcLastScan        ;offset to the last scan in segment
        jmp     short osb_elim_this_scan

osb_shrink_dup_last:
        add     si,cbSrcScan            ;undo the step: reuse the same scan
        jmp     short osb_elim_this_scan

osb_y_shrink:
        assert  bx,NE,0                 ;BX = YErrorTerm should not be 0
        cCall   [pfnXDDAScan]           ;walk along the current src scan
        jz      short osb_exit          ;ZF set if all dest scans are processed

osb_elim_next_scan:
        sub     si,cbSrcScan            ;step to the next source scan
        jc      short osb_shrink_new_seg ;borrow: ran off the start of segment

osb_elim_this_scan:
        sub     bx,cyDstExt             ;subtract minor axis extent
        jge     short osb_elim_next_scan ;error still >= 0: skip this src scan
        add     bx,cySrcExt             ;add major axis extent back
        cCall   [pfnXDDAScan]           ;output this source scan
        jnz     short osb_elim_next_scan ;ZF clear: more dest scans to do

;/*
;** now reset the EGA/VGA parameters and bring back the cursor
;** NOTE(review): DS was reloaded with the source bitmap selector at
;** osb_set_blt_param and is not restored before these two calls -- confirm
;** that neither callee relies on DS addressing the driver's data segment.
;*/

osb_exit:
        cCall   clean_up_before_exit_no_test      ; restore EGA registers
        cCall   far_unexclude                     ; re-draw the cursor
        pop     esi                     ; matches the PUSH ESI after cBegin

osb_end:
cEnd

;/***************************************************************************
;*
;* PUBLIC ROUTINE  SMajorDDARun
;*
;* DESCRIPTION   = This routine calculates a source major DDA run.  The run is
;*                 the sequence of positive numbers, Ri, each giving the distance
;*                 in source pels from the source pel used for the previous
;*                 destination pel to the one used for the current (ith) one.
;*
;*                 Warning:
;*                       Use frames from OEMStretchBlt
;*
;*                 Registers Destroyed:                                                                      
;*                       AX,CX,DX,SI,flags                                                                   
;*
;* INPUT         = BX = X error term                     
;*                 ES:DI = address of the DDA run record 
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

cProc   SMajorDDARun,<PUBLIC,NEAR,NODATA>

;/*
;** Builds the horizontal run table for the case where the source extent is
;** the major axis (source at least as wide as the destination).
;**
;** Frame variables read   : xSrcOrg, xDstOrg, cxSrcExt, cxDstExt, xDst, cxExt
;** Frame variable written : cxSrcVisPel (source x matching destination xDst)
;** Table output           : cxExt-1 words stored at ES:DI, one for every
;**                          visible destination pel after the first; each is
;**                          the number of source pels stepped (always >= 1).
;** On return DI points just past the last word stored and BX holds the
;** updated error term.
;*/

cBegin

;/*
;** First, DDA walk until we find the first source pel corresponds to the
;** first visible pel on the destination.
;*/

        mov     ax,xSrcOrg
        mov     cx,xDstOrg
        mov     dx,cxSrcExt
        mov     si,cxDstExt
        cmp     cx,xDst                 ;enter the visible area?
        je      short sa_have_first_src ;nothing is clipped on the left

ifdef   FIREWALLS
        jl      short sa_find_first_src ;xDstOrg < xDst is the expected case
        rip     text,<OEMStretchBlt - incorrect dst starting coordinates>
endif;  FIREWALLS

;/*
;** In the DDA loop:
;** AX = source x, CX = destination x, BX = error term
;** DX = major axis extent, SI = minor axis extent
;** Without FIREWALLS the xDstOrg > xDst case is not diagnosed and simply
;** falls into this loop.
;*/

sa_find_first_src:
        inc     ax                      ;step major axis
        sub     bx,si                   ;subtract minor axis extent
        jge     short sa_find_first_src ;step minor axis?
        add     bx,dx                   ;yes, add major extent
        inc     cx                      ;step minor axis
        cmp     cx,xDst                 ;enter the visible area?
        jl      short sa_find_first_src ;signed compare: not there yet

;/*
;** We now have the offset to the first visible source pel from the unclipped
;** origin, so remember this for address calculation.
;*/

sa_have_first_src:
        mov     cxSrcVisPel,ax          ;offset to the 1st visible pel
        mov     cx,cxExt
        assert  cx,G,0
        dec     cx                      ;we need this # of dst pels
        jz      short sa_exit           ;one pel wide: no intervals to record
        xor     ax,ax                   ;interval accumulator starts at 0

;/*
;** In the DDA loop:
;** AX = interval from the last visible source pel
;** CX = # visible dst pels left
;** BX = Error term
;** DX = major axis extent, SI = minor axis extent
;*/

sa_record_run:
        inc     ax                      ;step major axis
        sub     bx,si                   ;subtract minor axis extent
        jge     short sa_record_run     ;step minor axis?
        add     bx,dx                   ;yes, add major extent
        stosw                           ;AX = interval between visible pels
        xor     ax,ax                   ;reset the interval
        loop    sa_record_run           ;one table entry per remaining dst pel

sa_exit:
cEnd

;/***************************************************************************
;*
;* PUBLIC ROUTINE  DMajorDDARun    
;*
;* DESCRIPTION   = This routine calculates a destination major DDA run.  The
;*                 run is the sequence of non-negative numbers, Ri, each giving
;*                 the distance in source pels from the source pel used for the
;*                 previous destination pel to the one used for the current
;*                 (ith) destination pel.
;*
;*                 Registers Destroyed:                                                                   
;*                       AX,CX,DX,SI,flags                                                                
;*
;*                 Warning:
;*                       Use frames from OEMStretchBlt
;*
;* INPUT         = BX = X error term                    
;*                 ES:DI = address of the DDA run record
;*
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

cProc   DMajorDDARun,<PUBLIC,NEAR,NODATA>

;/*
;** Builds the horizontal run table for the case where the destination extent
;** is the major axis (destination wider than the source).
;**
;** Frame variables read   : xSrcOrg, xDstOrg, cxSrcExt, cxDstExt, xDst, cxExt
;** Frame variable written : cxSrcVisPel (source x matching destination xDst)
;** Table output           : cxExt-1 words stored at ES:DI, one for every
;**                          visible destination pel after the first; each is
;**                          0 (reuse the same source pel) or 1 (advance to
;**                          the next source pel).
;** On return DI points just past the last word stored and BX holds the
;** updated error term.
;*/

cBegin

;/*
;** First, DDA walk until we find the first source pel corresponds to the
;** first visible pel on the destination.
;*/

        mov     ax,xSrcOrg
        mov     cx,xDstOrg
        mov     dx,cxSrcExt
        mov     si,cxDstExt

ifdef   FIREWALLS
        cmp     cx,xDst                 ;enter the visible area?
        jle     short da_find_first_src ;xDstOrg <= xDst is the expected case
        rip     text,<OEMStretchBlt - incorrect dst starting coordinates>
endif;  FIREWALLS


;/*
;** In the DDA loop:
;** AX = source x, CX = destination x, BX = error term
;** DX = minor axis extent, SI = major axis extent
;** The loop ends only when CX equals xDst exactly.
;*/

da_find_first_src:
        cmp     cx,xDst                 ;enter the visible area?
        je      short da_have_first_src
        inc     cx                      ;step major axis
        sub     bx,dx                   ;subtract minor axis extent
        jge     short da_find_first_src ;step minor axis?
        add     bx,si                   ;yes, add major extent
        inc     ax                      ;step minor axis
        jmp     short da_find_first_src

;/*
;** We now have the offset to the first visible source pel from the unclipped
;** origin, so remember this for address calculation.
;*/

da_have_first_src:
        mov     cxSrcVisPel,ax          ;offset to the 1st visible pel
        mov     cx,cxExt                ;we need this # of dst pels
        xor     ax,ax                   ;AX stays 0 for the rest of the routine

;/*
;** In the DDA loop:
;** AX = 0, the value provisionally stored for each destination pel
;** CX = # visible dst pels left
;** BX = Error term
;** DX = minor axis extent, SI = major axis extent
;** Both back-edges decrement CX: "dec cx / jz" on the reuse path and "loop"
;** on the advance path, so exactly cxExt-1 entries are written.
;*/

        assert  cx,G,0

da_record_run:
        dec     cx                      ;step major axis
        jz      short da_exit           ;all destination pels accounted for

da_dup_src_pel:
        stosw                           ;assume using the current src pel
        sub     bx,dx                   ;subtract minor axis extent
        jge     short da_record_run     ;step minor axis?
        add     bx,si                   ;yes, add major extent
        inc     word ptr es:[di][-2]    ;correct assumption--move to next src pel
        loop    da_dup_src_pel          ;count this dst pel and continue

da_exit:
cEnd

;/***************************************************************************
;*
;* PUBLIC ROUTINE  ComputeInterval 
;*
;* DESCRIPTION   = The bltting interval parameters--first/last byte        
;*                 offset/masks and middle bytes count--will be computed.  
;*                 A first mask and a last mask will be calculated, and    
;*                 possibly combined into the inner loop count.  If no     
;*                 first byte exists, the start address will be incremented
;*                 by the size (byte/word) to adjust for it.               
;*                                                                         
;*                 Registers Preserved:                                                                   
;*                       DI,DS,ES,BP
;*                 Registers Destroyed:                                                                   
;*                       AX,BX,CX,DX,SI,FLAGS                                                             
;*                                                                         
;* INPUT         = NONE
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

PSL_BYTE_FETCH  equ     0703h                     ;mask for modulus / shift count

cProc   ComputeInterval,<PUBLIC,NEAR,NODATA>

;/*
;** Derives, from xDst and cxExt, how one destination scan is split into a
;** partial first byte, a run of whole bytes and a partial last byte.
;**
;** Frame variables read    : xDst, cxExt
;** Frame variables written : cbDstMidByte   - count of whole bytes
;**                           bDstFirstMask  - altered bits in the first byte
;**                                            (0 if it was folded into the
;**                                            whole-byte count)
;**                           bDstLastMask   - altered bits in the last byte
;**                                            (0 if folded or not present)
;**                           cxDstFirstShft - index of the lowest set bit of
;**                                            bDstFirstMask, 8 if it is 0
;**                           cxDstLastShft  - same for bDstLastMask
;*/

cBegin

        mov     dx,xDst                 ;the first visible destination pel
        mov     bx,cxExt                ;the bltting x-extent
        dec     bx                                ;make interval inclusive
        mov     cx,PSL_BYTE_FETCH                ;CH = modulus mask, CL = shift count

;/*
;**  We now have to determine how many bits will be affected,
;**  and how they are aligned within the bytes.
;** 
;**  (left_x MOD byte_size) will give us the starting pixel
;**  within the left byte.  Adding the inclusive extent
;**  of the interval to left_x MOD byte_size and taking the
;**  result MOD byte_size will give us the last pixel affected
;**  in the last byte.     These pixel indexes (0:7 for bytes)
;**  can be used to create the first and last altered bits mask.
;** 
;** 
;**  To compute the number of bytes in the inner loop,
;**  use the second calculation above
;** 
;**        (left_x MOD byte_size) + inclusive_extent
;** 
;**  and divide it by the byte size (8).  This gives you
;**  the following:
;** 
;** 
;**            1)  If the result is 0, then only one destination
;**                byte is being altered.  In this case, the
;**                start & ending masks should be ANDed together,
;**                the innerloop count set to zero, and last_mask
;**                set to to all 0's (don't alter any bits).
;** 
;**                        |      x x x x x|         |
;**                        |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                         0 1 2 3 4 5 6 7
;** 
;**                        start MOD 8 = 3,  extent-1 = 4
;**                        3+4 DIV 8 = 0, only altering one byte
;** 
;** 
;** 
;**            2)  If the result is 1, then only two bytes
;**                will be altered.  In this case, the start and
;**                ending masks are valid, and all that needs to
;**                be done is set the innerloop count to 0.
;** 
;**                           |  x x x x x x x|x x x x x x x|
;**                           |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                            0 1 2 3 4 5 6 7
;**    
;**                           start MOD 8 = 1,  extent-1 = 14
;**                           1+14 DIV 8 = 1.  There is a first and last
;**                           byte but no innerloop count
;**    
;**    
;**    
;**            3)  If the result is > 1, then some number of entire
;**                bytes will be altered by the innerloop.  In
;**                this case the number of innerloop bytes will
;**                be the result - 1.
;**    
;**                        |                               x|x x x x x x x x|x
;**                        |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
;**                         0 1 2 3 4 5 6 7
;**    
;**                        start MOD 8 = 7,  extent-1 = 9
;**                        7+9  DIV 8 = 2.  There is a first and last
;**                        byte and an innerloop count of 1 (result - 1)
;*/


;/*
;** Compute the starting bit position on the left and the ending
;** bit position on the right
;*/


        and     dl,ch                             ;Compute bit index for left side
        xor     dh,dh
        add     bx,dx                             ;Compute bit index for right side
        mov     si,bx                             ;(save for inner loop count)
        and     bl,ch

;/*
;** Turn the bit index within the byte into a mask.
;** We are basically performing a table lookup into the two
;** tables bit_mask_tbl_right and bit_mask_tbl_left.  However,
;** by doing either arithmetic shifts or logical shifts, we
;** can create the masks and save a lot of table space.
;*/


        mov     ch,cl                             ;Save  byte shift count
        mov     cl,dl                             ;CL = left bit index
        xor     ax,ax
        dec     ax                                ;AX = 0FFFFh
        mov     dx,ax                             ;DX = 0FFFFh, used by the SBBs below
        shr     al,cl                             ;AL = first (left) byte mask
        mov     cl,bl                             ;CL = right bit index
        mov     bl,80h
        sar     bl,cl                             ;BL = last (right) byte mask
        mov     cl,ch                             ;Restore byte shift count
        shr     si,cl                             ;Compute inner byte count
        jnz     short ci_not_one_byte             ;loop count + 1 > 0, check it out


;/*
;** Only one byte will be affected.  Combine the first and
;** last byte masks, and set the loop count to 0.
;*/


        and     al,bl                             ;AL = left, BL = right
        xor     bx,bx                             ;Want the entire mask to be 0
        inc     si                                ;SI = 1 so the DEC below gives 0

ci_not_one_byte:
        dec     si                                ;Dec inner loop count (might become 0)

;/*
;** If all pixels in the first byte are altered, combine the
;** first byte into the inner loop and clear the first byte
;** mask.  Ditto for the last byte
;**
;** How the CMP/SBB pairs work (DL = 0FFh, DX = 0FFFFh):
;**   cmp mask,dl  sets carry unless mask = 0FFh
;**   sbb si,dx    computes SI + 1 - carry : SI is bumped only for a full mask
;**   sbb mask,dl  computes mask + 1 - carry (mod 256) : a full mask becomes 0,
;**                any other mask is left unchanged
;*/

        cmp     al,dl                             ;Set 'C' if not all pixels 1
        sbb     si,dx                             ;If no 'C', sub -1 (add 1), else sub 0
        cmp     al,dl                             ;Set 'C' if not all pixels 1
        sbb     al,dl                             ;If no 'C', sub -1 (add 1), else sub 0

        cmp     bl,dl                             ;Set 'C' if not all pixels 1
        sbb     si,dx                             ;If no 'C', sub -1 (add 1), else sub 0
        cmp     bl,dl                             ;Set 'C' if not all pixels 1
        sbb     bl,dl                             ;If no 'C', sub -1 (add 1), else sub 0

;/*
;**  Save the first and last byte masks and the loop count.
;**  If the background mix mode is LeaveAlone, AND the masks
;**  with the transparency mask now to save a little time in
;**  the loop for multiple passes.  If the background mode is
;**  OverPaint, then we need the original clip masks as well
;**  as the transparency mask, in case the clipped bytes contain
;**  only background pixels to output.
;** 
;**  If the first byte/word mask is not zero before ANDing
;**  in the transparency mask, and becomes zero after ANDing
;**  in the transparency mask, then the starting address needs
;**  to be updated by the size (byte/word)
;**
;**  NOTE(review): nothing below ANDs in a transparency mask or adjusts a
;**  start address; the two paragraphs above do not describe this routine's
;**  code and look inherited from elsewhere -- confirm before relying on them.
;**
;**  The high byte of AX/BX is forced to 0FFh before each BSF so a set bit
;**  is always found: the result is 0..7 for a non-zero mask and 8 for a
;**  zero mask.
;*/

ci_interval_save:
        mov     cbDstMidByte,si         ;the inner loop count
        mov     bDstFirstMask,al        ;the first byte mask
        xor     ah,ah
        dec     ah                      ;AH = 0FFh: sentinel for a zero mask
        bsf     cx,ax                   ;CX <= index of the first mask bit from
        mov     cxDstFirstShft,cl       ;the right end
        mov     bDstLastMask,bl         ;the last byte mask
        xor     bh,bh
        dec     bh                      ;BH = 0FFh: sentinel for a zero mask
        bsf     cx,bx                   ;CX <= index of the first mask bit
        mov     cxDstLastShft,cl

ci_exit:
cEnd

;/***************************************************************************
;*
;* PUBLIC ROUTINE  XFerColorScan 
;*
;* DESCRIPTION   = This function performs the stretching/compressing blt     
;*                 within one scan, using the information from the           
;*                 pre-calculated DDA run record.                            
;*                 
;*                 Registers Preserved:          
;*                       EBX,SI,DS,ES,GS         
;*                 Registers Destroyed:          
;*                       AX,CX,DX,flags          
;*
;* INPUT         = DS:SI = the 1st visible source WORD     
;*                 ES:DI = the 1st visible destination byte
;*                 GS:[ESI.hi] = bitmap buffer             
;* OUTPUT        = ES:DI = start of the next destination scan         
;*                 ZF      set if no more destination scan to output. 
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

cProc   XFerColorScan,<PUBLIC,NEAR,NODATA>

;/*
;** Entry used for a color source until the first visible destination scan is
;** reached.  It advances yDstOrg on every call.  For a clipped scan it
;** returns at once; for the first visible scan it redirects pfnXDDAScan and
;** pfnCopyScan and then runs straight on into XFerColorScan_Notest, which
;** follows immediately (both cBegin and cEnd carry <nogen>, and no RET is
;** coded on that path).
;*/

cBegin  <nogen>

;/*
;** Check if the current destination scan is clipped off. If so, we can simply
;** ignore this source scan
;*/

        mov     ax,yDstOrg                  ; the current dest. scan
        inc     yDstOrg                     ; update to the next scan
        cmp     ax,yDst                     ; if smaller than 1st visible scan
        jge     short @F                    ; then ignore this source scan
        ret                                 ; AX < yDst, so ZF is clear here
@@:

;/*
;** After we found the first scan the blt, we do not need the testing above
;** anymore. So the following line will be the entry next time it is called.
;*/

        mov     pfnXDDAScan,CodeOFFSET XFerColorScan_Notest
        mov     pfnCopyScan,CodeOFFSET CopyPrevScan

cEnd    <nogen>

;/*
;** xcs_select_plane - select one color plane for both writing and reading.
;** wrPlane is written to the EGA_BASE+SEQ_DATA port (write plane selection);
;** rdPlane is loaded into GRAF_READ_MAP through the EGA_BASE+GRAF_ADDR port
;** (read plane selection).  Destroys AX and DX.
;*/

xcs_select_plane macro wrPlane,rdPlane
        mov     dx,EGA_BASE+SEQ_DATA
        mov     al,wrPlane
        out     dx,al
        mov     dl,GRAF_ADDR
        mov     ax,(rdPlane shl 8)+GRAF_READ_MAP
        out     dx,ax
        endm

cProc   XFerColorScan_Notest,<PUBLIC,NEAR,NODATA>

cBegin

;/*
;** Save what the caller needs back and point GS:BX at the DDA run records.
;*/

        push    esi                         ; ESI = src ptr (low), plane buffer ptr (high)
        push    bx                          ; BX  = cyErrorTerm
        lea     bx,asDDARun                 ; GS:BX => DDA run records

;/*
;** Plane 0 is stretched from the source straight onto the screen.  The same
;** pass leaves the stretched bytes of planes 1-3 in the plane buffers.
;*/

xcs_c0:
        xcs_select_plane MM_C0,RM_C0
        cCall   XFerOneColorPlane           ; blt C0 plane, fill the buffers

;/*
;** Planes 1-3 are then copied from the buffers.  DS:SI is switched to the
;** buffer; each CopyColorFromBuffer call leaves SI at the next plane buffer.
;*/

        push    ds                          ; caller's DS, restored below
        push    gs
        pop     ds                          ; DS = buffer segment
        assumes ds,nothing
        ror     esi,16                      ; DS:SI => plane buffer

xcs_c1:
        xcs_select_plane MM_C1,RM_C1
        cCall   CopyColorFromBuffer         ; blt C1 plane

xcs_c2:
        xcs_select_plane MM_C2,RM_C2
        cCall   CopyColorFromBuffer         ; blt C2 plane

xcs_c3:
        xcs_select_plane MM_C3,RM_C3
        cCall   CopyColorFromBuffer         ; blt C3 plane

;/*
;** Restore the caller's registers and step to the next destination scan.
;** dec cyExt is the last flag-setting instruction, so ZF on return tells the
;** caller whether any destination scan is left.
;*/

xcs_update_scan:
        pop     ds
        pop     bx                          ; BX = cyErrorTerm again
        pop     esi                         ; src + buffer ptrs again
        sub     di,cbDstScan                ; ES:DI => next destination scan
        dec     cyExt                       ; one more scan done

xcs_exit:
cEnd

XOP_BYTE_SIZE           equ     8           ; # of bits in one byte
XOP_MOD_WORD_SIZE       equ     15          ; AND mask: pel offset modulo 16 (bits in one word)
XOP_INNER_LOOP_COUNT    equ     8           ; # of pels gathered per full destination byte


;/***************************************************************************
;*
;* PUBLIC ROUTINE  XFerOneColorPlane 
;*
;* DESCRIPTION   = This function performs the stretching/compressing blt    
;*                 within each plane, using the information from the        
;*                 pre-calculated DDA run record.  The processed byte for   
;*                 plane 0 are written directly to screen (plane 0), and the
;*                 rest are stored in buffer.                               
;*                                                                          
;*                 Registers Preserved:                      
;*                       ESI,EDI,DS,ES,GS                    
;*                 Registers Destroyed:                      
;*                       AX,BX,CX,DX,flags                   
;*
;* INPUT         = DS:SI = the 1st visible source WORD            
;*                 ES:DI = the 1st visible destination byte       
;*                 GS:[ESI.hi] = bitmap buffer                    
;*                 GS:[BX]     = DDA run record                   
;*
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

;/*
;** Helper macros private to XFerOneColorPlane.  They only give names to the
;** instruction sequences shared by the first-byte, middle-byte and last-byte
;** loops; the expanded code is the same in every loop.
;**
;** Register roles inside the loops:
;**     DS:SI   => plane 0 source word that holds the current source pel
;**     ESI.hi  =  offset in GS of the current plane 1 buffer byte
;**     CH      =  offset of the current source pel from the MSB of its word
;**     DX      =  the same position as a BT bit index (DH is 0 between steps)
;**     GS:BX   => next DDA run record (interval to the next source pel)
;**     EAX     =  four packed accumulators: plane 0 in AL, plane 1 in AH,
;**                planes 2 and 3 in the high word
;**     EDI     =  the destination offset is parked in the high word while DI
;**                serves as scratch for the source word
;*/

;/*
;** xocp_plane_bit - shift the current source pel of one plane into AL, then
;** rotate EAX so that the accumulator of the next plane sits in AL.
;*/

xocp_plane_bit macro
        mov     di,ds:[si]
        bt      di,dx                       ;; CF = source pel of this plane
        adc     al,al                       ;; append it to the accumulator
        ror     eax,8                       ;; move to the next plane
        endm

;/*
;** xocp_fetch_pel - append the current source pel of all four planes to the
;** accumulators in EAX.  The planes lie cbSrcPlane bytes apart.  SI is
;** preserved and EAX ends in its original byte order (four rotates by 8).
;*/

xocp_fetch_pel macro
        push    si
        xocp_plane_bit
        add     si,cbSrcPlane
        xocp_plane_bit
        add     si,cbSrcPlane
        xocp_plane_bit
        add     si,cbSrcPlane
        xocp_plane_bit
        pop     si
        endm

;/*
;** xocp_next_src - step to the next source pel.  The interval from the DDA
;** run record at GS:BX is added to the pel offset in CH, SI advances by the
;** whole source words covered, BX moves on to the next record and DX is
;** recomputed as the bit index of the new pel.
;*/

xocp_next_src macro
        and     ch,XOP_MOD_WORD_SIZE        ;; CH = offset from MSB
        mov     dl,ch
        add     dx,gs:[bx]                  ;; add interval to the next offset
        add     bx,2
        mov     ch,dl                       ;; save the word offset
        shr     dx,4                        ;; offset to the next src word
        shl     dx,1
        add     si,dx                       ;; advance src pointer
        mov     dx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ch                       ;; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE
        endm

;/*
;** xocp_put_planes - store the plane 1, 2 and 3 accumulators into their
;** buffers, which lie SCREEN_DSCAN bytes apart starting at GS:SI.  SI is left
;** at the plane 3 buffer and EAX is left rotated by 16.
;*/

xocp_put_planes macro
        mov     gs:[si],ah                  ;; plane 1 byte is in AH
        add     si,SCREEN_DSCAN             ;; GS:SI => Plane 2 Buffer
        ror     eax,16
        mov     gs:[si],al                  ;; plane 2 byte is now in AL
        add     si,SCREEN_DSCAN             ;; GS:SI => Plane 3 Buffer
        mov     gs:[si],ah                  ;; plane 3 byte is now in AH
        endm

cProc   XFerOneColorPlane,<PUBLIC,NEAR,NODATA>

cBegin

        push    esi
        push    edi
        mov     ch,byte ptr cxSrcVisPel     ; CH = offset of the 1st pel from MSB
        mov     dx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ch                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE

;/*
;** Start from the first destination byte.  If it is only partly covered
;** (bDstFirstMask != 0), CL is turned into a loop control: the mask shifted
;** right by cxDstFirstShft + 1, so that one pel is always done and every
;** further mask bit shifted out by "shr cl,1" requests one more pel.
;*/

xocp_xfer_first_byte:
        xor     ah,ah
        or      ah,bDstFirstMask            ; the 1st visible dest. byte mask
        jz      xocp_xfer_full_byte         ; no partial first byte
        mov     cl,cxDstFirstShft
        inc     cl                          ; we will always do the first pel
        shr     ah,cl                       ; shift mask to the right end
        mov     cl,ah
        ror     edi,16                      ; save DI in its high word
        xor     eax,eax                     ; clear all four accumulators

xocp_xfer_first_loop:
        xocp_fetch_pel
        xocp_next_src
        shr     cl,1                        ; anymore src pel for 1st byte
        jc      short xocp_xfer_first_loop

        ror     edi,16                      ; restore DI
        mov     cl,cxDstFirstShft
        shl     eax,cl                      ; line all four bytes up with the mask

        mov     cl,bDstFirstMask
        not     cl                          ; CL = bits outside the first byte mask
        and     cl,es:[di]                  ; get the unaltered dest. pel
        or      al,cl
        stosb                               ; write plane 0 to the destination
        ror     esi,16                      ; get GS:SI => Plane 1 Buffer
        push    si                          ; remember this address
        xocp_put_planes
        pop     si                          ; get GS:SI => Plane 1 Buffer
        inc     si                          ; go to next byte
        ror     esi,16                      ; DS:SI => source bitmap

;/*
;** Transfer a sequence of full middle bytes.  XOP_INNER_LOOP_COUNT source
;** pels are gathered for every destination byte.  CX is needed for the byte
;** count, so the source pel offset is parked in AH around the count
;** handling.
;*/

xocp_xfer_full_byte:
        mov     ah,ch                       ; AH = offset from MSB
        mov     cx,cbDstMidByte             ; number of full middle bytes
        or      cx,cx
        jz      xocp_xfer_last_byte         ; none (CH is reloaded from AH there)

xocp_xfer_outer_mid_loop:
        push    cx
        mov     ch,ah                       ; CH = offset from MSB
        mov     cl,XOP_INNER_LOOP_COUNT     ; need this # of bits before output
        ror     edi,16                      ; save DI in its high word

xocp_xfer_inner_mid_loop:
        xocp_fetch_pel
        xocp_next_src

        dec     cl
        jnz     xocp_xfer_inner_mid_loop

        ror     edi,16                      ; restore DI
        stosb                               ; write plane 0 to the destination
        ror     esi,16                      ; get GS:SI => Plane 1 Buffer
        push    si                          ; remember this address
        xocp_put_planes
        pop     si                          ; get GS:SI => Plane 1 Buffer
        inc     si                          ; go to next byte
        ror     esi,16                      ; DS:SI => source bitmap

        mov     ah,ch                       ; AH = offset from MSB
        pop     cx                          ; CX = remaining middle bytes
        dec     cx
        jnz     xocp_xfer_outer_mid_loop

;/*
;** Transfer the partial byte left to the last destination byte.
;**
;** AH holds the source pel offset on both ways in: after the last middle
;** byte, and when there were no middle bytes at all.  In the second case
;** "mov cx,cbDstMidByte" has zeroed CH, so CH must be reloaded here.  The
;** reload used to sit before this label, which left CH = 0 on the
;** no-middle-byte path and made the DDA step from the wrong offset after the
;** first pel of the last byte.
;*/

xocp_xfer_last_byte:
        mov     ch,ah                       ; CH = offset from MSB
        xor     cl,cl
        or      cl,bDstLastMask             ; the last byte mask
        jz      xocp_exit
        shl     cl,1                        ; the first src pel is always done
        xor     eax,eax                     ; clear all four accumulators
        ror     edi,16                      ; save DI in its high word

xocp_xfer_last_loop:
        xocp_fetch_pel
        xocp_next_src
        shl     cl,1                        ; anymore src pel for last byte
        jc      short xocp_xfer_last_loop

        ror     edi,16                      ; restore DI
        mov     cl,cxDstLastShft
        shl     eax,cl                      ; line all four bytes up with the mask

        mov     cl,bDstLastMask
        not     cl                          ; CL = bits outside the last byte mask
        and     cl,es:[di]                  ; get the unaltered dest. pel
        or      al,cl
        stosb                               ; write plane 0 to the destination
        ror     esi,16                      ; get GS:SI => Plane 1 Buffer
        xocp_put_planes                     ; ESI is reloaded at exit

xocp_exit:
        pop     edi
        pop     esi
cEnd


;/***************************************************************************
;*
;* PUBLIC ROUTINE  CopyColorFromBuffer 
;*
;* DESCRIPTION   = This function copies the contents of one plane from the     
;*                 bitmap buffer onto the same plane of the screen.            
;*
;*                 Registers Preserved:                                                              
;*                       BX,DI,DS,ES,GS                                                              
;*                 Registers Destroyed:  
;*                       AX,CX,DX,flags  
;*
;* INPUT         = DS:SI = the bitmap buffer of the current plane 
;*                 ES:DI = the 1st visible destination byte       
;*                 
;*                 
;*
;* OUTPUT        = DS:SI = input SI + SCREEN_DSCAN (buffer of the next plane)
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

cProc   CopyColorFromBuffer,<PUBLIC,NEAR,NODATA>,<di>

cBegin

        push    si                          ; keep the start of this plane buffer

;/*
;** Leading partial byte: the buffer byte supplies the bits inside
;** bDstFirstMask, the destination keeps its bits outside the mask.
;*/

ccfb_head:
        movzx   cx,bDstFirstMask
        jcxz    ccfb_body                   ; no partial first byte
        lodsb                               ; AL = buffer byte
        not     cl                          ; CL = bits to keep from the dest.
        and     cl,es:[di]
        or      al,cl
        stosb

;/*
;** Full middle bytes: copy cbDstMidByte bytes, dwords first and then the
;** remaining 0-3 bytes.  DX keeps the byte count across the dword copy.
;*/

ccfb_body:
        mov     dx,cbDstMidByte             ; DX = number of full middle bytes
        mov     cx,dx
        shr     cx,2                        ; CX = number of whole dwords
        rep     movsd
        mov     cx,dx
        and     cx,3                        ; CX = leftover bytes
        rep     movsb

;/*
;** Trailing partial byte, merged the same way under bDstLastMask.
;*/

ccfb_tail:
        movzx   cx,bDstLastMask
        jcxz    ccfb_done                   ; no partial last byte
        lodsb                               ; AL = buffer byte
        not     cl                          ; CL = bits to keep from the dest.
        and     cl,es:[di]
        or      al,cl
        stosb

;/*
;** Leave SI SCREEN_DSCAN bytes past where it started.
;*/

ccfb_done:
        pop     si
        add     si,SCREEN_DSCAN
cEnd


;/***************************************************************************
;*
;* PUBLIC ROUTINE   XFerMonoScan  
;*
;* DESCRIPTION   = This function performs the stretching/compressing blt     
;*                 within one scan, using the information from the           
;*                 pre-calculated DDA run record.                            
;*                 
;*                 Registers Preserved:                                                                     
;*                       EBX,SI,DS,ES,GS                                                                    
;*                 Registers Destroyed:           
;*                       AX,CX,DX,flags           
;*                 
;* INPUT         = DS:SI = the 1st visible source WORD        
;*                 ES:DI = the 1st visible destination byte   
;* OUTPUT        = ES:DI = start of the next destination scan         
;*                 ZF      set if no more destination scan to output. 
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

cProc   XFerMonoScan,<PUBLIC,NEAR,NODATA>

cBegin  <nogen>

;/*
;** First-call entry point for a monochrome scan.  Destination scans that lie
;** before the first visible scan (yDst) are clipped off: the scan counter
;** yDstOrg is still advanced, but the source scan is ignored and we return
;** immediately.
;*/

        mov     ax,yDstOrg                  ; AX = the current dest. scan
        inc     yDstOrg                     ; advance the counter to the next scan
        cmp     ax,yDst                     ; before the 1st visible scan ?
        jge     short @F                    ; no (signed compare), go blt this scan
        ret                                 ; yes, ignore this source scan
@@:

;/*
;** The first visible scan has been reached, so the clip test above is not
;** needed any more.  Redirect the scan function pointers for the following
;** calls, then fall through into XFerMonoScan_Notest, which follows
;** immediately (cEnd <nogen> emits no return here).
;*/

        mov     pfnXDDAScan,CodeOFFSET XFerMonoScan_Notest
        mov     pfnCopyScan,CodeOFFSET CopyPrevScan

cEnd    <nogen>

cProc   XFerMonoScan_Notest,<PUBLIC,NEAR,NODATA>

cBegin

;/*
;** Initialize transfer loop
;**
;** Register roles from here on:
;**     DS:SI   => source word holding the current source pel
;**     GS:BX   => next DDA run record (interval to the next source pel)
;**     AH      =  offset of the current source pel from the MSB of its word
;**     AL      =  destination byte being assembled
;**     EDX     =  one word holds the BT bit index of the current pel, the
;**                other EGA_BASE + GRAF_ADDR; "ror edx,16" swaps them
;**     EDI     =  the destination offset is parked in the high word while DI
;**                serves as scratch for the source word
;*/

        push    si                          ; SI = ptr to src
        push    bx                          ; BX = cyErrorTerm
        lea     bx,asDDARun                 ; GS:BX => DDA run records

;/*
;** Setup the EGA/VGA registers to handle monochrome to color blt.
;*/

xms_setup:
        mov     dx,EGA_BASE+SEQ_DATA        ; plane selection register
        mov     al,MM_ALL                   ; select all planes
        out     dx,al
        mov     dl,GRAF_ADDR                ; DX = EGA_BASE + GRAF_ADDR

;/*
;** Now, do the stretching/compressing blt directly to screen
;*/

        push    edi                         ; restored at xms_update_scan
        ror     edx,16                      ; save EGA_BASE + GRAF_ADDR in hi-word
        mov     ah,byte ptr cxSrcVisPel     ; AH = offset of the 1st pel from MSB
        mov     dx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ah                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE

;/*
;** Start from the first destination byte.  If it is only partly covered
;** (bDstFirstMask != 0), the mask goes to the Bit Mask register and CH is
;** turned into a loop control: the mask shifted right by cxDstFirstShft + 1,
;** so that one pel is always done and every further mask bit shifted out by
;** "shr ch,1" requests one more pel.
;*/

xms_xfer_first_byte:
        xor     cx,cx
        or      ch,bDstFirstMask            ; the 1st visible dest. byte mask
        jz      short xms_xfer_full_byte    ; no partial first byte
        mov     cl,ch                       ; CL = first byte mask
        xchg    ah,cl                       ; bitmask in AH, offset from MSB in CL
        ror     edx,16                      ; DX = EGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK            ; set first byte mask
        out16   dx,ax
        cCall   xms_setup_HW_no_xor         ; setup color munging scheme (destroys AX)
        mov     ah,cl                       ; AH = offset from MSB
        ror     edx,16                      ; DX = bit index again

        mov     cl,cxDstFirstShft
        shr     ch,cl                       ; shift mask to the right end
        shr     ch,1                        ; we will always do the first pel
        xor     al,al                       ; start with an empty dest. byte
        ror     edi,16                      ; save DI in its high word

xms_xfer_first_loop:
        mov     di,ds:[si]
        bt      di,dx                       ; get the next src pel
        adc     al,al                       ; use it for the next destination pel
        and     ah,XOP_MOD_WORD_SIZE        ; AH = offset from MSB
        mov     dl,ah
        add     dx,gs:[bx]                  ; add interval to the next offset
        add     bx,2                        ; GS:BX => next DDA run record
        mov     ah,dl                       ; save the word offset
        shr     dx,4                        ; offset to the next src word
        shl     dx,1
        add     si,dx                       ; advance src pointer
        mov     dx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ah                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE
        shr     ch,1                        ; anymore src pel for 1st byte
        jc      short xms_xfer_first_loop
        ror     edi,16                      ; restore DI
        shl     al,cl                       ; CL = cxDstFirstShft: line up with the mask
        stosb                               ; write to screen

;/*
;** Transfer a sequence of full middle bytes.  XOP_INNER_LOOP_COUNT source
;** pels are gathered for every destination byte.  The hardware is set up
;** once, with the Bit Mask register opened for the entire byte.
;*/

xms_xfer_full_byte:
        mov     cx,cbDstMidByte             ; number of full middle bytes
        jcxz    xms_xfer_last_byte          ; none (CX = 0, AH still = offset)
        mov     dh,ah                       ; save offset from MSB
        ror     edx,16                      ; DX = EGA_BASE + GRAF_ADDR
        mov     ax,0FF00h + GRAF_BIT_MASK   ; no mask for entire byte
        out16   dx,ax
        cCall   xms_setup_HW                ; destroys AX
        ror     edx,16                      ; DX = saved offset : bit index
        mov     ah,dh                       ; AH = offset from MSB
        xor     dh,dh                       ; DH is always 0
        .errnz  XOP_MOD_WORD_SIZE and 0FF00h

xms_xfer_outer_mid_loop:
        push    cx                          ; save the middle byte count
        mov     cx,((XOP_MOD_WORD_SIZE shl 8) + XOP_INNER_LOOP_COUNT)
                                            ; CH = modulo mask, CL = pel count
        ror     edi,16                      ; save DI in its high word

xms_xfer_inner_mid_loop:
        mov     di,ds:[si]
        bt      di,dx                       ; get the next src pel
        adc     al,al                       ; use it for the next destination pel
        and     ah,ch                       ; AH = offset from MSB
        mov     dl,ah
        add     dx,gs:[bx]                  ; add interval to the next offset
        add     bx,2                        ; GS:BX => next DDA run record
        mov     ah,dl                       ; save the word offset
        shr     dx,4                        ; offset to the next src word
        shl     dx,1
        add     si,dx                       ; advance src pointer
        mov     dx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ah                       ; DX = offset from LSB
        and     dl,ch
        dec     cl                          ; the next bit ?
        jnz     short xms_xfer_inner_mid_loop

        ror     edi,16                      ; restore DI
        stosb                               ; write to the destination
        pop     cx                          ; CX = remaining middle bytes
        dec     cx
        jnz     short xms_xfer_outer_mid_loop

;/*
;** Transfer the partial byte left to the last destination byte.  CX is 0 on
;** every path that reaches this label, so after CH is loaded jcxz tests the
;** mask alone.
;*/

xms_xfer_last_byte:
        mov     ch,bDstLastMask             ; the last byte mask
        jcxz    xms_update_scan             ; no partial last byte
        mov     cl,ch                       ; CL = the last byte mask
        xchg    ah,cl                       ; bitmask in AH, offset from MSB in CL
        ror     edx,16                      ; DX = EGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK            ; last byte mask
        out16   dx,ax
        cCall   xms_setup_HW                ; destroys AX
        mov     ah,cl                       ; AH = offset from MSB
        ror     edx,16                      ; DX = bit index again
        shl     ch,1                        ; the first src pel is always done
        xor     al,al                       ; start with an empty dest. byte
        ror     edi,16                      ; save DI in its high word

xms_xfer_last_loop:
        mov     di,ds:[si]
        bt      di,dx                       ; get the next src pel
        adc     al,al                       ; use it for the next destination pel
        and     ah,XOP_MOD_WORD_SIZE        ; AH = offset from MSB
        mov     dl,ah
        add     dx,gs:[bx]                  ; add interval to the next offset
        add     bx,2                        ; GS:BX => next DDA run record
        mov     ah,dl                       ; save the word offset
        shr     dx,4                        ; offset to the next src word
        shl     dx,1
        add     si,dx                       ; advance src pointer
        mov     dx,(XOP_BYTE_SIZE + XOP_MOD_WORD_SIZE)
        sub     dl,ah                       ; DX = offset from LSB
        and     dl,XOP_MOD_WORD_SIZE
        shl     ch,1                        ; anymore src pel for last byte
        jc      short xms_xfer_last_loop
        ror     edi,16                      ; restore DI

        mov     cl,cxDstLastShft
        shl     al,cl                       ; line up with the last byte mask
        stosb                               ; write to destination

;/*
;** Scan done: put the Data Rotate register back to DR_SET, restore the
;** caller's registers and step to the next destination scan.  dec cyExt is
;** the last flag-setting instruction, so ZF on return tells the caller
;** whether any destination scan is left.
;*/

xms_update_scan:
        ror     edx,16                      ; DX = EGA_BASE + GRAF_ADDR again
        pop     edi                         ; ES:DI => start of this dest. scan

        mov     ax,(DR_SET shl 8) + GRAF_DATA_ROT
        out16   dx,ax                       ; DX = EGA_BASE + GRAF_ADDR

        pop     bx                          ; restore cyErrorTerm
        pop     si                          ; restore src
        sub     di,cbDstScan                ; proceed to the next dst scan
        dec     cyExt                       ; one scan was processed

cEnd


;/***************************************************************************
;*
;* PUBLIC ROUTINE  xms_setup_HW(_no_xor)  
;*
;* DESCRIPTION   = This function sets up the EGA/VGA hardware for           
;*                 mono-to-color bitblt.  It assumes the BitMask register   
;*                 initialized properly.
;*                 
;*                 Registers Preserved:                                                                      
;*                       BX,CX,DX,SI,DI,DS,ES,GS,BP 
;*                 Registers Destroyed:             
;*                       AX,flags                   
;*                 
;* INPUT         = NONE
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

cProc   xms_setup_HW,<PUBLIC,NEAR,NODATA>

cBegin

;/*
;** Full entry: put the Data Rotate register back to DR_SET first, so that the
;** background fill below is not combined with the latches.  Callers that
;** know the register is already DR_SET enter at xms_setup_HW_no_xor instead.
;** DX = EGA_BASE + GRAF_ADDR at either entry.
;*/

        mov     ax,(DR_SET shl 8) + GRAF_DATA_ROT
        out16   dx,ax

;/*
;** Step 1: get the background color into the latches.  Set/Reset is loaded
;** with the background color and enabled on all four planes, then the first
;** byte to be blted is written and read back.  Destination pels that must
;** stay unaltered are protected by the Bit Mask register, which the caller
;** has already programmed.
;*/

        public  xms_setup_HW_no_xor
xms_setup_HW_no_xor:
        mov     al,GRAF_SET_RESET
        mov     ah,bBackClr                 ; Set/Reset = background color
        out16   dx,ax
        mov     ax,0F00h + GRAF_ENAB_SR     ; Set/Reset drives all four planes
        out16   dx,ax

        xchg    es:[di],al                  ; the Set/Reset color is written, not AL
        mov     al,es:[di]                  ; read back to fill the latches

;/*
;** Step 2: switch the Data Rotate register to XOR.
;*/

        mov     ax,(DR_XOR shl 8) + GRAF_DATA_ROT
        out16   dx,ax

;/*
;** Step 3: Set/Reset = foreground XOR background, which is 0 in every plane
;** where the two colors match, and Set/Reset is enabled for exactly those
;** planes.
;*/

        mov     al,GRAF_SET_RESET
        mov     ah,bForeClr
        xor     ah,bBackClr                 ; 0 where the colors match
        out16   dx,ax
        mov     al,GRAF_ENAB_SR
        not     ah                          ; 1 where the colors match
        out16   dx,ax                       ; enable Set/Reset on those planes

cEnd


;/***************************************************************************
;*
;* PUBLIC ROUTINE  CopyPrevScan 
;*
;* DESCRIPTION   = This function copies the contents of the previous scan onto 
;*                 the current scan.
;*
;*                 Registers Preserved:  
;*                       BX,SI,DS,ES,GS  
;*                 Registers Destroyed:  
;*                       AX,CX,DX,flags  
;*
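;* INPUT         = ES:DI = the 1st visible destination byte
;*                 (DS:SI on entry is not used: the routine points DS:SI at
;*                 the previous scan itself and restores the caller's values)
;*
;* OUTPUT        = ES:DI = start of the next destination scan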
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

cProc   CopyPrevScan,<PUBLIC,NEAR,NODATA>,<bx,si,ds>

cBegin

;/*
;** Source and destination are both on the screen: the source is the scan
;** cbDstScan bytes above the current destination offset.  DX/BX are set up
;** the way CopyPartialByte expects them: DH:BH addresses the sequencer data
;** port and DH:BL the graphics controller address port.
;*/

        push    es
        pop     ds                          ; copy from screen to screen
        mov     si,di                       ; DS:SI => current scan
        add     si,cbDstScan                ; DS:SI => same byte, previous scan
        mov     dx,EGA_BASE                 ; board control register
        mov     bx,(SEQ_DATA shl 8) + GRAF_ADDR
        push    di                          ; start of the current scan

;/*
;** Copy the partial first byte. We have to do this one plane at a time.
;*/

xcp_first_byte:
        movzx   cx,bDstFirstMask
        jcxz    xcp_copy_full_byte          ; no partial first byte
        cCall   CopyPartialByte             ; does not advance SI or DI
        inc     di                          ; to the next byte
        inc     si

;/*
;** Copy a sequence of full middle bytes. Use M_LATCH_WRITE mode to copy all 4
;** planes at a time.  The mode value is shadowed in PtrData for the state
;** save/restore code, both when it is set and when it is put back.
;*/

xcp_copy_full_byte:
        mov     cx,cbDstMidByte             ; number of full middle bytes
        jcxz    xcp_copy_last_byte          ; none
        push    gs
        mov     gs,MyPtrCodeData            ; Show color read mode to the
        assumes gs,PtrData                  ;   EGA restoration code

        mov     dl,bh                       ; DX = EGA_BASE + SEQ_DATA
        mov     al,MM_ALL                   ; select all 4 planes
        out     dx,al
        mov     dl,bl                       ; DX = EGA_BASE + GRAF_ADDR
        mov     ax,M_LATCH_WRITE shl 8 + GRAF_MODE
        mov     gs:shadowed_graf_mode.vvr_value,ah ; Must shadow this for state code
        out     dx,ax
        rep     movsb                       ; each byte moves via the latches
        mov     ah,M_DATA_READ              ; AL is still GRAF_MODE
        mov     gs:shadowed_graf_mode.vvr_value,ah ; Must shadow this for state code
        out     dx,ax
        pop     gs
        assumes gs,nothing                  ; GS is the caller's again, not PtrData

;/*
;** Copy the partial byte left to the last destination byte
;*/

xcp_copy_last_byte:
        movzx   cx,bDstLastMask
        jcxz    xcp_copy_done               ; no partial last byte
        cCall   CopyPartialByte

;/*
;** Open the Bit Mask register again (CopyPartialByte narrows it), then step
;** to the next destination scan.  dec cyExt is kept as the last flag-setting
;** instruction, as in XFerColorScan_Notest and XFerMonoScan_Notest, so that
;** ZF on return reflects cyExt whatever the out16 macro expands to.
;** NOTE(review): out16 is defined outside this file; confirm whether the
;** caller tests ZF after calling through pfnCopyScan.
;*/

xcp_copy_done:
        mov     ax,0FF00h + GRAF_BIT_MASK   ; bit mask: all bits enabled
        mov     dl,bl                       ; DX = EGA_BASE + GRAF_ADDR
        out16   dx,ax
        pop     di                          ; start of the current scan
        sub     di,cbDstScan                ; proceed to the next dst scan
        dec     cyExt                       ; one scan was processed


xcp_exit:
cEnd


;/***************************************************************************
;*
;* PUBLIC ROUTINE  CopyPartialByte 
;*
;* DESCRIPTION   = This function copies the masked bits of one partial byte
;*                 from the previous scan onto the corresponding byte of the
;*                 current scan, one plane at a time.
;*                                                                            
;*                 Registers Preserved:         
;*                       BX,CH,DS,ES,GS         
;*                 Registers Destroyed:         
;*                       AX,CL,DX,flags         
;*
;* INPUT         = BX    = (SEQ_DATA shl 8) + GRAF_ADDR     
;*                 CX    = Byte Mask                        
;*                 DX    = EGA_BASE                         
;*                 DS:SI = the 1st visible source DWORD     
;*                 ES:DI = the 1st visible destination byte 
;* OUTPUT        = NONE
;*
;* RETURN-NORMAL = NONE
;* RETURN-ERROR  = NONE
;*
;**************************************************************************/

        assumes ds,nothing
        assumes es,nothing

cProc   CopyPartialByte,<PUBLIC,NEAR,NODATA>,<bp>

cBegin

;/*
;** Load the Bit Mask register with the byte mask passed in CL, so that only
;** the masked bits of the destination byte are altered by the writes below.
;*/

        mov     al,GRAF_BIT_MASK
        mov     ah,cl                       ; AH = bits allowed to change
        mov     dl,bl                       ; DX = EGA_BASE + GRAF_ADDR
        out16   dx,ax

;/*
;** Walk the planes from C3 down to C0.  AL holds the write plane mask and BP
;** the matching read map select word; BP is only borrowed here, it is saved
;** and restored by the cProc save list, and no frame variable is referenced
;** while it is in use.
;*/

        mov     al,MM_C3                    ; setup write plane selection
        mov     bp,(RM_C3 shl 8)+GRAF_READ_MAP

cpb_next_plane:
        mov     dl,bh                       ; write plane selection register
        out     dx,al
        mov     dl,bl                       ; read plane selection register
        xchg    ax,bp                       ; AX = read map word, BP = write mask
        out16   dx,ax
        sub     ah,1                        ; read map of the next lower plane
        xchg    ax,bp                       ; AL = write mask, BP = read map word

;/*
;** This transfer cannot use movsb because we need the byte to be filter out
;** by the bit mask register.
;*/

        mov     cl,ds:[si]                  ; the byte from the previous scan
        xchg    es:[di],cl                  ; read the destination, then write to it

        shr     al,1                        ; AL = next writing plane
        jnc     cpb_next_plane              ; CF set once MM_C0 has been shifted out

cEnd

sEnd    Code

;/*
;** Publish the internal labels of this module.
;** NOTE(review): nothing in the visible code references these labels from
;** outside their own routine; presumably they are exported so that they show
;** up in the map/symbol file for debugging - confirm before removing any.
;*/

        public  osb_have_extent
        public  osb_test_yshrink
        public  osb_calc_yerr
        public  osb_test_xshrink
        public  osb_calc_xerr
        public  osb_xform_origin
        public  osb_calc_xdda
        public  osb_calc_src_offset
        public  osb_calc_dst_offset
        public  osb_cursor_exclude
        public  osb_set_blt_param
        public  osb_start_blt
        public  osb_stretch_next_scan
        public  osb_stretch_scan_done
        public  osb_stretch_new_seg
        public  osb_stretch_copy_scan
        public  osb_shrink_new_seg
        public  osb_y_shrink
        public  osb_elim_next_scan
        public  osb_elim_this_scan
        public  osb_exit
        public  sa_find_first_src
        public  sa_have_first_src
        public  sa_record_run
        public  da_find_first_src
        public  da_have_first_src
        public  da_record_run
        public  da_dup_src_pel
        public  xocp_xfer_first_byte
        public  xocp_xfer_full_byte
        public  xocp_xfer_last_byte
        public  xms_xfer_first_byte
        public  xms_xfer_full_byte
        public  xms_xfer_last_byte

        end
