;*DDK*************************************************************************/
;
; COPYRIGHT    Copyright (C) 1995 IBM Corporation
;
;    The following IBM OS/2 WARP source code is provided to you solely for
;    the purpose of assisting you in your development of OS/2 WARP device
;    drivers. You may use this code in accordance with the IBM License
;    Agreement provided in the IBM Device Driver Source Kit for OS/2. This
;    Copyright statement may not be removed.;
;*****************************************************************************/
ifdef DCAF
        page    ,132
;-----------------------------Module-Header-----------------------------;
; Module Name:  DCAFCNV.ASM
;
; This file contains the row conversion code: planar to packed,
; packed to planar, and conversions between pel depths.
;
; Routines:
;   convert_row_4pl_4pk
;   convert_row_4pk_4pl
;   convert_row_4pk_8pk
;   convert_row_4pk_16pk
;   convert_row_8pk_16pk
;   convert_row_16pk_8pk
;   convert_row_16pk_4pk
ifdef S3
;   convert_row_4pk_24pk
;   convert_row_8pk_24pk
;   convert_row_24pk_4pk
;   convert_row_24pk_8pk
endif
;   convert_row_8pkint_8pkext
;   convert_row_8pk_4pk
;   convert_row_4pkint_4pkext
;   convert_row_4pkext_4pkint
;
; Created: 1-Oct-1992
; Author:  John Batty, Data Connection Ltd.
;
;
;-----------------------------------------------------------------------;

        .xlist
        include cmacros.inc
        include dcafmac.inc
        .list

        .386p

;-----------------------------------------------------------------------
; Cope with C naming convention.
;-----------------------------------------------------------------------
FullSizeDeviceDefaultPalette equ <_FullSizeDeviceDefaultPalette>
StandardVGADefaultPalette    equ <_StandardVGADefaultPalette>
NearestRestrictedColourIndex equ <_NearestRestrictedColourIndex>


_DATA           segment use32 dword public 'DATA'
                assume  cs:FLAT, ds:FLAT, es:FLAT

;-----------------------------------------------------------------------
; The planar->packed conversion table converts a 4 bit index into a
; 16-bit (2 byte) value which has the same 4 bits spaced one per nibble
; (before byte swapping, index bit n ends up in bit 4n of the word).
;
; The words in the table are byte swapped because we want them to
; generate Motorola format data (see explanation within
; convert_row_4pl_4pk for further details).
;
; The table holds 16 word entries, so a 4 bit index must be doubled
; before it is used as a byte offset from the start of the table.
;-----------------------------------------------------------------------

        public PlanarToPackedConvertTable
PlanarToPackedConvertTable   equ    this word
        ; All entries in this table are byte-swapped!
        ; The comment on each entry shows the index, then the expansion
        ; BEFORE byte swapping (in binary and in hex).  The dw operand
        ; is that hex value with its two bytes exchanged.

        dw     0000h            ; 0000b -> 0000 0000 0000 0000b (0000h)
        dw     0100h            ; 0001b -> 0000 0000 0000 0001b (0001h)
        dw     1000h            ; 0010b -> 0000 0000 0001 0000b (0010h)
        dw     1100h            ; 0011b -> 0000 0000 0001 0001b (0011h)
        dw     0001h            ; 0100b -> 0000 0001 0000 0000b (0100h)
        dw     0101h            ; 0101b -> 0000 0001 0000 0001b (0101h)
        dw     1001h            ; 0110b -> 0000 0001 0001 0000b (0110h)
        dw     1101h            ; 0111b -> 0000 0001 0001 0001b (0111h)
        dw     0010h            ; 1000b -> 0001 0000 0000 0000b (1000h)
        dw     0110h            ; 1001b -> 0001 0000 0000 0001b (1001h)
        dw     1010h            ; 1010b -> 0001 0000 0001 0000b (1010h)
        dw     1110h            ; 1011b -> 0001 0000 0001 0001b (1011h)
        dw     0011h            ; 1100b -> 0001 0001 0000 0000b (1100h)
        dw     0111h            ; 1101b -> 0001 0001 0000 0001b (1101h)
        dw     1011h            ; 1110b -> 0001 0001 0001 0000b (1110h)
        dw     1111h            ; 1111b -> 0001 0001 0001 0001b (1111h)



;-----------------------------------------------------------------------
; The packed->planar conversion table converts a 4 bit index into a
; 32-bit (4 byte) value which has the same 4 bits spaced one per byte
; (index bit n ends up in bit 8n of the dword, i.e. in the least
; significant bit of byte n).
;
; The table holds 16 dword entries, so a 4 bit index must be multiplied
; by four before it is used as a byte offset from the start of the
; table.  Unlike the planar->packed table, these entries are NOT byte
; swapped.
;
; NOTE(review): the label below is typed "this word" although every
; entry is a dd.  convert_row_4pk_4pl only takes the table's address
; (lea) and reads it with explicit "dword ptr" overrides, so the label
; type does not matter there.
;-----------------------------------------------------------------------

        public PackedToPlanarConvertTable
PackedToPlanarConvertTable   equ    this word

        dd     00000000h     ; Index = 0000b
        dd     00000001h     ; Index = 0001b
        dd     00000100h     ; Index = 0010b
        dd     00000101h     ; Index = 0011b
        dd     00010000h     ; Index = 0100b
        dd     00010001h     ; Index = 0101b
        dd     00010100h     ; Index = 0110b
        dd     00010101h     ; Index = 0111b
        dd     01000000h     ; Index = 1000b
        dd     01000001h     ; Index = 1001b
        dd     01000100h     ; Index = 1010b
        dd     01000101h     ; Index = 1011b
        dd     01010000h     ; Index = 1100b
        dd     01010001h     ; Index = 1101b
        dd     01010100h     ; Index = 1110b
        dd     01010101h     ; Index = 1111b



;-----------------------------------------------------------------------
; Declare conversion table pointers.
; The conversion tables are allocated and calculated as required.
;
; Every pointer is a dword that is assembled as 0, i.e. "no table has
; been created yet".  The code that allocates and fills the tables is
; not in this part of the file.
;
; The names carry a leading underscore and are public.
; NOTE(review): presumably so that C code can reach them under the C
; naming convention, and presumably named <source format>_<destination
; format> (convert_row_4pk_8pk documents _pConvertTable_4_8 as the
; 4bpp -> 8bpp table) - confirm against the code that builds them.
;-----------------------------------------------------------------------
public _pConvertTable_8int_8ext
public _pConvertTable_8_4
public _pConvertTable_4int_4ext
public _pConvertTable_4ext_4int
public _pConvertTable_4_8
public _pConvertTable_4_16
public _pConvertTable_8_16
_pConvertTable_8int_8ext dd 0
_pConvertTable_8_4       dd 0
_pConvertTable_4int_4ext dd 0
_pConvertTable_4ext_4int dd 0
_pConvertTable_4_8       dd 0
_pConvertTable_4_16      dd 0
_pConvertTable_8_16      dd 0

; The 24bpp table pointers only exist in builds that define S3.
ifdef S3
public  _pConvertTable_4_24
public  _pConvertTable_8_24
public  _pConvertTable_24_4
public  _pConvertTable_24_8
_pConvertTable_4_24      dd 0
_pConvertTable_8_24      dd 0
_pConvertTable_24_4      dd 0
_pConvertTable_24_8      dd 0
endif

;-----------------------------------------------------------------------
; Declare space for a cache that is used when converting from 16bpp.
; The cache is split into two parts (tables):
;   1. The Tag   - this is the source value
;   2. The Value - this is the resulting mapping
;
; Both tables hold CONVERT_CACHE_SIZE dword entries and are indexed in
; parallel: entry n of ConvertCacheValues is the mapping for the tag in
; entry n of ConvertCacheTags.
;
; iCacheIndex is the entry that AddValueToCache will overwrite next; it
; wraps back to zero after the last entry, so the cache is replaced in
; round-robin order.
;
; NOTE(review): both labels are typed "this word" although the entries
; are dwords; the cache routines in this file access them with explicit
; "dword ptr" overrides or via their offsets.
;-----------------------------------------------------------------------
ALIGN 4
CONVERT_CACHE_SIZE  EQU  32
public  ConvertCacheTags
public  ConvertCacheValues

; Tag table: CONVERT_CACHE_SIZE dwords, all assembled as zero.
ConvertCacheTags        equ this word
REPT    CONVERT_CACHE_SIZE
dd      0
ENDM

; Value table: CONVERT_CACHE_SIZE dwords, all assembled as zero.
ConvertCacheValues      equ this word
REPT    CONVERT_CACHE_SIZE
dd      0
ENDM

; Index of the next cache entry to be replaced.
public  iCacheIndex
iCacheIndex     dd 0


;-----------------------------------------------------------------------
; Declare external variables.
;
; These three names are text equates (see "Cope with C naming
; convention" near the top of the file), so the symbols actually
; imported carry a leading underscore.
;
; None of them is referenced in the routines at the start of this
; file; NOTE(review): presumably they are used by the depth conversion
; routines further down - confirm.
;-----------------------------------------------------------------------
extrn StandardVGADefaultPalette     :byte
extrn FullSizeDeviceDefaultPalette  :byte
extrn NearestRestrictedColourIndex  :proc

_DATA           ends



;-----------------------------------------------------------------------
; ConvertRGB2To16bpp is a macro that takes a RGB2 value (in eax) and
; returns the corresponding 16-bit color value in ax.
; bx is destroyed.
;
; As consumed by the code below, the input layout in eax is:
;   al          = blue  (8 bits)
;   ah          = green (8 bits)
;   bits 16-23  = red   (8 bits)
;   bits 24-31  = not used for the colour
;
; XGA 16-bit color format is:
;
;      5R   6G   5B
;    RRRRRGGGGGGBBBBB
;
; Each component is truncated (not rounded): the top 5 bits of red and
; blue and the top 6 bits of green are kept.
;
; On exit only ax is meaningful.  The upper half of eax is NOT cleared:
; bits 16-23 hold the original bits 24-31 and bits 24-31 are zero.
; Flags are left as set by the final "or".
;-----------------------------------------------------------------------
ConvertRGB2To16bpp macro

        shr     ah,2            ; ah = 00GGGGGG (top 6 bits of green)
        shr     ax,3            ; ax = 00000GGG GGGBBBBB (green + top 5 bits of blue)
        mov     bx,ax           ; Park the green/blue bits in bx
        shr     eax,8           ; Bring red (bits 16-23) down into ah
        and     ax,0F800h       ; Keep only the top 5 bits of red
        or      ax,bx           ; Merge: ax = RRRRRGGG GGGBBBBB
endm

;-----------------------------------------------------------------------
; Convert16bppToRGB2 is a macro that takes a 16bpp value (in bx) and
; returns the corresponding RGB2 value in eax.
; bx is destroyed.
;
; XGA 16-bit color format is:
;
;      5R   6G   5B
;    RRRRRGGGGGGBBBBB
;
; The result in eax is laid out as:
;   al          = blue  (5 source bits in bits 3-7, bits 0-2 zero)
;   ah          = green (6 source bits in bits 2-7, bits 0-1 zero)
;   bits 16-23  = red   (5 source bits in bits 3-7, bits 0-2 zero)
;   bits 24-31  = zero
;
; The low bits of each component are zero filled, so full intensity in
; the source becomes 0F8h (red, blue) or 0FCh (green), not 0FFh.
;
; On exit bx holds the source value rotated left by five bits, and the
; flags are as set by the final "and".
;-----------------------------------------------------------------------
Convert16bppToRGB2 macro

        ; Zero output register
        xor     eax,eax

        ; Extract red component
        ; (the top five bits of bh; the shift moves it up to bits 16-23
        ; leaving ax clear for green and blue)
        mov     ah,bh
        and     ah,0F8h
        shl     eax,8

        ; Extract blue component
        ; (this must happen before bx is rotated below)
        mov     al,bl
        shl     al,3

        ; Extract green component
        ; (the rotate brings the six green bits to the top of bh)
        rol     bx,5
        mov     ah,bh
        and     ah,0FCh
endm




_TEXT           segment use32 dword public 'CODE'
                assume  cs:FLAT, ds:FLAT, es:FLAT


;--------------------------Internal-Routine-----------------------------;
; InitializeCache
;
; Empties the 16bpp conversion cache: every tag and every value is set
; to zero and the replacement index is moved back to the first entry.
;
; Entry:
;       None
;       (rep stosd is used, so this assumes the direction flag is clear
;       on entry - TODO confirm against the callers)
; Returns:
;       None
; Registers Destroyed:
;       EAX (zero on exit), ECX (zero on exit), EDI, FLAGS
;
; Note: because the tags are reset to zero, a later SearchCache for a
; tag of zero finds the first entry and returns a value of zero.
;-----------------------------------------------------------------------;
cProc   InitializeCache,<PUBLIC>

cBegin
        ; eax is the fill pattern for both tables, and also the new
        ; value of the replacement index.
        xor     eax,eax

        ; Wipe the tags.
        mov     edi,offset FLAT:ConvertCacheTags
        mov     ecx,CONVERT_CACHE_SIZE
        rep     stosd

        ; Wipe the values.
        mov     edi,offset FLAT:ConvertCacheValues
        mov     ecx,CONVERT_CACHE_SIZE
        rep     stosd

        ; The next entry to be replaced is the first one.
        mov     iCacheIndex,eax
cEnd


;--------------------------Internal-Routine-----------------------------;
; SearchCache
;
; Looks up a tag in the conversion cache.
;
; Entry:
;   eax =  tag to look for
;       (repne scasd is used, so this assumes the direction flag is
;       clear on entry - TODO confirm against the callers)
; Returns:
;   eax =  cached value belonging to the first matching tag, or
;          -1 if no entry carries that tag
; Registers Destroyed:
;       EAX,ECX,EDI,FLAGS
;-----------------------------------------------------------------------;
cProc   SearchCache,<PUBLIC>

cBegin
        ; Scan the whole tag table for the dword in eax.
        mov     edi,offset FLAT:ConvertCacheTags
        mov     ecx,CONVERT_CACHE_SIZE
        repne   scasd
        jne     short search_cache_miss

        ; Hit.  ecx now counts the entries that were left unscanned
        ; after the matching one, so the matching entry is number
        ; (CONVERT_CACHE_SIZE - 1) - ecx.  Fetch its value.
        mov     eax,(CONVERT_CACHE_SIZE - 1)
        sub     eax,ecx
        mov     eax,dword ptr ConvertCacheValues[eax*4]
        jmp     short search_cache_done

search_cache_miss:
        ; The tag is not in the cache.
        mov     eax,-1

search_cache_done:

cEnd

;--------------------------Internal-Routine-----------------------------;
; AddValueToCache
;
; Stores a tag/value pair in the conversion cache, overwriting the
; entry selected by iCacheIndex, then advances iCacheIndex (wrapping
; back to zero after the last entry).
;
; Entry:
;   ebx =  tag (the source value)
;   eax =  value (the mapping for that tag)
; Returns:
;       None (eax and ebx are unchanged)
; Registers Destroyed:
;       EDX (holds the new iCacheIndex on exit), FLAGS
;-----------------------------------------------------------------------;
cProc   AddValueToCache,<PUBLIC>

cBegin
        ; Overwrite the entry that is next in line for replacement.
        mov     edx,iCacheIndex
        mov     dword ptr ConvertCacheValues[edx*4],eax
        mov     dword ptr ConvertCacheTags[edx*4],ebx

        ; Step on to the following entry, going back to the first
        ; entry once the end of the cache has been passed.
        inc     edx
        cmp     edx,CONVERT_CACHE_SIZE
        jl      short add_value_index_ok
        xor     edx,edx
add_value_index_ok:
        mov     iCacheIndex,edx
cEnd



;--------------------------Internal-Routine-----------------------------;
; convert_row_4pl_4pk
;
; This routine converts a row from 4bpp planar format to 4bpp packed
; format.
;
; Entry:
;   esi -> source data (the plane D byte of the first eight pels)
;   edi -> destination buffer
;   ecx =  number of source pels (expected to be a multiple of 8)
;   ebx =  byte offset between planes
;
; Returns:
;   esi -> byte after last byte in row
;   edi -> next free byte in destination buffer
;
; Notes:
;   Eight pels are converted per iteration: one byte is read from each
;   of the four planes and one dword is written to the destination.
;   Any pels beyond a multiple of eight are not converted.
;
;   If ecx is less than 8 (including zero) nothing is read or written
;   and esi and edi are returned unchanged.
;
;   stosd is used for the output, so this assumes the direction flag
;   is clear on entry - TODO confirm against the callers.
;
; Error Returns:
;       None
; Registers Preserved:
;       DS,ES,FS,GS,EBP
; Registers Destroyed:
;       EAX,EBX,ECX,EDX,ESI,EDI,FLAGS
; History:
;
;-----------------------------------------------------------------------;

cProc   convert_row_4pl_4pk,<PUBLIC>

        localD  ulPreviousPlanarData
        localD  ulPreviousPackedData
        localD  cLoopCount
        localD  cbPlaneDelta
        localD  pSrcData

cBegin

        ; Store supplied delta between planes
        mov     cbPlaneDelta,ebx

        ; Store the source data pointer
        mov     pSrcData,esi

ifdef  FIREWALLS
        ; The code assumes that the number of pels is a multiple of 8.
        ; This allows us to manipulate 32 bits at a time.
        ; Check that this is so.
        test    ecx,7
        jz      short @F
        int     3
@@:
endif ; FIREWALLS

        ; We process eight pels on each iteration of the loop.
        ; Therefore divide the loop count by eight.
        shr     ecx,3

        ; The loop below tests its count at the bottom, so it must not
        ; be entered with a count of zero: the count would wrap round
        ; and the loop would run on far past the end of both buffers.
        ; If there is not even one group of eight pels, leave now.
        jnz     short @F
        jmp     done_4pl_4pk
@@:
        mov     cLoopCount,ecx

        ; When processing pels, we keep a note of the previous
        ; source(planar) and destination(packed) pel values.
        ; This allows us to quickly convert repeated pel values.
        ; Initialise these values here to zero (a planar value of zero
        ; translates to a packed value of zero).
        xor     eax,eax
        mov     ulPreviousPlanarData,eax
        mov     ulPreviousPackedData,eax

;----------------------------------------------------------------------
; Let's just pause to consider the problem...
;
;
; Consider 8 4bpp pels occupying 4 bytes (32 bits).
;
; The first (leftmost) pel is comprised of bits (A0,B0,C0,D0)
; The second pel is comprised of bits (A1,B1,C1,D1)
; ...
; The eighth (last, rightmost) pel is comprised of bits (A7,B7,C7,D7)
;
;
;
; The source (planar) data is arranged with one bit of each of the
; eight pels in each byte, with the bytes separated by a fixed
; pitch (the length of the row - which we have in ebx).
; The plane order goes from least to most significant.
;
; Address     [esi]     ...  [esi+ebx] ...  [esi+ebx*2]...  [esi+ebx*3]
;             DDDDDDDD  ...  CCCCCCCC  ...  BBBBBBBB   ...  AAAAAAAA
;             01234567  ...  01234567  ...  01234567   ...  01234567
;
;
; We are going to convert this data to packed format, which is arranged
; with two whole pels per byte:
;
; Address     [edi]          [edi+1]        [edi+2]        [edi+3]
;             ABCDABCD       ABCDABCD       ABCDABCD       ABCDABCD
; Pixel       00001111       22223333       44445555       66667777
;
;
; In both cases the data is in Motorola format, which means that the
; left-most pels occupy the most-significant bits within each byte.
;
; The packed data above stored within a 32-bit Intel register (e.g. eax)
; looks like this:
;
;     <-----------------eax---------------->
;                         <-------ax------->
;                                   <--al-->
;     ABCDABCD  ABCDABCD  ABCDABCD  ABCDABCD
;     66667777  44445555  22223333  00001111
;
; This is what we are aiming for!
;
; We do the conversion using a 16-entry lookup table, which takes four
; bits of planar data and expands them to 16-bits, spaced at 4-bit
; intervals (which, as can be seen in the diagram above, is how they are
; spaced in packed format).
;
; We repeat this lookup for each group of 4 bits in the source, and
; merge the results using shift and or operations.
;
;
; OK, enough explanation... let's do it!
;----------------------------------------------------------------------

loop_4pl_4pk:

        ; Fetch current source pointer
        ; (esi is reused as the table pointer further down, so the
        ; source pointer lives in pSrcData between iterations)
        mov     esi,pSrcData

        ; Get plane delta into ebx
        mov     ebx,cbPlaneDelta

        ; Get Plane D
        mov     ah,[esi]

        ; Get Plane C
        mov     al,[esi+ebx]
        rol     eax,16          ; Planes D+C move to the upper 16 bits

        ; Get Plane B
        mov     ah,[esi+ebx*2]

        ; Get Plane A
        add     esi,ebx         ; We need [esi+ebx*3]
        mov     al,[esi+ebx*2]

        ; Update stored source pointer to point to the next
        ; source byte.
        inc     pSrcData

        ;---------------------------------------------------------------
        ; Now we have:
        ;
        ;  <----------------eax----------------->
        ;                      <-------ax------->
        ;                      <--ah-->  <--al-->
        ;  DDDDDDDD  CCCCCCCC  BBBBBBBB  AAAAAAAA
        ;  01234567  01234567  01234567  01234567
        ;
        ;---------------------------------------------------------------

        ; See if these are the same pels as we previously converted
        cmp     eax,ulPreviousPlanarData
        jne     short no_fast_path

        ; Hey! This is the same source data as the previous 32-bits.
        ; Just pick up the last converted data and exit quickly!
        mov     eax,ulPreviousPackedData
        jmp     short got_packed_data

no_fast_path:
        ; Store the source data
        mov     ulPreviousPlanarData,eax

        ;---------------------------------------------------------------
        ; In the next bit of code, the following registers are used:
        ;
        ;   eax - accumulates the 32 bits of packed data
        ;   ebx - a work register, containing parts of the planar data
        ;   ecx - keeps the original 32 bits of planar data
        ;   edx - indexes into the planar->packed conversion table
        ;   esi - points to the start of the planar->packed conversion
        ;         table
        ;---------------------------------------------------------------

        ; We will convert bits 4,5,6,7 of all pels first.
        ; Copy the original source data into ecx.
        mov     ecx,eax

        ; Isolate bits 4,5,6,7 of all planes
        mov     ebx,ecx         ; Get planar data
        and     ebx,0F0F0F0Fh   ; Keep the lower nibble of every byte

        ;---------------------------------------------------------------
        ; Now we have:
        ;
        ;  <----------------ebx----------------->
        ;                      <-------bx------->
        ;                      <--bh-->  <--bl-->
        ;  ....DDDD  ....CCCC  ....BBBB  ....AAAA
        ;  ....4567  ....4567  ....4567  ....4567
        ;
        ; ( . = zeroed bit)
        ;---------------------------------------------------------------

        ; Point esi at the conversion table
        lea     esi,PlanarToPackedConvertTable

        ; Clear all bits of edx (we use only bottom 8 bits for index).
        xor     edx,edx

        ; We want to use the values in ebx as indices into a table of
        ; words. Multiply them all by two now.
        ; (Cool! This processor can do four multiplies in one instruction!)
        ; Each byte holds at most 0Fh, so nothing spills into the
        ; neighbouring byte.
        shl     ebx,1

        ; Now convert, and accumulate the results into eax.
        ; Each table entry has its bits in bit 0 of every nibble.  The
        ; shifts between the lookups leave plane A in bit 3 of each
        ; nibble, B in bit 2, C in bit 1 and D in bit 0 (ABCD).
        ; The upper 16 bits of eax still contain planar data at this
        ; point; they are thrown away by the 16 bit shift below.

        ; Expand A4-A7
        mov     dl,bl                   ; Get Plane A, bits 4-7
        mov     ax,word ptr [esi+edx]   ; Expand bits
        shl     eax,1                   ; Shift destination by 1 bit

        ; Expand B4-B7
        mov     dl,bh                   ; Get Plane B, bits 4-7
        or      ax,word ptr [esi+edx]   ; Expand+merge bits
        shl     eax,1                   ; Shift destination by 1 bit

        ; Expand C4-C7
        shr     ebx,16                  ; Move Planes C+D into bl,bh
        mov     dl,bl                   ; Get Plane C, bits 4-7
        or      ax,word ptr [esi+edx]   ; Expand+merge bits
        shl     eax,1                   ; Shift destination by 1 bit

        ; Expand D4-D7
        mov     dl,bh                   ; Get Plane D, bits 4-7
        or      ax,word ptr [esi+edx]   ; Expand+merge bits

        ;---------------------------------------------------------------
        ; We have now processed bits 4,5,6,7 of all pels.
        ; Shift them up into their final positions in the upper
        ; 16 bits of eax.  This also leaves ax zero, ready to
        ; accumulate the other four pels.
        ;---------------------------------------------------------------
        shl     eax,16

        ;---------------------------------------------------------------
        ; Now process bits 0,1,2,3
        ;---------------------------------------------------------------
        mov     ebx,ecx         ; Get original data into ebx

        ; Isolate bits 0,1,2,3 of all planes (the upper nibble of
        ; every byte)
        and     ebx,0F0F0F0F0h

        ; Move bits into lower nibble (by shifting right 4 bits).
        ; However, we will want to use the bits as an index
        ; into a table of words (as we did above), which we do by
        ; shifting left by 1 bit.
        ; We can combine these operations and just shift right by 3 bits.
        ; (Ultra-Cool! This processor can do four shifts and four
        ; multiplies in one instruction!)
        shr     ebx,3

        ; Now repeat the same expansion process...
        ; This time only ax is shifted, so that the four pels already
        ; sitting in the upper 16 bits of eax are not disturbed.

        ; Expand A0-A3
        mov     dl,bl                   ; Get Plane A, bits 0-3
        or      ax,word ptr [esi+edx]   ; Expand bits (ax is zero here)
        shl     ax,1                    ; Shift destination by 1 bit

        ; Expand B0-B3
        mov     dl,bh                   ; Get Plane B, bits 0-3
        or      ax,word ptr [esi+edx]   ; Expand+merge bits
        shl     ax,1                    ; Shift destination by 1 bit

        ; Expand C0-C3
        shr     ebx,16                  ; Move Planes C+D into bl,bh
        mov     dl,bl                   ; Get Plane C, bits 0-3
        or      ax,word ptr [esi+edx]   ; Expand+merge bits
        shl     ax,1                    ; Shift destination by 1 bit

        ; Expand D0-D3
        mov     dl,bh                   ; Get Plane D, bits 0-3
        or      ax,word ptr [esi+edx]   ; Expand+merge bits

        ; Save the 32-bit packed value in our single-entry cache
        mov     ulPreviousPackedData,eax

got_packed_data:
        ; We have converted all eight pels!
        ; Write the packed data to the destination, and update the
        ; destination pointer (edi).
        stosd

        dec     cLoopCount
        jnz     loop_4pl_4pk

done_4pl_4pk:
        ; Load esi with source pointer before return.
        ; (On the early exit above this is still the pointer that was
        ; passed in.)
        mov     esi,pSrcData

cEnd



;--------------------------Internal-Routine-----------------------------;
; convert_row_4pk_4pl
;
; This routine converts a row from 4bpp packed format to 4bpp planar
; format.
;
; Entry:
;   esi -> source data
;   edi -> destination buffer (the plane D byte of the first eight pels)
;   ecx =  number of source pels (expected to be a multiple of 8)
;   ebx =  byte offset between planes
;
; Returns:
;   esi -> byte after the last source dword that was read
;   edi -> byte after the last plane D byte that was written
;
; Notes:
;   Eight pels are converted per iteration: one dword is read from the
;   source and one byte is written to each of the four planes.
;   Any pels beyond a multiple of eight are not converted.
;
;   If ecx is less than 8 (including zero) nothing is read or written
;   and esi and edi are returned unchanged.
;
;   lodsd is used for the input, so this assumes the direction flag
;   is clear on entry - TODO confirm against the callers.
;
; Error Returns:
;       None
; Registers Preserved:
;       DS,ES,FS,GS,EBP
; Registers Destroyed:
;       EAX,EBX,ECX,EDX,ESI,EDI,FLAGS
; History:
;
;-----------------------------------------------------------------------;


cProc   convert_row_4pk_4pl,<PUBLIC>

        localD  ulPreviousPlanarData
        localD  ulPreviousPackedData
        localD  cLoopCount
        localD  cbPlaneDelta

cBegin

        ; Store supplied delta between planes
        mov     cbPlaneDelta,ebx

ifdef FIREWALLS
        ; The code assumes that the number of pels is a multiple of 8.
        ; This allows us to manipulate 32 bits at a time.
        ; Check that this is so.
        test    ecx,7
        jz      short @F
        int     3
@@:
endif ; FIREWALLS

        ; We process eight pels on each iteration of the loop.
        ; Therefore divide the loop count by eight.
        shr     ecx,3

        ; The loop below tests its count at the bottom, so it must not
        ; be entered with a count of zero: the count would wrap round
        ; and the loop would run on far past the end of both buffers.
        ; If there is not even one group of eight pels, leave now.
        jnz     short @F
        jmp     done_4pk_4pl
@@:
        mov     cLoopCount,ecx


        ; When processing pels, we keep a note of the previous
        ; source(packed) and destination(planar) pel values.
        ; This allows us to quickly convert repeated pel values.
        ; Initialise these values here to zero (a packed value of zero
        ; translates to a planar value of zero).
        xor     eax,eax
        mov     ulPreviousPackedData,eax
        mov     ulPreviousPlanarData,eax

;----------------------------------------------------------------------
; Let's just pause to consider the problem...
;
;
; Consider 8 4bpp pels, occupying 4 bytes (32 bits).
;
; The first (leftmost) pel is comprised of bits (A0,B0,C0,D0)
; The second pel is comprised of bits (A1,B1,C1,D1)
; ...
; The eighth (last, rightmost) pel is comprised of bits (A7,B7,C7,D7)
;
;
;
; The source (packed) data is arranged with two whole pels per byte:
;
; Address     [esi]          [esi+1]        [esi+2]        [esi+3]
;             ABCDABCD       ABCDABCD       ABCDABCD       ABCDABCD
; Pixel       00001111       22223333       44445555       66667777
;
;
; We are going to convert this data to planar format, which is arranged
; with one bit of each of the eight pels in each byte, with the bytes
; separated by a fixed pitch (the length of the row - which we have in
; ebx).
;
; Address     [edi]     ...  [edi+ebx] ...  [edi+ebx*2]...  [edi+ebx*3]
;             DDDDDDDD  ...  CCCCCCCC  ...  BBBBBBBB   ...  AAAAAAAA
;             01234567  ...  01234567  ...  01234567   ...  01234567
;
;
;
; In both cases the data is in Motorola format, which means that the
; left-most pels occupy the most-significant bits within each byte.
;
; The planar data above stored as bytes within a 32-bit Intel register
; (e.g. eax) looks like this:
;
;     <----------------eax----------------->
;                         <-------ax------->
;                         <--ah-->  <--al-->
;     AAAAAAAA  BBBBBBBB  CCCCCCCC  DDDDDDDD
;     01234567  01234567  01234567  01234567
;
; This is what we are aiming for!
;
; We do the conversion using a 16-entry lookup table, which when
; indexed with a packed source pel value gives a 32-bit value
; that contains each of the bits spaced at 8-bit (byte) intervals.
;
;
; We repeat this lookup for eight packed source pels, and
; merge the results using rotate and or operations.
;
;
; OK, enough explanation... let's do it!
;----------------------------------------------------------------------

loop_4pk_4pl:

        ; Read eight packed 4bpp pels into eax.
        lodsd

        ;---------------------------------------------------------------
        ; The source data is in Motorola format, and we have read it
        ; into an Intel register, which gives us:
        ;
        ;   <-----------------eax---------------->
        ;                       <-------ax------->
        ;                                 <--al-->
        ;   ABCDABCD  ABCDABCD  ABCDABCD  ABCDABCD
        ;   66667777  44445555  22223333  00001111
        ;
        ;---------------------------------------------------------------

        ; See if these are the same pels as we previously converted
        cmp     eax,ulPreviousPackedData
        jne     short no_4pk_4pl_fast_path

        ; Hey! This is the same source data as the previous 32-bits.
        ; Just pick up the last converted data and exit quickly!
        mov     eax,ulPreviousPlanarData
        jmp     short got_planar_data

no_4pk_4pl_fast_path:
        ; Store the source data
        mov     ulPreviousPackedData,eax

        ;---------------------------------------------------------------
        ; In the next bit of code, the following registers are used:
        ;   eax - accumulates the 32 bits of planar data
        ;   ebx - a work register, containing parts of the packed data
        ;   ecx - keeps the original 32 bits of packed data
        ;   edx - indexes into the packed->planar conversion table
        ;   esi - points to the start of the packed->planar conversion
        ;         table (the source pointer is kept on the stack
        ;         meanwhile)
        ;---------------------------------------------------------------

        ; We will convert the odd pels, then the even pels.
        ; This saves on the amount of masking (ANDing) that we have to do.
        ; Copy the original source data into ecx.
        mov     ecx,eax

        ; Extract pels 1,3,5,7
        mov     ebx,ecx         ; Get packed data
        and     ebx,0F0F0F0Fh   ; Keep the lower nibble (odd pel) of every byte

        ;---------------------------------------------------------------
        ; Now we have:
        ;
        ;  <----------------ebx----------------->
        ;                      <-------bx------->
        ;                      <--bh-->  <--bl-->
        ;  ....ABCD  ....ABCD  ....ABCD  ....ABCD
        ;  ....7777  ....5555  ....3333  ....1111
        ;
        ; ( . = zeroed bit)
        ;---------------------------------------------------------------

        ; Point esi at the conversion table
        push    esi                             ; Save current esi
        lea     esi,PackedToPlanarConvertTable

        ; Clear all bits of edx (we use only bottom 8 bits for index).
        xor     edx,edx

        ; We want to use the values in ebx as indices into a table of
        ; dwords. Multiply them all by four now.
        ; (Cool! This processor can do four multiplies in one instruction!)
        ; Each byte holds at most 0Fh, so nothing spills into the
        ; neighbouring byte.
        shl     ebx,2

        ; Now convert, and accumulate the results into eax.
        ; Each table entry has its bits in bit 0 of every byte.  The
        ; rotates between the lookups leave pel 1 in bit 6 of each
        ; byte, pel 3 in bit 4, pel 5 in bit 2 and pel 7 in bit 0.

        ; Expand Pel 1
        mov     dl,bl                   ; Get Pel 1
        mov     eax,dword ptr [esi+edx] ; Expand bits
        rol     eax,2                   ; Rotate destination by 2 bits

        ; Expand Pel 3
        mov     dl,bh                   ; Get Pel 3
        or      eax,dword ptr [esi+edx] ; Expand+merge bits
        rol     eax,2                   ; Rotate destination by 2 bits

        ; Expand Pel 5
        shr     ebx,16                  ; Move Pels 5+7 into bl,bh
        mov     dl,bl                   ; Get Pel 5
        or      eax,dword ptr [esi+edx] ; Expand+merge bits
        rol     eax,2                   ; Rotate destination by 2 bits

        ; Expand Pel 7
        mov     dl,bh                   ; Get Pel 7
        or      eax,dword ptr [esi+edx] ; Expand+merge bits

        ;---------------------------------------------------------------
        ; We have now processed Pels 1,3,5,7 into eax.
        ; Now process Pels 0,2,4,6.
        ;---------------------------------------------------------------
        mov     ebx,ecx         ; Get original data into ebx

        ; Remove Pels 1,3,5,7, keeping the upper nibble (even pel) of
        ; every byte.
        and     ebx,0F0F0F0F0h

        ; Move bits into lower nibble of each byte (by shifting right 4 bits).
        ; However, we will want to use the bits as an index
        ; into a table of dwords (as we did above), which we do by
        ; shifting left by 2 bits.
        ; We can combine these operations and just shift right by 2 bits.
        ; (Ultra-Cool! This processor can do four shifts and four
        ; multiplies in one instruction!)
        shr     ebx,2

        ; We currently have the data in eax such that bit D7 is in
        ; the lsb position. Rotate the register so that bit D0 is
        ; in the lsb position.
        ; (From here on the byte lanes of eax are skewed; the three
        ; 2 bit rotates below plus the final 1 bit rotate bring them
        ; back into line: 7 right, then 2+2+2+1 left.)
        ror     eax,7

        ; Now repeat the same expansion process for Pels 0,2,4,6.

        ; Expand Pel 0
        mov     dl,bl                   ; Get Pel 0
        or      eax,dword ptr [esi+edx] ; Expand+merge bits
        rol     eax,2                   ; Rotate destination by 2 bits

        ; Expand Pel 2
        mov     dl,bh                   ; Get Pel 2
        or      eax,dword ptr [esi+edx] ; Expand+merge bits
        rol     eax,2                   ; Rotate destination by 2 bits

        ; Expand Pel 4
        shr     ebx,16                  ; Move Pels 4+6 into bl,bh
        mov     dl,bl                   ; Get Pel 4
        or      eax,dword ptr [esi+edx] ; Expand+merge bits
        rol     eax,2                   ; Rotate destination by 2 bits

        ; Expand Pel 6
        mov     dl,bh                   ; Get Pel 6
        or      eax,dword ptr [esi+edx] ; Expand+merge bits

        ; We have now expanded all pels into eax.
        ; Rotate eax so that the bits are in the correct positions
        ; (i.e. so D7 is in least significant bit - see earlier diagram).
        rol     eax,1

        ; Restore source pointer
        pop     esi


        ; Save the 32-bit planar value in our single-entry cache
        mov     ulPreviousPlanarData,eax

got_planar_data:
        ; We have converted all eight pels!
        ; Write the planar data to the destination, and update the
        ; destination pointer (edi).

        ; Get plane increment into ebx
        mov     ebx,cbPlaneDelta

        ; Save current destination pointer
        push    edi

        ; Store plane D
        mov     [edi],al

        ; Store plane C
        mov     [edi+ebx],ah

        ; Store plane B
        shr     eax,16          ; Planes A+B, come on down!
        mov     [edi+ebx*2],al

        ; Store plane A
        add     edi,ebx         ; We need [edi+ebx*3]
        mov     [edi+ebx*2],ah

        ; Restore and update destination pointer
        ; (one byte per plane holds all eight pels, so step on by one)
        pop     edi
        inc     edi

        dec     cLoopCount
        jnz     loop_4pk_4pl

done_4pk_4pl:

cEnd


;-----------------------------------------------------------------------
; Function:
;   convert_row_4pk_8pk
;
; Description:
;   Converts a rectangle row from 4bpp packed format to 8bpp packed
;   format
;
; Input values:
;
;       _pConvertTable_4_8
;              - Global variable that points to a conversion table of
;                16 bytes that convert the 4bpp values to 8bpp values.
;
;       esi    - Points to source (4bpp) data.
;
;       edi    - Points to destination buffer (where 8bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a multiple of 8.
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;-----------------------------------------------------------------------
;-----------------------------------------------------------------------
; Xlat4pkPairTo8pk - assembly-time helper for convert_row_4pk_8pk.
;
; Expands to the code that takes the two 4bpp pels packed in the byte
; register SrcByte (first pel in bits 7..4, second pel in bits 3..0),
; translates each one through the byte table addressed by ebx and
; stores the two resulting bytes through edi.
;
; al is used as scratch. SrcByte is only read (it must not be al).
;-----------------------------------------------------------------------
Xlat4pkPairTo8pk macro SrcByte
        mov     al,SrcByte      ; Fetch both pels of the pair
        shr     al,4            ; Keep the first pel (high nibble)
        xlatb                   ; al = table[al]
        stosb

        mov     al,SrcByte      ; Fetch the pair again
        and     al,0Fh          ; Keep the second pel (low nibble)
        xlatb                   ; al = table[al]
        stosb
endm

cProc   convert_row_4pk_8pk,<PUBLIC>

cBegin
        ; xlatb looks up [ebx+al], so the 16-entry 4bpp -> 8bpp table
        ; has to be addressed through ebx.
        mov     ebx, _pConvertTable_4_8

ifdef FIREWALLS
        ; Debug builds only: trap if the table has not been created ...
        or      ebx,ebx
        jnz     short @F
        int     3
@@:
        ; ... or if the row is not a whole number of 8-pel groups.
        ; (4bpp rows are expected to be aligned on 8-pel boundaries, so
        ; the source is a whole number of dwords.)
        test    ecx,7
        jz      short @F
        int     3
@@:
endif ;FIREWALLS

        ; One source dword holds eight pels: loop count = pels / 8.
        shr     ecx,3

convert_4pk_8pk_loop:
        ;--------------------------------------------------------------
        ; In memory the pels are laid out most-significant-nibble first:
        ;
        ;   byte 0 = P1:P2   byte 1 = P3:P4
        ;   byte 2 = P5:P6   byte 3 = P7:P8
        ;
        ; After the little-endian dword load, byte 0 is in dl, byte 1 in
        ; dh, and bytes 2/3 arrive in dl/dh once edx is shifted down by
        ; 16. Pels must be emitted in the order P1..P8.
        ;--------------------------------------------------------------
        lodsd
        mov     edx,eax         ; Working copy; al is needed as scratch

        ; P1 and P2
        Xlat4pkPairTo8pk dl

        ; P3 and P4
        Xlat4pkPairTo8pk dh

        ; Bring source bytes 2 and 3 down into dl and dh
        shr     edx,16

        ; P5 and P6
        Xlat4pkPairTo8pk dl

        ; P7 and P8
        Xlat4pkPairTo8pk dh

        ; Next group of eight pels, if any remain.
        loop    short convert_4pk_8pk_loop

cEnd

;-----------------------------------------------------------------------
; Function:
;   convert_row_4pk_16pk
;
; Description:
;   Converts a rectangle row from 4bpp packed format to 16bpp packed format
;
; Input values:
;
;       _pConvertTable_4_16
;              - Global variable that points to a conversion table of
;                16 words that convert the 4bpp values to 16bpp values.
;
;       esi    - Points to source (4bpp) data.
;
;       edi    - Points to destination buffer (where 16bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a multiple of 8.
;
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;-----------------------------------------------------------------------
;-----------------------------------------------------------------------
; Xlat4pkPairTo16pk - assembly-time helper for convert_row_4pk_16pk.
;
; Expands to the code that takes the two 4bpp pels packed in the byte
; register SrcByte (first pel in bits 7..4, second pel in bits 3..0),
; looks each one up in the word table addressed by ebx and stores the
; two resulting words through edi.
;
; eax is used as scratch. SrcByte is only read (it must not be part of
; eax).
;-----------------------------------------------------------------------
Xlat4pkPairTo16pk macro SrcByte
        movzx   eax,SrcByte               ; Both pels, upper bits clear
        shr     al,4                      ; Keep the first pel
        mov     ax, word ptr [ebx+eax*2]  ; Word-sized table entry
        stosw

        movzx   eax,SrcByte               ; Both pels again
        and     al,0Fh                    ; Keep the second pel
        mov     ax, word ptr [ebx+eax*2]  ; Word-sized table entry
        stosw
endm

cProc   convert_row_4pk_16pk,<PUBLIC>

cBegin
        ; ebx -> table of 16 words mapping a 4bpp palette index to its
        ; 16bpp value.
        mov     ebx, _pConvertTable_4_16

ifdef FIREWALLS
        ; Debug builds only: trap if the table has not been created ...
        or      ebx,ebx
        jnz     short @F
        int     3
@@:
        ; ... or if the row is not a whole number of 8-pel groups.
        ; (4bpp rows are expected to be aligned on 8-pel boundaries, so
        ; the source is a whole number of dwords.)
        test    ecx,7
        jz      short @F
        int     3
@@:
endif ;FIREWALLS

        ; One source dword holds eight pels: loop count = pels / 8.
        shr     ecx,3

convert_4pk_16pk_loop:
        ;--------------------------------------------------------------
        ; Read eight source pels. In memory they are laid out
        ; most-significant-nibble first:
        ;
        ;   byte 0 = P1:P2   byte 1 = P3:P4
        ;   byte 2 = P5:P6   byte 3 = P7:P8
        ;
        ; After the little-endian dword load, byte 0 is in dl, byte 1 in
        ; dh, and bytes 2/3 arrive in dl/dh once edx is shifted down by
        ; 16. Pels must be emitted in the order P1..P8.
        ;--------------------------------------------------------------
        lodsd
        mov     edx,eax         ; Working copy; eax is needed as scratch

        ; P1 and P2
        Xlat4pkPairTo16pk dl

        ; P3 and P4
        Xlat4pkPairTo16pk dh

        ; Bring source bytes 2 and 3 down into dl and dh
        shr     edx,16

        ; P5 and P6
        Xlat4pkPairTo16pk dl

        ; P7 and P8
        Xlat4pkPairTo16pk dh

        ; Next group of eight pels, if any remain.
        loop    convert_4pk_16pk_loop

cEnd


;----------------------------------------------------------------------;
;
; Function: convert_row_8pk_16pk
;       Converts a row of 8bpp data to 16bpp.
;
; Input values:
;
;       _pConvertTable_8_16
;              - Global variable that points to a conversion table of
;                256 words that convert the 8bpp values to 16bpp values.
;
;       esi    - Points to source (8bpp) data.
;
;       edi    - Points to destination buffer (where 16bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a multiple of 2.
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;

cProc   convert_row_8pk_16pk,<PUBLIC>
cBegin
        ; Make ebx point to the conversion table, which is
        ; a table of 256 words which map from 8bpp to 16bpp.
        mov     ebx, _pConvertTable_8_16

ifdef FIREWALLS
        ; Check that the convert table has been created. The other
        ; table-driven converters in this file make the same debug
        ; check; without it a missing table shows up only as a fault
        ; on the first lookup.
        or      ebx,ebx
        jnz     short @F
        int     3
@@:
endif ;FIREWALLS

        ; We process pairs of pels at a time, so halve the pel count to
        ; give a loop count.
ifdef FIREWALLS
        ; Check that we have an even number of pels
        test    ecx,1
        jz      short @F
        int     3
@@:
endif   ;FIREWALLS
        shr     ecx,1

        ;---------------------------------------------------------------
        ; Each 8bpp pel is used as an index into the word table and the
        ; word found there is stored unchanged.
        ; NOTE(review): the original comment says the output is 16bpp
        ; Motorola format; since no byte swap happens here, the table
        ; entries are presumably already in that order - confirm against
        ; the code that builds _pConvertTable_8_16.
        ;---------------------------------------------------------------
more_8_16_pels:
        ; Read two source pixels.
        ; PEL1 will be in al, PEL2 in ah.
        lodsw

        ; Keep a copy in edx: eax is about to be reused as the index
        ; and result register, and PEL2 is picked up later from dh.
        mov     edx,eax

        ; Convert PEL1 to a 16bpp value
        movzx   eax,al                    ; Index with upper bits clear
        mov     ax, word ptr [ebx+eax*2]  ; Word-sized table entry
        stosw

        ; Convert PEL2 to a 16bpp value
        movzx   eax,dh                    ; Index with upper bits clear
        mov     ax, word ptr [ebx+eax*2]  ; Word-sized table entry
        stosw

        loop    more_8_16_pels

cEnd

;----------------------------------------------------------------------;
;
; Function: convert_row_16pk_8pk
;       Converts a row of 16bpp data to 8bpp.
;
; Input values:
;
;       esi    - Points to source (16bpp) data.
;
;       edi    - Points to destination buffer (where 8bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;
cProc   convert_row_16pk_8pk,<PUBLIC>
localD  ulPel1
localD  ulPel2
localD  cLoopCount
localD  pSrc
localD  pDest
localD  ulSavedSrcPel

cBegin
        ; The source pointer, destination pointer and pel count are kept
        ; in locals for the duration of the row; esi and edi are
        ; reloaded from them around the helper calls in the loop.
        ; (ulPel1 and ulPel2 are not referenced by the code visible in
        ; this routine.)

        ; Store the source pointer
        mov     pSrc,esi

        ; Store destination pointer
        mov     pDest,edi

        ; Store loop count
        mov     cLoopCount,ecx

more_16pk_8pk_pels:
        ; Fetch the next 16bpp source pel.
        mov     esi,pSrc
        lodsw
        mov     pSrc,esi

        ; lodsw writes only ax. Clear the upper half of eax so that the
        ; value saved below, and handed to SearchCache, AddValueToCache
        ; and Convert16bppToRGB2, never carries stale bits left over
        ; from the caller or from the previous iteration.
        ; convert_row_16pk_4pk zero-extends in the same way before it
        ; searches the cache.
        movzx   eax,ax

        ; Save the source pel value (as read, i.e. not byte-swapped).
        ; This is the value used as the cache key below.
        mov     ulSavedSrcPel,eax

        ; Check to see if the pel is in the cache. A return of -1 is
        ; treated as "not found"; anything else is taken to be the 8bpp
        ; index for this pel.
        cCall   SearchCache
        cmp     eax,-1
        jne     short store_dst_8bpp_pel

must_calculate_16bpp_8bpp:
        ; We now have a 16bpp source pel in the saved value.
        ; It was in Motorola format, but we did an Intel read.
        ; Therefore we have to swap the bytes to get the data back
        ; into Motorola format.
        mov     eax,ulSavedSrcPel
        xchg    al,ah

        ; Convert 16bpp value into a 24-bit RGB value.
        ; (The macro is given the pel in ebx and, per the code that
        ; follows, leaves the RGB value in eax.)
        mov     ebx,eax
        Convert16bppToRGB2

        ; eax now contains a 24-bit RGB value.
        ; Now we need to find the nearest color in the 8bpp palette.
        ; Arguments are pushed last-to-first and removed by us after
        ; the call.

        push    256     ; Length of 8bpp palette
        push    eax     ; RGB we want to match
        push    4       ; Sizeof(RGB2)
        push    offset FLAT:FullSizeDeviceDefaultPalette
        call    NearestRestrictedColourIndex
        add     esp,16  ; Remove parameters

        ; The 8bpp index is in eax.
        ; Store it in the cache, keyed by the saved source pel.
        mov     ebx,ulSavedSrcPel
        cCall   AddValueToCache

store_dst_8bpp_pel:
        ; Store the resulting value (al) in the destination row.
        mov     edi,pDest
        stosb
        mov     pDest,edi

        ; Update the loop count and jump back if more pels to
        ; process.
        dec     cLoopCount
        jnz     more_16pk_8pk_pels

        ; Establish the documented exit state: esi past the last source
        ; byte, edi past the last destination byte (already loaded
        ; above) and ecx zero. The count was kept in a local, so ecx
        ; would otherwise hold whatever the last helper call left in it.
        mov     esi,pSrc
        xor     ecx,ecx
cEnd


;----------------------------------------------------------------------;
;
; Function: convert_row_16pk_4pk
;       Converts a row of 16bpp data to 4bpp.
;
; Input values:
;
;       esi    - Points to source (16bpp) data.
;
;       edi    - Points to destination buffer (where 4bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a multiple of 2.
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;
cProc   convert_row_16pk_4pk,<PUBLIC>
localD  ulSrcPel1
localD  ulSrcPel2
localD  ulDstPel1
localD  cLoopCount
localD  pSrc
localD  pDest
localD  ulSavedSrcPels

cBegin
        ; Each iteration consumes two 16bpp source pels (one dword) and
        ; produces one destination byte holding two 4bpp pels: the
        ; first pel in bits 7..4, the second in bits 3..0.
        ;
        ; The source pointer, destination pointer and loop count are
        ; kept in locals; esi and edi are reloaded from them around the
        ; helper calls in the loop.

        ; We process two pels at a time, so halve the pel count
        ; to get a loop count.
ifdef FIREWALLS
        ; Check that we have an even number of pels
        test    ecx,1
        jz      short @F
        int     3
@@:
endif   ;FIREWALLS
        shr     ecx,1
        mov     cLoopCount,ecx

        ; Store the source pointer
        mov     pSrc,esi

        ; Store destination pointer
        mov     pDest,edi

more_16pk_4pk_pels:
        ; Fetch the next pair of source pels: first pel in the low
        ; word of eax, second pel in the high word.
        mov     esi,pSrc
        lodsd
        mov     pSrc,esi

        ; Save the source pel values so the second pel can be
        ; recovered after eax has been reused for the first.
        mov     ulSavedSrcPels,eax


must_calculate_16bpp_4bpp:
        ; We now have a pair of 16bpp source pels in eax.
        ; They were in Motorola format, but we did an Intel read.
        ; Therefore we have to swap the bytes to get the data back
        ; into Motorola format.
        xchg    al,ah

        ; Check to see if the pel is in the cache. The byte-swapped
        ; value is used as the key; -1 back from SearchCache is treated
        ; as "not found".
        movzx   eax,ax          ; Zero upper bits
        mov     ulSrcPel1,eax
        cCall   SearchCache
        cmp     eax,-1
        jne     short got_4bpp_pel1

        ; Convert 16bpp value into a 24-bit RGB value.
        ; (The macro is given the pel in ebx and, per the code that
        ; follows, leaves the RGB value in eax.)
        mov     ebx,ulSrcPel1
        Convert16bppToRGB2

        ; eax now contains a 24-bit RGB value.
        ; Now we need to find the nearest color in the 4bpp palette.
        ; Arguments are pushed last-to-first and removed by us after
        ; the call.

        push    16      ; Length of VGA palette
        push    eax     ; RGB we want to match
        push    4       ; Sizeof(RGB2)
        push    offset FLAT:StandardVGADefaultPalette
        call    NearestRestrictedColourIndex
        add     esp,16  ; Remove parameters

        ; Store the newly calculated value in the cache, keyed by the
        ; source pel.
        mov     ebx,ulSrcPel1
        cCall   AddValueToCache

got_4bpp_pel1:
        ; The 4bpp index is in eax.
        ; Shift it into the high nibble and park it in a local until
        ; the second pel is ready.
        shl     eax,4
        mov     ulDstPel1,eax

        ; Now do the same thing for the second pel...

        ; Read second 16bpp source pel. This is in Motorola format, but
        ; we are doing an Intel read. Therefore we have to swap the
        ; bytes to get the data back into Motorola format.
        ; (The shift leaves the upper half of eax clear.)
        mov     eax,ulSavedSrcPels
        shr     eax,16
        xchg    al,ah

        ; Check to see if the pel is in the cache
        mov     ulSrcPel2,eax
        cCall   SearchCache
        cmp     eax,-1
        jne     short got_4bpp_pel2

        ; Convert 16bpp value into a 24-bit RGB value.
        mov     ebx,ulSrcPel2
        Convert16bppToRGB2

        ; eax now contains a 24-bit RGB value.
        ; Now we need to find the nearest color in the 4bpp palette

        push    16      ; Length of VGA palette
        push    eax     ; RGB we want to match
        push    4       ; Sizeof(RGB2)
        push    offset FLAT:StandardVGADefaultPalette
        call    NearestRestrictedColourIndex
        add     esp,16  ; Remove parameters

        ; Store the newly calculated value in the cache
        mov     ebx,ulSrcPel2
        cCall   AddValueToCache

got_4bpp_pel2:
        ; The 4bpp index is in eax.
        ; Merge it with the previous pel (already in bits 7..4).
        or      eax,ulDstPel1

store_dst_4bpp_pels:
        ; Store the resulting pair (al) in the destination row.
        mov     edi,pDest
        stosb
        mov     pDest,edi

        ; Update the loop count and jump back if more pels to
        ; process.
        dec     cLoopCount
        jnz     more_16pk_4pk_pels

        ; Establish the documented exit state: esi past the last source
        ; byte, edi past the last destination byte (already loaded
        ; above) and ecx zero. The count was kept in a local, so ecx
        ; would otherwise hold whatever the last helper call left in it.
        mov     esi,pSrc
        xor     ecx,ecx
cEnd

;----------------------------------------------------------------------;
;
; Function: convert_row_8pkint_8pkext
;       Converts a row of 8bpp data from internal to external format.
;
; Input values:
;       _pConvertTable_8int_8ext
;              - Global variable that points to a conversion
;                table of 256 bytes that convert the 8bpp
;                values from internal to external format.
;
;       esi    - Points to source (internal 8bpp) data.
;
;       edi    - Points to destination buffer (where external 8bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;

cProc   convert_row_8pkint_8pkext,<PUBLIC>
cBegin
        ; xlatb looks up [ebx+al], so address the 256-byte
        ; internal -> external 8bpp table through ebx.
        mov     ebx, _pConvertTable_8int_8ext

ifdef FIREWALLS
        ; Debug builds only: trap if the table has not been created.
        test    ebx,ebx
        jnz     short @F
        int     3
@@:
endif ;FIREWALLS

        ; One pel per byte: fetch, translate, store, ecx times.
more_8int_8ext_pels:
        lodsb                   ; al = internal index
        xlatb                   ; al = external index
        stosb
        loop    more_8int_8ext_pels

cEnd

;----------------------------------------------------------------------;
;
; Function: convert_row_8pk_4pk
;       Converts a row of 8bpp data to 4bpp.
;
; Input values:
;       _pConvertTable_8_4
;              - Global variable that points to a conversion
;                table of 256 bytes that convert the 8bpp
;                values to 4bpp values.
;
;       esi    - Points to source (8bpp) data.
;
;       edi    - Points to destination buffer (where 4bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a multiple of 2.
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;

cProc   convert_row_8pk_4pk,<PUBLIC>
cBegin
        ; xlatb looks up [ebx+al], so address the 256-byte
        ; 8bpp -> 4bpp table through ebx.
        mov     ebx, _pConvertTable_8_4

ifdef FIREWALLS
        ; Debug builds only: trap if the table has not been created ...
        test    ebx,ebx
        jnz     short @F
        int     3
@@:
        ; ... or if the pel count is odd (pels are consumed in pairs).
        test    ecx,1
        jz      short @F
        int     3
@@:
endif ;FIREWALLS

        ; Two source pels make one destination byte: loop count is
        ; pels / 2.
        shr     ecx,1

        ;---------------------------------------------------------------
        ; Output is 4bpp Motorola format. For a source pair (PEL1,PEL2)
        ; the destination byte is:
        ;
        ;   bits 7..4 = 4bpp index of PEL1
        ;   bits 3..0 = 4bpp index of PEL2
        ;---------------------------------------------------------------
more_8_4_pels:
        ; al = PEL1, ah = PEL2 (both still 8bpp)
        lodsw

        ; Translate PEL1 and lift it into the high nibble.
        xlatb
        shl     al,4

        ; Park the PEL1 nibble in ah and bring PEL2 into al for its
        ; own lookup.
        xchg    al,ah
        xlatb

        ; Combine the two nibbles and write the packed pair.
        or      al,ah
        stosb

        loop    more_8_4_pels

cEnd


;----------------------------------------------------------------------;
;
; Function: convert_row_4pkint_4pkext
;       Converts a row of 4bpp data from internal to external format.
;
; Input values:
;       _pConvertTable_4int_4ext
;              - Global variable that points to a conversion table of
;                256 bytes that converts pairs of 4bpp internal values
;                to 4bpp external values.
;
;       esi    - Points to source (4bpp internal) data.
;
;       edi    - Points to destination buffer (where 4bpp external data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a multiple of 8.
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;

cProc   convert_row_4pkint_4pkext,<PUBLIC>
cBegin
        ; xlatb looks up [ebx+al], so address the 256-byte
        ; internal -> external table through ebx.
        mov     ebx, _pConvertTable_4int_4ext

ifdef FIREWALLS
        ; Debug builds only: trap if the table has not been created ...
        or      ebx,ebx
        jnz     short @F
        int     3
@@:
        ; ... or if the row is not a whole number of 8-pel groups.
        test    ecx,7
        jz      short @F
        int     3
@@:
endif ;FIREWALLS

        ; Eight pels (one dword) are handled per pass: loop count is
        ; pels / 8.
        shr     ecx,3

        ;---------------------------------------------------------------
        ; Each source byte holds a pair of pels (PEL1 in bits 7..4,
        ; PEL2 in bits 3..0). The table is indexed by the whole byte
        ; and yields the whole translated byte, so both pels of a pair
        ; are converted by a single lookup and keep their positions.
        ;---------------------------------------------------------------
more_4int_4ext_pels:
        ; Read 8 source pels (4 bytes).
        lodsd

        ; Translate the byte currently in al, then rotate the next
        ; byte down into al. After four rounds every byte has been
        ; translated and eax is back in its original byte order.
        rept    4
        xlatb
        ror     eax,8
        endm

        ; Store the eight translated pels.
        stosd

        loop    more_4int_4ext_pels

cEnd

;----------------------------------------------------------------------;
;
; Function: convert_row_4pkext_4pkint
;       Converts a row of 4bpp data from external to internal format.
;
; Input values:
;       _pConvertTable_4ext_4int -
;                Global variable that points to a conversion table of
;                256 bytes that converts pairs of 4bpp external values
;                to 4bpp internal values.
;
;       esi    - Points to source (4bpp external) data.
;
;       edi    - Points to destination buffer (where 4bpp internal data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a multiple of 8.
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;

cProc   convert_row_4pkext_4pkint,<PUBLIC>
cBegin
        ; xlatb looks up [ebx+al], so address the 256-byte
        ; external -> internal table through ebx.
        mov     ebx, _pConvertTable_4ext_4int

ifdef FIREWALLS
        ; Debug builds only: trap if the table has not been created ...
        or      ebx,ebx
        jnz     short @F
        int     3
@@:
        ; ... or if the row is not a whole number of 8-pel groups.
        test    ecx,7
        jz      short @F
        int     3
@@:
endif ;FIREWALLS

        ; Eight pels (one dword) are handled per pass: loop count is
        ; pels / 8.
        shr     ecx,3

        ;---------------------------------------------------------------
        ; Each source byte holds a pair of pels (PEL1 in bits 7..4,
        ; PEL2 in bits 3..0). The table is indexed by the whole byte
        ; and yields the whole translated byte, so both pels of a pair
        ; are converted by a single lookup and keep their positions.
        ;---------------------------------------------------------------
more_4ext_4int_pels:
        ; Read 8 source pels (4 bytes).
        lodsd

        ; Translate the byte currently in al, then rotate the next
        ; byte down into al. After four rounds every byte has been
        ; translated and eax is back in its original byte order.
        rept    4
        xlatb
        ror     eax,8
        endm

        ; Store the eight translated pels.
        stosd

        loop    more_4ext_4int_pels

cEnd
;----------------------------------------------------------------------;



ifdef S3


;**********************************************************************
; ONLY NEEDED FOR 24BPP DRIVERS!
;**********************************************************************


;----------------------------------------------------------------------;
;
; Function: convert_row_24pk_16pk
;       Converts a row of 24bpp data to 16bpp.
;
; Input values:
;
;       esi    - Points to source (24bpp) data.
;
;       edi    - Points to destination buffer (where 16bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;
cProc   convert_row_24pk_16pk,<PUBLIC>
localD  ulPel1
localD  ulPel2
localD  cLoopCount
localD  pSrc
localD  pDest
localD  ulSavedSrcPel

cBegin
        ; Converts ecx pels from 24bpp (3 bytes each) at esi to 16bpp
        ; (2 bytes each) at edi. The source pointer, destination
        ; pointer and pel count are kept in locals for the duration of
        ; the row.

        ; Store the source pointer
        mov     pSrc,esi

        ; Store destination pointer
        mov     pDest,edi

        ; Store loop count
        mov     cLoopCount,ecx

more_24pk_16pk_pels:
        ; Fetch the current 3-byte pel into the low 24 bits of eax.
        ; This is done as a byte read plus a word read rather than a
        ; dword read and mask: a dword read touches one byte beyond the
        ; pel, which for the last pel of the row is a byte past the end
        ; of the source data and may not be addressable.
        mov     esi,pSrc
        movzx   eax,byte ptr [esi+2]            ;Third byte -> bits 0..7,
        shl     eax,16                          ; moved up to bits 16..23.
        mov     ax,word ptr [esi]               ;First two bytes -> bits 0..15.
        add     esi,3                           ;Step over the pel.
        mov     pSrc,esi

        ; eax now contains a 24-bit RGB value.
        ; Now we need to convert it to the 16bpp equivalent.
        ; (Per the store below, the macro is expected to leave the
        ; 16bpp value in ax.)
        ConvertRGB2To16bpp

store_dst_16bpp_pel:
        ; Store the resulting value
        mov     edi,pDest
        stosw
        mov     pDest,edi

        ; Update the loop count and jump back if more pels to
        ; process.
        dec     cLoopCount
        jnz     more_24pk_16pk_pels

        ; Establish the documented exit state: esi past the last source
        ; byte, edi past the last destination byte (already loaded
        ; above) and ecx zero. The count was kept in a local, so ecx is
        ; not otherwise counted down by this routine.
        mov     esi,pSrc
        xor     ecx,ecx
cEnd


;----------------------------------------------------------------------;
;
; Function: convert_row_24pk_8pk
;       Converts a row of 24bpp data to 8bpp.
;
; Input values:
;
;       esi    - Points to source (24bpp) data.
;
;       edi    - Points to destination buffer (where 8bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;
;----------------------------------------------------------------------;
cProc   convert_row_24pk_8pk,<PUBLIC>
localD  ulPel1
localD  ulPel2
localD  cLoopCount
localD  pSrc
localD  pDest
localD  ulSavedSrcPel

cBegin
        ; Converts ecx pels from 24bpp (3 bytes each) at esi to 8bpp
        ; palette indices at edi. The source pointer, destination
        ; pointer and pel count are kept in locals; esi and edi are
        ; reloaded from them around the helper calls in the loop.

        ; Store the source pointer
        mov     pSrc,esi

        ; Store destination pointer
        mov     pDest,edi

        ; Store loop count
        mov     cLoopCount,ecx

more_24pk_8pk_pels:
        ; Fetch the current 3-byte pel into the low 24 bits of eax.
        ; This is done as a byte read plus a word read rather than a
        ; dword read and mask: a dword read touches one byte beyond the
        ; pel, which for the last pel of the row is a byte past the end
        ; of the source data and may not be addressable.
        mov     esi,pSrc
        movzx   eax,byte ptr [esi+2]            ;Third byte -> bits 0..7,
        shl     eax,16                          ; moved up to bits 16..23.
        mov     ax,word ptr [esi]               ;First two bytes -> bits 0..15.
        add     esi,3                           ;Step over the pel.
        mov     pSrc,esi

        ; Save the source pel value; it is the cache key below.
        mov     ulSavedSrcPel,eax

        ; Check to see if the pel is in the cache. A return of -1 is
        ; treated as "not found"; anything else is taken to be the 8bpp
        ; index for this pel.
        cCall   SearchCache
        cmp     eax,-1
        jne     short store_dst_8bpp_pel2

must_calculate_24bpp_8bpp:
        mov     eax,ulSavedSrcPel

        ; eax now contains a 24-bit RGB value.
        ; Now we need to find the nearest color in the 8bpp palette.
        ; Arguments are pushed last-to-first and removed by us after
        ; the call.

        push    256     ; Length of 8bpp palette
        push    eax     ; RGB we want to match
        push    4       ; Sizeof(RGB2)
        push    offset FLAT:FullSizeDeviceDefaultPalette
        call    NearestRestrictedColourIndex
        add     esp,16  ; Remove parameters

        ; The 8bpp index is in eax.
        ; Store it in the cache, keyed by the saved source pel.
        mov     ebx,ulSavedSrcPel
        cCall   AddValueToCache

store_dst_8bpp_pel2:
        ; Store the resulting value (al) in the destination row.
        mov     edi,pDest
        stosb
        mov     pDest,edi

        ; Update the loop count and jump back if more pels to
        ; process.
        dec     cLoopCount
        jnz     more_24pk_8pk_pels

        ; Establish the documented exit state: esi past the last source
        ; byte, edi past the last destination byte (already loaded
        ; above) and ecx zero. The count was kept in a local, so ecx
        ; would otherwise hold whatever the last helper call left in it.
        mov     esi,pSrc
        xor     ecx,ecx
cEnd


;----------------------------------------------------------------------;
;
; Function: convert_row_24pk_4pk
;       Converts a row of 24bpp data to 4bpp.
;
;       Pels are handled in pairs.  For each 24-bit source pel the
;       cache is consulted (SearchCache); if that returns -1 the
;       nearest entry in StandardVGADefaultPalette is found with
;       NearestRestrictedColourIndex and passed to AddValueToCache.
;       The first pel's index goes in the high nibble of the
;       destination byte and the second pel's index in the low nibble.
;
; Input values:
;
;       esi    - Points to source (24bpp) data.
;
;       edi    - Points to destination buffer (where 4bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a non-zero multiple of 2 (a zero count would
;                wrap the loop counter).
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Not reloaded by this routine: the halved count lives
;                in cLoopCount and ecx is left to whatever the called
;                routines do with it.
;                NOTE(review): this header used to promise zero - no
;                instruction here establishes that; confirm no caller
;                relies on it.
;       eax, ebx are used as scratch.
;
;----------------------------------------------------------------------;
cProc   convert_row_24pk_4pk,<PUBLIC>
localD  ulSrcPel1
localD  ulSrcPel2
localD  ulDstPel1
localD  cLoopCount
localD  pSrc
localD  pDest
localD  ulSavedSrcPel1
localD  ulSavedSrcPel2

cBegin
        ; We process two pels at a time, so halve the pel count
        ; to get a loop count.
ifdef FIREWALLS
        ; Check that we have an even number of pels
        test    ecx,1
        jz      short @F
        int     3
@@:
endif   ;FIREWALLS
        shr     ecx,1
        mov     cLoopCount,ecx

        ; Keep the source and destination pointers in locals; esi and
        ; edi are reloaded from them around the calls below.
        mov     pSrc,esi
        mov     pDest,edi

more_24pk_4pk_pels:
        mov     esi,pSrc

        ; Fetch exactly three bytes for each pel.  A dword read would
        ; touch one byte beyond the source row when the final pel is
        ; fetched, so build the value from a byte and a word instead.
        ; The result is the same as (dword AND 00ffffffh).
        movzx   eax,byte ptr [esi+2]            ;Third byte of pel 1
        shl     eax,16                          ; into bits 16..23,
        mov     ax,word ptr [esi]               ; first two bytes below.
        add     esi,3
        mov     ulSavedSrcPel1,eax              ;Save first pel value.

        movzx   eax,byte ptr [esi+2]            ;Third byte of pel 2
        shl     eax,16                          ; into bits 16..23,
        mov     ax,word ptr [esi]               ; first two bytes below.
        add     esi,3
        mov     ulSavedSrcPel2,eax              ;Save second pel value.
        mov     pSrc,esi

        ; Check to see if the first pel is in the cache.
        ; SearchCache is entered with the pel in eax and its result
        ; is compared with -1 (taken here to mean "not cached").
        mov     eax,ulSavedSrcPel1
        mov     ulSrcPel1,eax
        cCall   SearchCache
        cmp     eax,-1
        jne     short got_4bpp_pel3

must_calculate_24bpp_4bpp:
        mov     eax,ulSavedSrcPel1

        ; eax now contains a 24-bit RGB value.
        ; Now we need to find the nearest color in the 4bpp palette

        push    16      ; Length of VGA palette
        push    eax     ; RGB we want to match
        push    4       ; Sizeof(RGB2)
        push    offset FLAT:StandardVGADefaultPalette
        call    NearestRestrictedColourIndex
        add     esp,16  ; Remove parameters

        ; Store the newly calculated value in the cache.
        ; ebx = source pel, eax = index just returned.  The code that
        ; follows uses eax as the index, so AddValueToCache is assumed
        ; to leave eax intact - TODO confirm against its definition.
        mov     ebx,ulSrcPel1
        cCall   AddValueToCache

got_4bpp_pel3:
        ; The 4bpp index is in eax.
        ; Shift it into the high nibble and park it in a local while
        ; the second pel is converted.
        shl     eax,4
        mov     ulDstPel1,eax

        ; Now do the same thing for the second pel...

        ; Check to see if the pel is in the cache
        mov     eax,ulSavedSrcPel2
        mov     ulSrcPel2,eax
        cCall   SearchCache
        cmp     eax,-1
        jne     short got_4bpp_pel4

        mov     eax,ulSavedSrcPel2

        ; eax now contains a 24-bit RGB value.
        ; Now we need to find the nearest color in the 4bpp palette

        push    16      ; Length of VGA palette
        push    eax     ; RGB we want to match
        push    4       ; Sizeof(RGB2)
        push    offset FLAT:StandardVGADefaultPalette
        call    NearestRestrictedColourIndex
        add     esp,16  ; Remove parameters

        ; Store the newly calculated value in the cache
        mov     ebx,ulSrcPel2
        cCall   AddValueToCache

got_4bpp_pel4:
        ; The 4bpp index is in eax.
        ; Merge it with the previous pel (already in the high nibble).
        or      eax,ulDstPel1

store_dst_4bpp_pels2:
        ; Store the resulting byte (only al is written).
        mov     edi,pDest
        stosb
        mov     pDest,edi

        ; Update the loop count and jump back if more pels to
        ; process.
        dec     cLoopCount
        jnz     more_24pk_4pk_pels

        ; Hand the advanced source pointer back to the caller; edi
        ; already holds the advanced destination pointer.
        mov     esi,pSrc
cEnd

;-----------------------------------------------------------------------
; Function:
;   convert_row_4pk_24pk
;
; Description:
;   Expands one row of 4bpp packed pels into 24bpp packed pels by
;   table lookup.  Eight pels (one source dword) are handled per
;   loop iteration, producing 24 destination bytes.
;
; Input values:
;
;       _pConvertTable_4_24
;              - Global variable holding the address of the conversion
;                table.  The table is indexed by the 4bpp value and
;                read one dword per entry (16 entries); the low three
;                bytes of each entry are what end up in the output.
;
;       esi    - Points to source (4bpp) data.
;
;       edi    - Points to destination buffer (where 24bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a multiple of 8, and not zero (the loop
;                instruction would wrap on a zero count).
;
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;       eax, ebx, edx are overwritten.
;
;-----------------------------------------------------------------------
cProc   convert_row_4pk_24pk,<PUBLIC>

cBegin
        ; ebx -> lookup table (one dword per 4bpp value).
        mov     ebx, _pConvertTable_4_24

ifdef FIREWALLS
        ; Trap if the conversion table has not been set up.
        test    ebx,ebx
        jnz     short @F
        int     3
@@:
        ; 4bpp source rows are expected to start on an 8-pel boundary
        ; and hold a whole number of dwords; trap if the pel count is
        ; not a multiple of 8.
        test    ecx,7
        jz      short @F
        int     3
@@:
endif   ;FIREWALLS

        ; One loop pass per source dword (8 pels).
        shr     ecx,3

convert_4pk_24pk_loop:
        ; Fetch eight source pels (four bytes).
        lodsd

        ;--------------------------------------------------------------
        ; Source bytes are in address order, two pels per byte with
        ; the earlier pel in the high nibble:
        ;
        ;   byte 0 = P1:P2, byte 1 = P3:P4, byte 2 = P5:P6,
        ;   byte 3 = P7:P8
        ;
        ; Once loaded little-endian into a register the nibbles sit at
        ; these bit positions:
        ;
        ;   bits  31-28 27-24 23-20 19-16 15-12 11-8  7-4   3-0
        ;   pel    P7    P8    P5    P6    P3    P4    P1    P2
        ;
        ; Output order must be P1, P2 ... P8.  Every pel except the
        ; last is written as a full dword and the pointer is then
        ; stepped back one byte; the surplus byte is overwritten by
        ; the pel that follows.
        ;--------------------------------------------------------------

        ; edx keeps the untouched source dword; eax is the work copy.
        mov     edx,eax

        ; P1 - bits 4..7
        shr     eax,4
        and     eax,0Fh
        mov     eax, dword ptr [ebx+eax*4]
        stosd
        dec     edi

        ; P2 - bits 0..3
        mov     eax,edx
        and     eax,0Fh
        mov     eax, dword ptr [ebx+eax*4]
        stosd
        dec     edi

        ; P3 - bits 12..15
        mov     eax,edx
        shr     eax,12
        and     eax,0Fh
        mov     eax, dword ptr [ebx+eax*4]
        stosd
        dec     edi

        ; P4 - bits 8..11
        mov     eax,edx
        shr     eax,8
        and     eax,0Fh
        mov     eax, dword ptr [ebx+eax*4]
        stosd
        dec     edi

        ; Bring P5..P8 down into the low word of edx.
        shr     edx,16

        ; P5 - now bits 4..7
        mov     eax,edx
        shr     eax,4
        and     eax,0Fh
        mov     eax, dword ptr [ebx+eax*4]
        stosd
        dec     edi

        ; P6 - now bits 0..3
        mov     eax,edx
        and     eax,0Fh
        mov     eax, dword ptr [ebx+eax*4]
        stosd
        dec     edi

        ; P7 - now bits 12..15
        mov     eax,edx
        shr     eax,12
        and     eax,0Fh
        mov     eax, dword ptr [ebx+eax*4]
        stosd
        dec     edi

        ; P8 - now bits 8..11.  Nothing follows this pel within the
        ; group, so write exactly three bytes (word + byte) rather
        ; than a dword.
        mov     eax,edx
        shr     eax,8
        and     eax,0Fh
        mov     eax, dword ptr [ebx+eax*4]
        stosw
        shr     eax,16
        stosb

        ; Next group of eight pels, if any remain.
        loop    convert_4pk_24pk_loop

cEnd


;----------------------------------------------------------------------;
;
; Function: convert_row_8pk_24pk
;       Converts a row of 8bpp data to 24bpp.
;
; Input values:
;
;       _pConvertTable_8_24
;              - Global variable that points to a conversion table
;                indexed by the 8bpp value.  Each entry is read as a
;                dword (256 entries); its low three bytes are written
;                to the destination.
;
;       esi    - Points to source (8bpp) data.
;
;       edi    - Points to destination buffer (where 24bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be a non-zero multiple of 2 (the loop
;                instruction would wrap on a zero count).
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Zero.
;       eax, ebx, edx are overwritten.
;
;----------------------------------------------------------------------;

cProc   convert_row_8pk_24pk,<PUBLIC>
cBegin
        ; Make ebx point to the conversion table, which is indexed
        ; by the 8bpp value and read one dword per entry.
        mov     ebx, _pConvertTable_8_24

ifdef FIREWALLS
        ; Check that the convert table has been created
        ; (same check as convert_row_4pk_24pk makes on its table).
        or      ebx,ebx
        jnz     short @F
        int     3
@@:
endif   ;FIREWALLS

        ; We process pairs of pels at a time, so halve the pel count to
        ; give a loop count.
ifdef FIREWALLS
        ; Check that we have an even number of pels
        test    ecx,1
        jz      short @F
        int     3
@@:
endif   ;FIREWALLS
        shr     ecx,1

        ;---------------------------------------------------------------
        ; Each pel becomes three destination bytes taken from the low
        ; 24 bits of its table entry.
        ;---------------------------------------------------------------
more_8_24_pels:
        ; Read two source pixels.
        ; PEL1 will be in al, the PEL2 in ah
        lodsw

        ; Save value in edx
        mov     edx,eax

        ; Convert PEL1 to its 24bpp value.  A whole dword is written
        ; and edi stepped back one; the surplus fourth byte is then
        ; overwritten by PEL2.
        movzx   eax,al
        mov     eax, dword ptr [ebx+eax*4]
        stosd
        dec     edi

        ; Convert PEL2 to its 24bpp value.  Nothing follows this pel
        ; within the pair, so write exactly three bytes (word + byte).
        ; A dword store here would put one byte beyond the end of the
        ; destination row when the last pair is written.
        movzx   eax,dh
        mov     eax, dword ptr [ebx+eax*4]
        stosw
        shr     eax,16
        stosb

        loop    more_8_24_pels

cEnd


;----------------------------------------------------------------------;
;
; Function: convert_row_16pk_24pk
;       Converts a row of 16bpp data to 24bpp.
;
;       Each source word is expanded with the Convert16bppToRGB2
;       macro (defined outside this routine) and the low three bytes
;       of the result are written to the destination.
;
; Input values:
;
;       esi    - Points to source (16bpp) data.
;
;       edi    - Points to destination buffer (where 24bpp data
;                will be written).
;
;       ecx    - Number of pels in row (to be converted).
;                Must be non-zero (a zero count would wrap the loop
;                counter).
;
; Exit:
;       esi    - Points to byte after last source byte.
;       edi    - Points to byte after last destination byte.
;       ecx    - Not written by the instructions in this routine (the
;                count is kept in cLoopCount); whether the
;                Convert16bppToRGB2 macro alters it cannot be seen
;                from here.
;                NOTE(review): this header used to promise zero -
;                confirm no caller relies on it.
;       eax, ebx are overwritten.
;
;----------------------------------------------------------------------;
cProc   convert_row_16pk_24pk,<PUBLIC>
; NOTE(review): ulPel1, ulPel2 and ulSavedSrcPel are not referenced by
; the code below; they are kept in case the Convert16bppToRGB2 macro
; expansion refers to them - TODO confirm and remove if unused.
localD  ulPel1
localD  ulPel2
localD  cLoopCount
localD  pSrc
localD  pDest
localD  ulSavedSrcPel

cBegin
        ; Store the source pointer
        mov     pSrc,esi

        ; Store destination pointer
        mov     pDest,edi

        ; Store loop count
        mov     cLoopCount,ecx

more_16pk_24pk_pels:
        mov     esi,pSrc
        xor     ebx,ebx
        lodsw                               ;Read in a WORD,
        mov     pSrc,esi
        mov     bx,ax                       ; zero-extended copy in ebx.

        ; ax (and ebx, zero-extended) now hold the 16-bit pel.
        ; Convert it to the 24bpp equivalent; the code below takes the
        ; macro's result from eax.
        Convert16bppToRGB2

store_dst_24bpp_pel:
        ; Store the resulting value as exactly three bytes.  A dword
        ; store would put one byte beyond the end of the destination
        ; row when the last pel is written.
        mov     edi,pDest
        mov     word ptr [edi],ax
        shr     eax,16
        mov     byte ptr [edi+2],al
        add     edi,3
        mov     pDest,edi

        ; Update the loop count and jump back if more pels to
        ; process.
        dec     cLoopCount
        jnz     more_16pk_24pk_pels

        ; Hand the advanced source pointer back to the caller; edi
        ; already holds the advanced destination pointer.
        mov     esi,pSrc
cEnd


endif ;S3


_TEXT ends

endif ; DCAF

end
