/*
 * DMLib
 * -- Sprite / surface blitting functions
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2011-2012 Tecnic Software productions (TNSP)
 */

// =======================================================================
// DMD_NONE
// =======================================================================

// Scaled blit, 8-bit source to 8-bit destination, plain copy.
// Per-pixel step: store the source element found at index FP_GETH(xv)
// into *dp, then advance dp by one element.
// The function itself is produced by the dmscaledblit.h template (not
// visible here), which consumes the DM_BLITFUNC_* macros defined below.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE Uint8
#define DM_BLITFUNC_DST_BYTES 1
#define DM_BLITFUNC_NAME dmScaledBlitSurface8to8

#define DM_BLITFUNC_INNER \
    *dp = sp[FP_GETH(xv)]; \
    dp++;

#include "dmscaledblit.h"

// -----------------------------------------------------------------------

// Scaled blit, 8-bit paletted source to 32-bit destination, plain copy.
// Init step: returns -2 unless the source surface has a palette with at
// least 256 entries, then views the palette colors as an array of 32-bit
// words (assumes each palette entry is exactly 4 bytes - TODO confirm).
// Per-pixel step: translate the source index at FP_GETH(xv) through that
// table, store the word into *dp and advance dp.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_NAME dmScaledBlitSurface8to32

#define DM_BLITFUNC_VARS const Uint32 *pal;
#define DM_BLITFUNC_INIT \
    if (src->format->palette == NULL) return -2; \
    if (src->format->palette->ncolors < 256) return -2; \
    pal = (Uint32 *) src->format->palette->colors;

#define DM_BLITFUNC_INNER \
    *dp = pal[sp[FP_GETH(xv)]]; \
    dp++;

#include "dmscaledblit.h"

// -----------------------------------------------------------------------

// Scaled blit, 32-bit source to 32-bit destination, plain copy.
// No setup is needed, so DM_BLITFUNC_INIT is defined but left empty.
// Per-pixel step: copy the 32-bit source element at index FP_GETH(xv)
// into *dp, then advance dp.
#define DM_BLITFUNC_SRC_TYPE Uint32
#define DM_BLITFUNC_SRC_BYTES 4
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_NAME dmScaledBlitSurface32to32
#define DM_BLITFUNC_INIT
#define DM_BLITFUNC_INNER \
    *dp = sp[FP_GETH(xv)]; \
    dp++;

#include "dmscaledblit.h"


// =======================================================================
// DMD_TRANSPARENT
// =======================================================================

// Scaled blit, 8-bit source to 8-bit destination, with source value 0
// acting as the transparent value.
// Per-pixel step: a zero source element leaves the destination value as
// it was (it is written back unchanged); any other value replaces it.
// dp advances in both cases.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE Uint8
#define DM_BLITFUNC_DST_BYTES 1
#define DM_BLITFUNC_NAME dmScaledBlitSurface8to8Transparent

#define DM_BLITFUNC_INNER \
    *dp = (sp[FP_GETH(xv)] == 0) ? *dp : sp[FP_GETH(xv)]; \
    dp++;

#include "dmscaledblit.h"

// -----------------------------------------------------------------------

// Scaled blit, 8-bit paletted source to 32-bit RGBA destination, alpha
// blended.
// Init step: returns -2 unless the source surface has a palette with at
// least 256 entries, then views the palette colors as DMRGBA32 records
// (assumes palette entries share DMRGBA32's layout - TODO confirm).
// Per-pixel step: q is the palette color of the source index. Each of
// r, g, b moves from the destination value towards q by the fraction
// q.a / 256; the destination alpha is overwritten with q.a.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_NAME dmScaledBlitSurface8to32Transparent
#define DM_BLITFUNC_VARS const DMRGBA32 *pal;
#define DM_BLITFUNC_INIT \
    if (src->format->palette == NULL) return -2; \
    if (src->format->palette->ncolors < 256) return -2; \
    pal = (DMRGBA32 *) src->format->palette->colors;

#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = pal[sp[FP_GETH(xv)]]; \
    dp->r = dp->r + (((q.r - dp->r) * q.a) >> 8); \
    dp->g = dp->g + (((q.g - dp->g) * q.a) >> 8); \
    dp->b = dp->b + (((q.b - dp->b) * q.a) >> 8); \
    dp->a = q.a; \
    dp++;

#include "dmscaledblit.h"


// -----------------------------------------------------------------------

// Scaled blit, 32-bit source to 32-bit destination, alpha blended.
// Two implementations are selected by DM_USE_SIMD: an MMX inline-asm
// version working on raw Uint32 pixels, and a plain C fallback working
// on DMRGBA32 records. The function body comes from dmscaledblit.h.
#define DM_BLITFUNC_NAME dmScaledBlitSurface32to32Transparent
#define DM_BLITFUNC_SRC_BYTES 4
#define DM_BLITFUNC_DST_BYTES 4

#ifdef DM_USE_SIMD
// Constants used as memory operands by the asm below:
//   qpdmask selects the top byte of a 32-bit pixel (presumably the alpha
//   channel - confirm against the DMRGBA32 layout);
//   qpdrm keeps the low byte of each of the four 16-bit lanes.
#define DM_BLITFUNC_VARS \
    const Uint32 qpdmask =       0xff000000; \
    const Uint64 qpdrm   = 0xff00ff00ff00ffULL;
    
// Per-pixel step. Operands: %0 and %1 are the destination pixel, %2 is
// the source pixel at index FP_GETH(xv), %3 is qpdmask, %4 is qpdrm.
//  1. mm1 <- source pixel. mm5 <- source AND qpdmask, shifted right by 8
//     inside its 16-bit lane, then replicated into all four 16-bit lanes
//     by the two punpck instructions.
//  2. mm2 is cleared and used to widen the bytes of the source (mm1) and
//     of the destination (mm3) into 16-bit lanes.
//  3. Per lane: mm1 <- (((src - dst) * factor) >> 8) + dst, where factor
//     is the replicated top byte held in mm5.
//  4. qpdrm masks every lane down to its low byte, the lanes are packed
//     back into bytes and the low 32 bits are stored to *dp.
// After the asm statement dp is advanced by one pixel.
// Note that all four bytes of the pixel, the top one included, pass
// through the same blend formula here, whereas the C fallback copies the
// source alpha into the destination. mm4 is listed as clobbered although
// this sequence does not touch it.
#define DM_BLITFUNC_SRC_TYPE Uint32
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_INNER			\
    asm(					\
        "movd        %2,     %%mm1\n"		\
        \
        "movd        %3,     %%mm2\n"		\
        "movq        %%mm1,  %%mm5\n"		\
        "pand        %%mm2,  %%mm5\n"		\
        "psrlw       $8,     %%mm5\n"		\
        "punpcklwd   %%mm5,  %%mm5\n"		\
        "punpckhwd   %%mm5,  %%mm5\n"		\
        \
        "pxor        %%mm2,  %%mm2\n"		\
        "movd        %1,     %%mm3\n"		\
        "punpcklbw   %%mm2,  %%mm1\n"		\
        "punpcklbw   %%mm2,  %%mm3\n"		\
        \
        "psubw       %%mm3,  %%mm1\n"		\
        "pmullw      %%mm5,  %%mm1\n"		\
        "psraw       $8,     %%mm1\n"		\
        "paddw       %%mm3,  %%mm1\n"		\
        "pand        %4,     %%mm1\n"		\
        "packuswb    %%mm2,  %%mm1\n"		\
        "movd        %%mm1,  %0\n"		\
        : "=m" (*dp)				\
        : "m" (*dp), "m" (sp[FP_GETH(xv)]), "m" (qpdmask), "m" (qpdrm)	\
        : "memory", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" ); dp++;

// Issues EMMS to leave MMX state (presumably expanded by the template
// once the blit loops are done - confirm against dmscaledblit.h).
#define DM_BLITFUNC_FINISH asm("emms\n");

#else

// C fallback. Per-pixel step: q is the source pixel at FP_GETH(xv).
// Each of r, g, b moves from the destination value towards q by the
// fraction q.a / 256; the destination alpha is overwritten with q.a.
#define DM_BLITFUNC_SRC_TYPE DMRGBA32
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = sp[FP_GETH(xv)]; \
    dp->r += ((q.r - dp->r) * q.a) >> 8; \
    dp->g += ((q.g - dp->g) * q.a) >> 8; \
    dp->b += ((q.b - dp->b) * q.a) >> 8; \
    dp->a  = q.a; \
    dp++;
#endif

#include "dmscaledblit.h"

// -----------------------------------------------------------------------

// Scaled blit, 32-bit source to 32-bit destination, "X" blend variant.
// Per-pixel step: q is the source pixel at FP_GETH(xv). Each of r, g, b
// becomes (source channel * source alpha + destination channel *
// destination alpha) >> 8, using the destination alpha as it was before
// this pixel was processed. Afterwards the destination alpha takes the
// source alpha unless that is zero, in which case it is kept.
// NOTE(review): the sum of the two products is not clamped before it is
// stored back into the channel - confirm that this is intended.
#define DM_BLITFUNC_SRC_TYPE DMRGBA32
#define DM_BLITFUNC_SRC_BYTES 4
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_NAME dmScaledBlitSurface32to32TransparentX
#define DM_BLITFUNC_INIT

#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = sp[FP_GETH(xv)]; \
    dp->r = (dp->r * dp->a + q.r * q.a) >> 8; \
    dp->g = (dp->g * dp->a + q.g * q.a) >> 8; \
    dp->b = (dp->b * dp->a + q.b * q.a) >> 8; \
    dp->a = (q.a == 0) ? dp->a : q.a; \
    dp++;

#include "dmscaledblit.h"


// -----------------------------------------------------------------------

// Scaled blit, 32-bit source to 32-bit destination, alpha blended with
// an additional global alpha factor.
// DM_BLITFUNC_ARGS carries the text of the extra "Uint32 alpha"
// parameter (presumably appended to the generated function's parameter
// list by dmscaledblit.h - confirm against the header).
// Two implementations are selected by DM_USE_SIMD: an MMX inline-asm
// version working on raw Uint32 pixels, and a plain C fallback working
// on DMRGBA32 records.
#define DM_BLITFUNC_NAME dmScaledBlitSurface32to32TransparentGA
#define DM_BLITFUNC_SRC_BYTES 4
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_ARGS , Uint32 alpha

#ifdef DM_USE_SIMD
// Constants used as memory operands by the asm below:
//   qpdmask selects the top byte of a 32-bit pixel (presumably the alpha
//   channel - confirm against the DMRGBA32 layout);
//   qpdrm keeps the low byte of each of the four 16-bit lanes.
#define DM_BLITFUNC_VARS \
    const Uint32 qpdmask =       0xff000000; \
    const Uint64 qpdrm   = 0xff00ff00ff00ffULL;

// Setup step: loads alpha into mm4 and replicates its low 16-bit word
// into all four lanes. DM_BLITFUNC_INNER reads mm4 afterwards, so the
// register value has to survive between separate asm statements.
// NOTE(review): no constraint expresses that dependency, and INNER even
// lists mm4 as clobbered - this relies on the compiler never using mm4.
#define DM_BLITFUNC_SRC_TYPE Uint32
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_INNER_INIT \
    asm(					\
        "movd        %0,     %%mm4\n"		\
        "punpcklwd   %%mm4,  %%mm4\n"		\
        "punpckldq   %%mm4,  %%mm4\n"		\
        : 					\
        : "m" (alpha)				\
        : "%mm4" );

// Per-pixel step. Operands: %0 and %1 are the destination pixel, %2 is
// the source pixel at index FP_GETH(xv), %3 is qpdmask, %4 is qpdrm.
//  1. mm1 <- source pixel. mm5 <- source AND qpdmask, shifted right by 8
//     inside its 16-bit lane, then replicated into all four 16-bit lanes.
//  2. mm5 <- (mm5 * mm4) >> 8 per lane, i.e. the pixel's top byte scaled
//     by the global alpha prepared in DM_BLITFUNC_INNER_INIT.
//  3. mm2 is cleared and used to widen the bytes of the source (mm1) and
//     of the destination (mm3) into 16-bit lanes.
//  4. Per lane: mm1 <- (((src - dst) * mm5) >> 8) + dst.
//  5. qpdrm masks every lane down to its low byte, the lanes are packed
//     back into bytes and the low 32 bits are stored to *dp.
// After the asm statement dp is advanced by one pixel.
#define DM_BLITFUNC_INNER			\
    asm(					\
        "movd        %2,     %%mm1\n"		\
        \
        "movd        %3,     %%mm2\n"		\
        "movq        %%mm1,  %%mm5\n"		\
        "pand        %%mm2,  %%mm5\n"		\
        "psrlw       $8,     %%mm5\n"		\
        "punpcklwd   %%mm5,  %%mm5\n"		\
        "punpckhwd   %%mm5,  %%mm5\n"		\
        \
        "pmullw      %%mm4,  %%mm5\n"		\
        "psrlw       $8,     %%mm5\n"		\
        \
        "pxor        %%mm2,  %%mm2\n"		\
        "movd        %1,     %%mm3\n"		\
        "punpcklbw   %%mm2,  %%mm1\n"		\
        "punpcklbw   %%mm2,  %%mm3\n"		\
        \
        "psubw       %%mm3,  %%mm1\n"		\
        "pmullw      %%mm5,  %%mm1\n"		\
        "psraw       $8,     %%mm1\n"		\
        "paddw       %%mm3,  %%mm1\n"		\
        "pand        %4,     %%mm1\n"		\
        "packuswb    %%mm2,  %%mm1\n"		\
        "movd        %%mm1,  %0\n"		\
        : "=m" (*dp)				\
        : "m" (*dp), "m" (sp[FP_GETH(xv)]), "m" (qpdmask), "m" (qpdrm)	\
        : "memory", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" ); dp++;

// Issues EMMS to leave MMX state (presumably expanded by the template
// once the blit loops are done - confirm against dmscaledblit.h).
#define DM_BLITFUNC_FINISH asm("emms\n");

#else

// C fallback. Per-pixel step: q is the source pixel at FP_GETH(xv) and
// a = (alpha * q.a) >> 8 is the effective blend factor. Each of r, g, b
// moves from the destination value towards q by the fraction a / 256;
// the destination alpha is overwritten with a.
// Every line of the macro body except the last must end in a line
// continuation so that the closing dp++ stays part of the per-pixel
// step instead of ending up as a stray statement at file scope.
#define DM_BLITFUNC_SRC_TYPE DMRGBA32
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = sp[FP_GETH(xv)]; \
    const int a = (alpha * q.a) >> 8; \
    dp->r += ((q.r - dp->r) * a) >> 8; \
    dp->g += ((q.g - dp->g) * a) >> 8; \
    dp->b += ((q.b - dp->b) * a) >> 8; \
    dp->a  = a; \
    dp++;
#endif

#include "dmscaledblit.h"

// =======================================================================
// DMD_SATURATE
// =======================================================================

// Scaled blit, 8-bit source to 8-bit destination, additive with clamping.
// Per-pixel step: add the source element at FP_GETH(xv) to the current
// destination value, limit the sum to 255, store it and advance dp.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE Uint8
#define DM_BLITFUNC_DST_BYTES 1
#define DM_BLITFUNC_NAME dmScaledBlitSurface8to8Saturate
#define DM_BLITFUNC_INNER \
    const int q = *dp + sp[FP_GETH(xv)]; \
    *dp = (q > 255) ? 255 : q; \
    dp++;

#include "dmscaledblit.h"

// -----------------------------------------------------------------------

// Scaled blit, 8-bit paletted source to 32-bit RGBA destination,
// additive with clamping.
// Init step: returns -2 unless the source surface has a palette with at
// least 256 entries, then views the palette colors as DMRGBA32 records
// (assumes palette entries share DMRGBA32's layout - TODO confirm).
#define DM_BLITFUNC_NAME dmScaledBlitSurface8to32Saturate
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_VARS const DMRGBA32 *pal;
#define DM_BLITFUNC_INIT \
    if (src->format->palette == NULL || src->format->palette->ncolors < 256) return -2;	\
    pal = (DMRGBA32 *) src->format->palette->colors;

#ifdef DM_USE_SIMD
// MMX per-pixel step. Operands: %0 and %1 are the destination pixel,
// %2 is the palette color of the source index at FP_GETH(xv).
// Both 32-bit pixels are loaded, added byte by byte with unsigned
// saturation (paddusb) and the result is stored to *dp; dp then advances.
// All four bytes are summed alike here, whereas the C fallback clamps
// r, g, b and copies the source alpha.
#define DM_BLITFUNC_INNER				\
    asm("movd        %2,     %%mm1\n"		\
        "movd        %1,     %%mm2\n"		\
        "paddusb     %%mm2,  %%mm1\n"		\
        "movd        %%mm1,  %0\n"		\
        : "=m" (*dp)				\
        : "m" (*dp), "m" (pal[sp[FP_GETH(xv)]])	\
        : "memory", "%mm1", "%mm2" ); dp++;

// Issues EMMS to leave MMX state (presumably expanded by the template
// once the blit loops are done - confirm against dmscaledblit.h).
#define DM_BLITFUNC_FINISH asm("emms\n");
#else
// C fallback. Per-pixel step: q is the palette color of the source
// index. r, g and b are each summed with the destination channel and
// limited to 255; the destination alpha is overwritten with q.a.
#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = pal[sp[FP_GETH(xv)]]; \
    const int qr = dp->r + q.r, qg = dp->g + q.g, qb = dp->b + q.b; \
    dp->r = qr < 256 ? qr : 255; \
    dp->g = qg < 256 ? qg : 255; \
    dp->b = qb < 256 ? qb : 255; \
    dp->a = q.a; \
    dp++;
#endif

#include "dmscaledblit.h"

// -----------------------------------------------------------------------

// Scaled blit, 32-bit source to 32-bit destination, additive with
// clamping. No setup is needed, so DM_BLITFUNC_INIT is left empty.
#define DM_BLITFUNC_NAME dmScaledBlitSurface32to32Saturate
#define DM_BLITFUNC_SRC_BYTES 4
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_INIT

#ifdef DM_USE_SIMD
// MMX per-pixel step on raw Uint32 pixels. Operands: %0 and %1 are the
// destination pixel, %2 is the source pixel at index FP_GETH(xv).
// Both pixels are loaded, added byte by byte with unsigned saturation
// (paddusb) and the result is stored to *dp; dp then advances.
// All four bytes are summed alike here, whereas the C fallback clamps
// r, g, b and copies the source alpha.
#define DM_BLITFUNC_SRC_TYPE Uint32
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_INNER				\
    asm("movd        %2,     %%mm1\n"		\
        "movd        %1,     %%mm2\n"		\
        "paddusb     %%mm2,  %%mm1\n"		\
        "movd        %%mm1,  %0\n"		\
        : "=m" (*dp)				\
        : "m" (*dp), "m" (sp[FP_GETH(xv)])	\
        : "memory", "%mm1", "%mm2" ); dp++;

// Issues EMMS to leave MMX state (presumably expanded by the template
// once the blit loops are done - confirm against dmscaledblit.h).
#define DM_BLITFUNC_FINISH asm("emms\n");
#else
// C fallback on DMRGBA32 records. Per-pixel step: q is the source pixel
// at FP_GETH(xv). r, g and b are each summed with the destination
// channel and limited to 255; the destination alpha is overwritten
// with q.a.
#define DM_BLITFUNC_SRC_TYPE DMRGBA32
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = sp[FP_GETH(xv)]; \
    const int qr = dp->r + q.r, qg = dp->g + q.g, qb = dp->b + q.b; \
    dp->r = qr < 256 ? qr : 255; \
    dp->g = qg < 256 ? qg : 255; \
    dp->b = qb < 256 ? qb : 255; \
    dp->a = q.a; \
    dp++;
#endif

#include "dmscaledblit.h"



// =======================================================================
// Unscaled blit functions (instantiated from dmunscaledblit.h)
// =======================================================================



// =======================================================================
// DMD_NONE
// =======================================================================

// Unscaled blit, 8-bit source to 8-bit destination, plain copy.
// Per-pixel step: store the source element at index xv into *dp, then
// advance dp by one element.
// The function itself is produced by the dmunscaledblit.h template (not
// visible here), which consumes the DM_BLITFUNC_* macros defined below.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE Uint8
#define DM_BLITFUNC_DST_BYTES 1
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface8to8

#define DM_BLITFUNC_INNER \
    *dp = sp[xv]; \
    dp++;

#include "dmunscaledblit.h"

// -----------------------------------------------------------------------

// Unscaled blit, 8-bit paletted source to 32-bit destination, plain copy.
// Init step: returns -2 unless the source surface has a palette with at
// least 256 entries, then views the palette colors as an array of 32-bit
// words (assumes each palette entry is exactly 4 bytes - TODO confirm).
// Per-pixel step: translate the source index at xv through that table,
// store the word into *dp and advance dp.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface8to32

#define DM_BLITFUNC_VARS const Uint32 *pal;
#define DM_BLITFUNC_INIT \
    if (src->format->palette == NULL) return -2; \
    if (src->format->palette->ncolors < 256) return -2; \
    pal = (Uint32 *) src->format->palette->colors;

#define DM_BLITFUNC_INNER \
    *dp = pal[sp[xv]]; \
    dp++;

#include "dmunscaledblit.h"

// -----------------------------------------------------------------------

// Unscaled blit, 32-bit source to 32-bit destination, plain copy.
// No setup is needed, so DM_BLITFUNC_INIT is defined but left empty.
// Per-pixel step: copy the 32-bit source element at index xv into *dp,
// then advance dp.
#define DM_BLITFUNC_SRC_TYPE Uint32
#define DM_BLITFUNC_SRC_BYTES 4
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface32to32
#define DM_BLITFUNC_INIT
#define DM_BLITFUNC_INNER \
    *dp = sp[xv]; \
    dp++;

#include "dmunscaledblit.h"


// =======================================================================
// DMD_TRANSPARENT
// =======================================================================

// Unscaled blit, 8-bit source to 8-bit destination, with source value 0
// acting as the transparent value.
// Per-pixel step: a zero source element leaves the destination value as
// it was (it is written back unchanged); any other value replaces it.
// dp advances in both cases.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE Uint8
#define DM_BLITFUNC_DST_BYTES 1
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface8to8Transparent

#define DM_BLITFUNC_INNER \
    *dp = (sp[xv] == 0) ? *dp : sp[xv]; \
    dp++;

#include "dmunscaledblit.h"

// -----------------------------------------------------------------------

// Unscaled blit, 8-bit paletted source to 32-bit RGBA destination, alpha
// blended.
// Init step: returns -2 unless the source surface has a palette with at
// least 256 entries, then views the palette colors as DMRGBA32 records
// (assumes palette entries share DMRGBA32's layout - TODO confirm).
// Per-pixel step: q is the palette color of the source index at xv.
// Each of r, g, b moves from the destination value towards q by the
// fraction q.a / 256; the destination alpha is overwritten with q.a.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface8to32Transparent
#define DM_BLITFUNC_VARS const DMRGBA32 *pal;
#define DM_BLITFUNC_INIT \
    if (src->format->palette == NULL) return -2; \
    if (src->format->palette->ncolors < 256) return -2; \
    pal = (DMRGBA32 *) src->format->palette->colors;

#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = pal[sp[xv]]; \
    dp->r = dp->r + (((q.r - dp->r) * q.a) >> 8); \
    dp->g = dp->g + (((q.g - dp->g) * q.a) >> 8); \
    dp->b = dp->b + (((q.b - dp->b) * q.a) >> 8); \
    dp->a = q.a; \
    dp++;

#include "dmunscaledblit.h"


// -----------------------------------------------------------------------

// Unscaled blit, 32-bit source to 32-bit destination, alpha blended.
// Two implementations are selected by DM_USE_SIMD: an MMX inline-asm
// version working on raw Uint32 pixels, and a plain C fallback working
// on DMRGBA32 records. The function body comes from dmunscaledblit.h.
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface32to32Transparent
#define DM_BLITFUNC_SRC_BYTES 4
#define DM_BLITFUNC_DST_BYTES 4

#ifdef DM_USE_SIMD
// Constants used as memory operands by the asm below:
//   qpdmask selects the top byte of a 32-bit pixel (presumably the alpha
//   channel - confirm against the DMRGBA32 layout);
//   qpdrm keeps the low byte of each of the four 16-bit lanes.
#define DM_BLITFUNC_VARS \
    const Uint32 qpdmask =       0xff000000; \
    const Uint64 qpdrm   = 0xff00ff00ff00ffULL;
    
// Per-pixel step. Operands: %0 and %1 are the destination pixel, %2 is
// the source pixel at index xv, %3 is qpdmask, %4 is qpdrm.
//  1. mm1 <- source pixel. mm5 <- source AND qpdmask, shifted right by 8
//     inside its 16-bit lane, then replicated into all four 16-bit lanes
//     by the two punpck instructions.
//  2. mm2 is cleared and used to widen the bytes of the source (mm1) and
//     of the destination (mm3) into 16-bit lanes.
//  3. Per lane: mm1 <- (((src - dst) * factor) >> 8) + dst, where factor
//     is the replicated top byte held in mm5.
//  4. qpdrm masks every lane down to its low byte, the lanes are packed
//     back into bytes and the low 32 bits are stored to *dp.
// After the asm statement dp is advanced by one pixel.
// Note that all four bytes of the pixel, the top one included, pass
// through the same blend formula here, whereas the C fallback copies the
// source alpha into the destination. mm4 is listed as clobbered although
// this sequence does not touch it.
#define DM_BLITFUNC_SRC_TYPE Uint32
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_INNER				\
    asm(					\
        "movd        %2,     %%mm1\n"		\
        \
        "movd        %3,     %%mm2\n"		\
        "movq        %%mm1,  %%mm5\n"		\
        "pand        %%mm2,  %%mm5\n"		\
        "psrlw       $8,     %%mm5\n"		\
        "punpcklwd   %%mm5,  %%mm5\n"		\
        "punpckhwd   %%mm5,  %%mm5\n"		\
        \
        "pxor        %%mm2,  %%mm2\n"		\
        "movd        %1,     %%mm3\n"		\
        "punpcklbw   %%mm2,  %%mm1\n"		\
        "punpcklbw   %%mm2,  %%mm3\n"		\
        \
        "psubw       %%mm3,  %%mm1\n"		\
        "pmullw      %%mm5,  %%mm1\n"		\
        "psraw       $8,     %%mm1\n"		\
        "paddw       %%mm3,  %%mm1\n"		\
        "pand        %4,     %%mm1\n"		\
        "packuswb    %%mm2,  %%mm1\n"		\
        "movd        %%mm1,  %0\n"		\
        : "=m" (*dp)				\
        : "m" (*dp), "m" (sp[xv]), "m" (qpdmask), "m" (qpdrm)	\
        : "memory", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" ); dp++;

// Issues EMMS to leave MMX state (presumably expanded by the template
// once the blit loops are done - confirm against dmunscaledblit.h).
#define DM_BLITFUNC_FINISH asm("emms\n");

#else

// C fallback. Per-pixel step: q is the source pixel at index xv.
// Each of r, g, b moves from the destination value towards q by the
// fraction q.a / 256; the destination alpha is overwritten with q.a.
#define DM_BLITFUNC_SRC_TYPE DMRGBA32
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = sp[xv]; \
    dp->r += ((q.r - dp->r) * q.a) >> 8; \
    dp->g += ((q.g - dp->g) * q.a) >> 8; \
    dp->b += ((q.b - dp->b) * q.a) >> 8; \
    dp->a  = q.a; \
    dp++;
#endif

#include "dmunscaledblit.h"


// =======================================================================
// DMD_SATURATE
// =======================================================================

// Unscaled blit, 8-bit source to 8-bit destination, additive with
// clamping.
// Per-pixel step: add the source element at index xv to the current
// destination value, limit the sum to 255, store it and advance dp.
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_TYPE Uint8
#define DM_BLITFUNC_DST_BYTES 1
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface8to8Saturate
#define DM_BLITFUNC_INNER \
    const int q = *dp + sp[xv]; \
    *dp = (q > 255) ? 255 : q; \
    dp++;

#include "dmunscaledblit.h"

// -----------------------------------------------------------------------

// Unscaled blit, 8-bit paletted source to 32-bit RGBA destination,
// additive with clamping.
// Init step: returns -2 unless the source surface has a palette with at
// least 256 entries, then views the palette colors as DMRGBA32 records
// (assumes palette entries share DMRGBA32's layout - TODO confirm).
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface8to32Saturate
#define DM_BLITFUNC_SRC_BYTES 1
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_SRC_TYPE Uint8
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_VARS const DMRGBA32 *pal;
#define DM_BLITFUNC_INIT \
    if (src->format->palette == NULL || src->format->palette->ncolors < 256) return -2;	\
    pal = (DMRGBA32 *) src->format->palette->colors;

#ifdef DM_USE_SIMD
// MMX per-pixel step. Operands: %0 and %1 are the destination pixel,
// %2 is the palette color of the source index at xv.
// Both 32-bit pixels are loaded, added byte by byte with unsigned
// saturation (paddusb) and the result is stored to *dp; dp then advances.
// All four bytes are summed alike here, whereas the C fallback clamps
// r, g, b and copies the source alpha.
#define DM_BLITFUNC_INNER				\
    asm("movd        %2,     %%mm1\n"		\
        "movd        %1,     %%mm2\n"		\
        "paddusb     %%mm2,  %%mm1\n"		\
        "movd        %%mm1,  %0\n"		\
        : "=m" (*dp)				\
        : "m" (*dp), "m" (pal[sp[xv]])	\
        : "memory", "%mm1", "%mm2" ); dp++;

// Issues EMMS to leave MMX state (presumably expanded by the template
// once the blit loops are done - confirm against dmunscaledblit.h).
#define DM_BLITFUNC_FINISH asm("emms\n");
#else
// C fallback. Per-pixel step: q is the palette color of the source
// index at xv. r, g and b are each summed with the destination channel
// and limited to 255; the destination alpha is overwritten with q.a.
#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = pal[sp[xv]]; \
    const int qr = dp->r + q.r, qg = dp->g + q.g, qb = dp->b + q.b; \
    dp->r = qr < 256 ? qr : 255; \
    dp->g = qg < 256 ? qg : 255; \
    dp->b = qb < 256 ? qb : 255; \
    dp->a = q.a; \
    dp++;
#endif

#include "dmunscaledblit.h"

// -----------------------------------------------------------------------

// Unscaled blit, 32-bit source to 32-bit destination, additive with
// clamping. No setup is needed, so DM_BLITFUNC_INIT is left empty.
#define DM_BLITFUNC_NAME dmUnscaledBlitSurface32to32Saturate
#define DM_BLITFUNC_SRC_BYTES 4
#define DM_BLITFUNC_DST_BYTES 4
#define DM_BLITFUNC_INIT

#ifdef DM_USE_SIMD
// MMX per-pixel step on raw Uint32 pixels. Operands: %0 and %1 are the
// destination pixel, %2 is the source pixel at index xv.
// Both pixels are loaded, added byte by byte with unsigned saturation
// (paddusb) and the result is stored to *dp; dp then advances.
// All four bytes are summed alike here, whereas the C fallback clamps
// r, g, b and copies the source alpha.
#define DM_BLITFUNC_SRC_TYPE Uint32
#define DM_BLITFUNC_DST_TYPE Uint32
#define DM_BLITFUNC_INNER			\
    asm("movd        %2,     %%mm1\n"		\
        "movd        %1,     %%mm2\n"		\
        "paddusb     %%mm2,  %%mm1\n"		\
        "movd        %%mm1,  %0\n"		\
        : "=m" (*dp)				\
        : "m" (*dp), "m" (sp[xv])	\
        : "memory", "%mm1", "%mm2" ); dp++;

// Issues EMMS to leave MMX state (presumably expanded by the template
// once the blit loops are done - confirm against dmunscaledblit.h).
#define DM_BLITFUNC_FINISH asm("emms\n");
#else
// C fallback on DMRGBA32 records. Per-pixel step: q is the source pixel
// at index xv. r, g and b are each summed with the destination channel
// and limited to 255; the destination alpha is overwritten with q.a.
#define DM_BLITFUNC_SRC_TYPE DMRGBA32
#define DM_BLITFUNC_DST_TYPE DMRGBA32
#define DM_BLITFUNC_INNER \
    const DMRGBA32 q = sp[xv]; \
    const int qr = dp->r + q.r, qg = dp->g + q.g, qb = dp->b + q.b; \
    dp->r = qr < 256 ? qr : 255; \
    dp->g = qg < 256 ? qg : 255; \
    dp->b = qb < 256 ? qb : 255; \
    dp->a = q.a; \
    dp++;
#endif

#include "dmunscaledblit.h"


// =======================================================================
