/* 
 * Copyright (c) 1997 NextLevel Systems of Delaware, Inc.  All rights reserved.
 * 
 * This software module  was developed by  Bob Eifrig (at NextLevel
 * Systems of Delaware, Inc.), Xuemin Chen (at NextLevel Systems of
 * Delaware, Inc.), and Ajay Luthra (at NextLevel Systems of Delaware,
 * Inc.), in the course of development of the MPEG-4 Video Standard
 * (ISO/IEC 14496-2).   This software module is an implementation of a
 * part of one or more tools as specified by the MPEG-4 Video Standard.
 * 
 * NextLevel Systems of Delaware, Inc. grants the right, under its
 * copyright in this software module, to use this software module and to
 * make modifications to it for use in products which conform to the
 * MPEG-4 Video Standard.  No license is granted for any use in
 * connection with products which do not conform to the MPEG-4 Video
 * Standard.
 * 
 * Those intending to use this software module are advised that such use
 * may infringe existing and unissued patents.  Please note that in
 * order to practice the MPEG-4 Video Standard, a license may be
 * required to certain patents held by NextLevel Systems of Delaware,
 * Inc., its parent or affiliates ("NextLevel").   The provision of this
 * software module conveys no license, express or implied, under any
 * patent rights of NextLevel or of any third party.  This software
 * module is subject to change without notice.  NextLevel assumes no
 * responsibility for any errors that may appear in this software
 * module.  NEXTLEVEL DISCLAIMS ALL WARRANTIES, EXPRESS AND IMPLIED,
 * INCLUDING, BUT NOT LIMITED TO ANY WARRANTY THAT COMPLIANCE WITH OR
 * PRACTICE OF THE SPECIFICATIONS OR USE OF THIS SOFTWARE MODULE WILL
 * NOT INFRINGE THE INTELLECTUAL PROPERTY RIGHTS OF NEXTLEVEL OR ANY
 * THIRD PARTY, AND ANY IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.
 * 
 * NextLevel retains the full right to use this software module for its
 * own purposes, to assign or transfer this software module to others,
 * to prevent others from using this software module in connection with
 * products which do not conform to the MPEG-4 Video Standard, and to
 * prevent others from infringing NextLevel's patents.
 * 
 * As an express condition of the above license grant, users are
 * required to include this copyright notice in all copies or derivative
 * works of this software module.
 */
/*
 * B-VOP encoder functions for VM 7.0
 *
 * Bob Eifrig
 * General Instrument
 *
 * Modified : 25/05/97 X. Chen, GI.
 *
 * NOTE:
 *  This is a limited implementation of B-VOPs.  It supports only
 *  rectangular shape VOPs in combined shape/motion/texture mode without
 *  scalability; interlaced coding is optional.
 */

#include "momusys.h"
#include "mom_structs.h"
#include "vm_config.h"
#include "vm_common_defs.h"
#include "vm_stats.h"
#include "mot_util.h"
#include "text_util.h"
#include "interlaced_bvop.h"
#include "tm5rc.h"

/*
 * Absolute difference between element i of arrays c and r.
 *
 * The expansion assigns to a variable named "d", so an int-compatible
 * local called d must be in scope wherever the macro is used.  Because
 * every expansion writes the same d, two expansions must not appear in
 * one expression without an intervening sequence point.
 */
#define DIFF(c,r,i)     (((d = (int)(c)[i] - (int)(r)[i]) < 0) ? (-d) : d)

/* Padding, in pels on each side, assumed on all anchor VOPs */
#define EDGE            16      /* Padding assumed on all anchor VOPs */

/*
 * Encode an interlaced B-VOP
 *
 * Writes the VOP header and then walks the macroblocks in raster order.
 * For each macroblock the quantizer is chosen, full-pel forward and
 * backward motion vectors are obtained (searched, or read from the motion
 * vector file when cur_orig->mvfileusage == 1; searched vectors are
 * written to that file when mvfileusage == 2), and the macroblock is then
 * either skipped (when the co-located macroblock of the backward anchor is
 * MBM_SKIPPED) or run through HalfPelMotEstMB_B / DirectSAD /
 * BVOPdecision / BMotComp, DCT, quantization, reconstruction and
 * BCodeMB.  On return the motion/texture bits have been emitted with
 * BitstreamPut and the bit counts and PSNR fields of *stats are updated.
 *
 * The function calls exit(1) on an unsupported configuration, on any
 * motion vector file error, on mis-sized (unpadded) reference VOPs and on
 * an unsupported rate control type.
 */

Void
EncodeInterlacedBVOP(
     VolConfig  *vol_config,       /* Video object layer */
     Vop        *cur_orig,         /* Current original */
     Vop        *cur_rec,          /* Current reconstructed */
     Vop        *fwd_orig,         /* Forward original */
     Vop        *fwd_rec,          /* Forward reconstructed */
     Vop        *bak_orig,         /* Backward original */
     Vop        *bak_rec,          /* Backward reconstructed */
     Image      *bak_P_mot_x,      /* Backward anchor X motion vectors */
     Image      *bak_P_mot_y,      /* Backward anchor Y motion vectors */
     Image      *bak_P_mode,       /* Backward anchor MB modes */
     BitCount   *stats,            /* Statistics & bit counts */
     Int        rc_type,           /* Rate control type */
     Float      time)              /* VOP time */
{  
    BState b;
    /*
     * Header fields parsed from the MV file.  They are preset so that the
     * diagnostic below never prints indeterminate values when sscanf()
     * converts fewer than six fields.
     */
    int pic = -1, type = -1, fref = -1, bref = -1, frad = 0, brad = 0;
    Int i, xmin, xmax, ymin, ymax, *u, *v;
    SInt *p, *q, *qp;
    /* Zero-filled so that line + 4 is always inside an initialized string */
    char line[256] = { 0 };

    if ((vol_config->shape != RECTANGULAR) ||
            (cur_orig->scalability != 0) ||
        (cur_orig->sadct_disable != 1) ||
        (cur_orig->reverse_vlc != 0) ||
        (cur_orig->data_partitioning != 0) ||
        (cur_orig->bits_per_pixel != 8)) {
        /* Six labels, six expected values, six printed values */
        fprintf(stderr,
"EncodeInterlacedBVOP: Shape/Scalability/SADCT/RevVLC/DataPart/bits_per_pixel\nmust be 0/0/0/0/0/8, got %d/%d/%d/%d/%d/%d\n",
            vol_config->shape,  cur_orig->scalability, !cur_orig->sadct_disable,
            cur_orig->reverse_vlc, cur_orig->data_partitioning, cur_orig->bits_per_pixel);
        exit(1);
    }

    b.cur_orig = cur_orig;
    b.cur_rec  = cur_rec;
    b.fwd_orig = fwd_orig;
    b.fwd_rec  = fwd_rec;
    b.bak_orig = bak_orig;
    b.bak_rec  = bak_rec;
    if (bak_orig->prediction_type == I_VOP) {
        /* An I-VOP anchor carries no modes or vectors */
        b.Pmode = NULL;
        b.Pmvx  = NULL;
        b.Pmvy  = NULL;
    } else {
        b.Pmode = bak_P_mode;
        b.Pmvx  = bak_P_mot_x;
        b.Pmvy  = bak_P_mot_y;
    }
    b.interlaced = cur_orig->interlaced;
    b.direct_rad = cur_orig->sr_direct;
    b.mb_width = cur_orig->width / MB_SIZE;
    b.mb_height = cur_orig->height / MB_SIZE;
    b.motvec = AllocImage(8*b.mb_width, b.mb_height, SHORT_TYPE);
    b.modes = AllocImage(b.mb_width, b.mb_height, SHORT_TYPE);

    Bits_Reset(&stats->text_bits);

    SubsampleAlphaMap(cur_orig->a_chan, cur_orig->a_uv_chan,
        GetVopShape(cur_orig));
    CopyImage(cur_orig->a_chan, cur_rec->a_chan);       /* Same shape */
    CopyImage(cur_orig->a_uv_chan, cur_rec->a_uv_chan);

    /* Write VOP header */
    stats->syntax += BitstreamPutVopHeader(cur_orig,
        cur_orig->vo_id, time, 1, vol_config,
        NULL,           /* num_bits[][] for STATIC SPRITE */
        NULL,           /* vo_config_list for STATIC_SPRITE */
        rc_type, 0);
    b.mottext = BitstreamInit();

    /* Motion vector file picture header line I/O */
    switch (cur_orig->mvfileusage) {

    case 1:             /* Read and validate the picture header line */
      (*cur_orig->mvlinenop)++;
      if (fgets(line, sizeof line, cur_orig->mvfile) == NULL) {
          fprintf(stderr, "EOF on %s at %d\n",
              cur_orig->mvfilename, *cur_orig->mvlinenop);
          exit(1);
      }
      /* Skip the 4 character tag ("Pic ") written by case 2 below */
      if ((sscanf(line + 4, "%d %d %d %d %d %d",
                &pic, &type, &fref, &bref, &frad, &brad) != 6) ||
          (pic != cur_orig->frame) ||
          (type != cur_orig->prediction_type) ||
          (fref != fwd_orig->frame) ||
          (bref != bak_orig->frame)) {
          /* type comes from the file: never index "IPBS" with it unchecked */
          fprintf(stderr, "MV read error: expected %c%d(%d,%d), got %c%d(%d,%d)\n",
              "IPBS"[cur_orig->prediction_type],
              cur_orig->frame, fwd_orig->frame,
              bak_orig->frame,
              ((type >= 0) && (type < 4)) ? "IPBS"[type] : '?',
              pic, fref, bref);
          exit(1);
      }
      if ((frad > (8 << cur_orig->fcode_for)) ||
          (brad > (8 << cur_orig->fcode_back))) {
          fprintf(stderr,
              "%s:%d(%c%d): MV file ME radii (%d, %d) exceed current fcodes (%d %d)\n",
              cur_orig->mvfilename, *cur_orig->mvlinenop, "IPBS"[cur_orig->prediction_type],
              cur_orig->frame, frad, brad, cur_orig->fcode_for, cur_orig->fcode_back);
          exit(1);  
      }
      break;

    case 2:             /* Write the picture header line */
      (*cur_orig->mvlinenop)++;
      fprintf(cur_orig->mvfile, "Pic %d %d %d %d %d %d\n",
          b.cur_orig->frame,
          b.cur_orig->prediction_type,
          b.fwd_orig->frame,
          b.bak_orig->frame,
          b.cur_orig->sr_for,
          b.cur_orig->sr_back);
      break;
    }

    /* All four reference VOPs must carry EDGE pels of padding per side */
    b.ewidth = cur_orig->width + 2*EDGE;
    if ((b.ewidth != fwd_orig->width) ||
        (b.ewidth != fwd_rec->width) ||
        (b.ewidth != bak_orig->width) ||
        (b.ewidth != bak_rec->width)) {
        fprintf(stderr, "Reference VOP padding error\n");
        exit(1);
    }
    b.ecwidth = b.ewidth >> 1;
    b.cwidth = cur_orig->width >> 1;

    /* Full-pel search uses the originals, half-pel the reconstructions */
    b.f.fp_ref = fwd_orig->y_chan;
    b.f.hp_ref = fwd_rec->y_chan;
    b.b.fp_ref = bak_orig->y_chan;
    b.b.hp_ref = bak_rec->y_chan;

    /*
     * Limit unrestricted ME to MB_SIZE full pels beyond picture
     */
    i = (EDGE > MB_SIZE) ? MB_SIZE : EDGE;
    xmin = - i;
    xmax = cur_orig->width + i - MB_SIZE;
    ymin = - i;
    ymax = cur_orig->height + i - MB_SIZE;

    /* Offsets of the first macroblock inside padded / unpadded planes */
    b.ref_yoff = EDGE * (1 + b.ewidth);
    b.ref_coff = (EDGE / 2) * (1 + b.ecwidth);
    b.cur_yoff = 0;
    b.cur_coff = 0;
    b.mb_ndx = 0;

    if ((rc_type != 0) && (rc_type < TM5_RATE_CONTROL)) {
        fprintf(stderr, "Unsupported rate control type: %d\n", rc_type);
        exit(1);
    }
    b.prev_quant = b.quant = cur_orig->B_quantizer;
    qp = (SInt *)GetImageData(b.cur_rec->QP_chan);

    for (b.y = 0, b.mby = 0; b.mby < b.mb_height; b.mby++) {
        /* Per-row state is reset at the start of every macroblock row */
        memset(b.pmv, 0, sizeof(b.pmv));
        b.prev_bmt = MBM_B_DIRECT;
        for (b.x = 0, b.mbx = 0; b.mbx < b.mb_width; b.mbx++) {
            /*
             * Copy current luma macroblock to work area
             */
            p = (SInt *)GetImageData(cur_orig->y_chan) + b.cur_yoff;
            q = b.mb;
            for (i = 0; i < MB_SIZE; i++) {
                memcpy(q, p, MB_SIZE * sizeof(SInt));
                p += cur_orig->width;
                q += MB_SIZE;
            }
            /* MPEG-2 TM-5 Rate Control */
            if(rc_type >= TM5_RATE_CONTROL)
                b.quant = (SInt)tm5rc_calc_mquant(b.mb_ndx, b.mottext->x, vol_config->rcdata);
            /*
             * The quantizer may only stay unchanged or move by exactly
             * +/- 2 per macroblock: larger requests are clamped to a step
             * of 2 and a requested change of 1 is dropped.
             */
            if (b.quant >= (b.prev_quant + 2))
                b.quant = b.prev_quant + 2;
            else if (b.quant <= (b.prev_quant - 2))
                b.quant = b.prev_quant - 2;
            else
                b.quant = b.prev_quant;
            *qp++ = b.quant;

            /*
             * Obtain full pel motion vectors (search or read from file).
             * Note: the search radii (sr_for, sr_back) and EDGE are all
             * assumed even!
             */
            i = cur_orig->sr_for;
            b.f.vxmin = -MIN(b.x - xmin, i);
            b.f.vxmax =  MIN(xmax - b.x, i);
            b.f.vymin = -MIN(b.y - ymin, i);   
            b.f.vymax =  MIN(ymax - b.y, i);
            i = cur_orig->sr_back;
            b.b.vxmin = -MIN(b.x - xmin, i);
            b.b.vxmax =  MIN(xmax - b.x, i);
            b.b.vymin = -MIN(b.y - ymin, i);
            b.b.vymax =  MIN(ymax - b.y, i);
            if (cur_orig->mvfileusage == 1) {
                (*cur_orig->mvlinenop)++;
                if (fgets(line, sizeof line, cur_orig->mvfile) == NULL) {
                    fprintf(stderr, "%d: Unexpected EOF at %s, line %d\n",
                        cur_orig->frame, cur_orig->mvfilename,
                        *cur_orig->mvlinenop);
                    exit(1);
                }
                if (sscanf(line,
"%hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd %hd",
                    &b.f.mv[0], &b.f.mv[1], &b.f.mv[2], &b.f.mv[3], &b.f.mv[4],
                    &b.f.mv[5], &b.f.mv[6], &b.f.mv[7], &b.f.mv[8], &b.f.mv[9],
                    &b.b.mv[0], &b.b.mv[1], &b.b.mv[2], &b.b.mv[3], &b.b.mv[4],
                    &b.b.mv[5], &b.b.mv[6], &b.b.mv[7], &b.b.mv[8], &b.b.mv[9]) != 20) {
                    fprintf(stderr, "%d: Bad MV file format: %s:%d\n",
                        cur_orig->frame, cur_orig->mvfilename,
                        *cur_orig->mvlinenop);
                    exit(1);
                }
                /*
                 * File vectors are in half-pel units; clamp their full-pel
                 * part into this macroblock's search window.
                 */
                for (i = 0; i < 10; i += 2) {
                    if ((b.f.mv[i+0] >> 1) < b.f.vxmin) b.f.mv[i+0] = b.f.vxmin << 1;
                    if ((b.f.mv[i+0] >> 1) > b.f.vxmax) b.f.mv[i+0] = b.f.vxmax << 1;
                    if ((b.f.mv[i+1] >> 1) < b.f.vymin) b.f.mv[i+1] = b.f.vymin << 1;
                    if ((b.f.mv[i+1] >> 1) > b.f.vymax) b.f.mv[i+1] = b.f.vymax << 1;
                    if ((b.b.mv[i+0] >> 1) < b.b.vxmin) b.b.mv[i+0] = b.b.vxmin << 1;
                    if ((b.b.mv[i+0] >> 1) > b.b.vxmax) b.b.mv[i+0] = b.b.vxmax << 1;
                    if ((b.b.mv[i+1] >> 1) < b.b.vymin) b.b.mv[i+1] = b.b.vymin << 1;
                    if ((b.b.mv[i+1] >> 1) > b.b.vymax) b.b.mv[i+1] = b.b.vymax << 1;
                }
            } else {
                FullPelMotEstMB_B(&b, &b.f);
                FullPelMotEstMB_B(&b, &b.b);
                if (cur_orig->mvfileusage == 2) {
                    (*cur_orig->mvlinenop)++;
                    fprintf(cur_orig->mvfile,
"%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n",
                        b.f.mv[0], b.f.mv[1], b.f.mv[2], b.f.mv[3],
                        b.f.mv[4], b.f.mv[5], b.f.mv[6], b.f.mv[7],
                        b.f.mv[8], b.f.mv[9], b.b.mv[0], b.b.mv[1],
                        b.b.mv[2], b.b.mv[3], b.b.mv[4], b.b.mv[5],
                        b.b.mv[6], b.b.mv[7], b.b.mv[8], b.b.mv[9]);
                }
            }
#if 1                   /* #if 0 to disable skip due to co-located future MB skip */
            if ((b.Pmode != NULL) &&
                (((SInt *)GetImageData(b.Pmode))[b.mb_ndx] == MBM_SKIPPED)) {
                /*
                 * Skip current macroblock if future anchor picture macroblock
                 * is skipped.  Copy macroblock from past reconstructed anchor.
                 */
                b.quant = b.prev_quant;
                stats->text_bits.no_Pskip++;
                q = (SInt *)GetImageData(cur_rec->y_chan) + b.cur_yoff;
                p = (SInt *)GetImageData(fwd_rec->y_chan) + b.ref_yoff;
                for (i = 0; i < MB_SIZE; i++) {
                    memcpy(q, p, MB_SIZE * sizeof(SInt));
                    q += cur_rec->width;
                    p += b.ewidth;
                }
                q = (SInt *)GetImageData(cur_rec->u_chan) + b.cur_coff;
                p = (SInt *)GetImageData(fwd_rec->u_chan) + b.ref_coff;
                for (i = 0; i < B_SIZE; i++) {
                    memcpy(q, p, B_SIZE * sizeof(SInt));
                    q += b.cwidth;
                    p += b.ecwidth;
                }
                q = (SInt *)GetImageData(cur_rec->v_chan) + b.cur_coff;
                p = (SInt *)GetImageData(fwd_rec->v_chan) + b.ref_coff;
                for (i = 0; i < B_SIZE; i++) {
                    memcpy(q, p, B_SIZE * sizeof(SInt));
                    q += b.cwidth;
                    p += b.ecwidth;
                }
            } else
#endif
            {
                /*
                 * Half pel refinement
                 */
                HalfPelMotEstMB_B(&b, &b.f);
                HalfPelMotEstMB_B(&b, &b.b);
                DirectSAD(&b);
                BVOPdecision(&b);
                BMotComp(&b);
                b.fieldDCT = FrameFieldDCTDecide(b.diff);
                /* Blocks 0..3 are passed type 1, blocks 4..5 type 2 */
                for (i = 0; i < 6; i++) {
                    type = (i < 4) ? 1 : 2;
                    u = &b.diff[i << 6];
                    v = &b.coef[i << 6];
                    BlockDCT((Int (*)[8])u, u);
                    if (cur_orig->quant_type) {
                        BlockQuantMPEG(u, b.quant, MODE_INTER, type, cur_orig->nonintra_quant_mat, v,GetVopBrightWhite(cur_orig));
                        BlockDequantMPEG(v, b.quant, MODE_INTER, type, cur_orig->nonintra_quant_mat, u);
                    } else {
                        BlockQuantH263(u, b.quant, MODE_INTER, type, v,GetVopBrightWhite(cur_orig)); 
                        BlockDequantH263(v ,b.quant , MODE_INTER, type, u); 
                    }
                    BlockIDCT(u, (Int (*)[8])u,GetVopBrightWhite(cur_orig));
                }
                if (b.fieldDCT)
                    fieldDCTtoFrame(b.diff);
                PutBMB(cur_rec, &b);
                b.cbp = FindCBP(b.coef, MODE_INTER, B_SIZE*B_SIZE);
                BCodeMB(&b, &stats->text_bits);
            }
            /* Advance to the next macroblock in this row */
            b.cur_yoff += MB_SIZE;
            b.cur_coff += B_SIZE;
            b.ref_yoff += MB_SIZE;
            b.ref_coff += B_SIZE;
            b.x += MB_SIZE;
            b.mb_ndx++;
            b.prev_quant = b.quant;
        }
        /* Step the plane offsets down to the start of the next MB row */
        b.cur_yoff += (MB_SIZE - 1) * cur_orig->width;
        b.cur_coff += (B_SIZE  - 1) * b.cwidth;
        b.ref_yoff += 2*EDGE + (MB_SIZE - 1) * b.ewidth;
        b.ref_coff += EDGE   + (B_SIZE  - 1) * b.ecwidth;
        b.y += MB_SIZE;
    }
    /* Carry the final quantizer over to the next B-VOP */
    cur_orig->B_quantizer = b.quant;
    stats->mot_shape_text += BitstreamPut(b.mottext,
        cur_orig->vo_id, cur_orig->ident); 
    BitstreamFree(b.mottext);
    stats->mot_shape_text += NextStartCode(cur_orig->vo_id, cur_orig->ident);
    FreeImage(b.motvec);
    FreeImage(b.modes);
    stats->psnr_y = VopPSNR_Y(cur_orig, cur_rec);
    stats->psnr_u = VopPSNR_U(cur_orig, cur_rec);
    stats->psnr_v = VopPSNR_V(cur_orig, cur_rec);
    stats->vop = stats->syntax + stats->mot_shape_text;
    stats->vol += stats->vop; 
    stats->psnr_y_ave += stats->psnr_y;
    stats->psnr_u_ave += stats->psnr_u;
    stats->psnr_v_ave += stats->psnr_v;
    stats->average += stats->vop;
}

/*
 * B-VOP full-pel motion estimation.  Does 16x16 and 4 field motion
 * vector searches.
 */

Void
FullPelMotEstMB_B(BState *b, RefPic *rp)
{
    Int x, y, sad, sad_min, mv_x=0, mv_y=0, vymin;
    SInt *ref = (SInt *)GetImageData(rp->fp_ref) + b->ref_yoff;
    
    /*
     * Do the 16x16 frame macroblock full pel motion estimation
     */
    sad_min = MV_MAX_ERROR;
    for (y = rp->vymin; y <= rp->vymax; y++) {
        for (x = rp->vxmin; x <= rp->vxmax; x++) {
            if ((x == 0) && (y == 0))
                sad = SAD_MacroblockB(&ref[0],
                    b->mb, b->ewidth, sad_min) -
                    (MB_SIZE*MB_SIZE/2 + 1);
            else
                sad = SAD_MacroblockB(&ref[x + y*b->ewidth],
                    b->mb, b->ewidth, sad_min);
          if ((sad < sad_min) ||
              ((sad == sad_min) && (MVLEN(x,y) < MVLEN(mv_x,mv_y)))) {
              sad_min = sad;
              mv_x = x;
              mv_y = y;
          }
       }
    }
    rp->mv[0] = (SInt)(mv_x << 1);
    rp->mv[1] = (SInt)(mv_y << 1);
    if (b->interlaced) {
        vymin = (rp->vymin + 1) & ~1;   /* Fld Y search minimum must be even */
        /*
         * Do field full pel MV search: cur=top, ref=top
         */
        sad_min = MV_MAX_ERROR;
        for (y = vymin; y <= rp->vymax; y += 2) {
            for (x = rp->vxmin; x <= rp->vxmax; x++) {
                sad = SAD_Field16x8B(&ref[x + y*b->ewidth],
                    b->mb, b->ewidth, sad_min);
              if ((sad < sad_min) ||
                  ((sad == sad_min) && (MVLEN(x,y) < MVLEN(mv_x,mv_y)))) {
                  sad_min = sad;
                  mv_x = x;
                  mv_y = y;
              }
           }
        }
        rp->mv[2] = (SInt)(mv_x << 1);
        rp->mv[3] = (SInt)(mv_y << 1);
        /*
         * Do field full pel MV search: cur=top, ref=bot
         */
        sad_min = MV_MAX_ERROR;
        for (y = vymin; y <= rp->vymax; y += 2) {
            for (x = rp->vxmin; x <= rp->vxmax; x++) {
                sad = SAD_Field16x8B(&ref[x + (y + 1)*b->ewidth],
                    b->mb, b->ewidth, sad_min);
              if ((sad < sad_min) ||
                  ((sad == sad_min) && (MVLEN(x,y) < MVLEN(mv_x,mv_y)))) {
                  sad_min = sad;
                  mv_x = x;
                  mv_y = y;
              }
           }
        }
        rp->mv[4] = (SInt)(mv_x << 1);
        rp->mv[5] = (SInt)(mv_y << 1);
        /*
         * Do field full pel MV search: cur=bot, ref=top
         */
        sad_min = MV_MAX_ERROR;
        for (y = vymin; y <= rp->vymax; y += 2) {
            for (x = rp->vxmin; x <= rp->vxmax; x++) {
                sad = SAD_Field16x8B(&ref[x + y*b->ewidth],
                    b->mb + MB_SIZE, b->ewidth, sad_min);
              if ((sad < sad_min) ||
                  ((sad == sad_min) && (MVLEN(x,y) < MVLEN(mv_x,mv_y)))) {
                  sad_min = sad;
                  mv_x = x;
                  mv_y = y;
              }
           }
        }
        rp->mv[6] = (SInt)(mv_x << 1);
        rp->mv[7] = (SInt)(mv_y << 1);
        /*
         * Do field full pel MV search: cur=bot, ref=bot
         */
        sad_min = MV_MAX_ERROR;
        for (y = vymin; y <= rp->vymax; y += 2) {
            for (x = rp->vxmin; x <= rp->vxmax; x++) {
                sad = SAD_Field16x8B(&ref[x + (y + 1)*b->ewidth],
                    b->mb + MB_SIZE, b->ewidth, sad_min);
              if ((sad < sad_min) ||
                  ((sad == sad_min) && (MVLEN(x,y) < MVLEN(mv_x,mv_y)))) {
                  sad_min = sad;
                  mv_x = x;
                  mv_y = y;
              }
           }
        }
        rp->mv[8] = (SInt)(mv_x << 1);
        rp->mv[9] = (SInt)(mv_y << 1);
    }
}

/*
 * Frame (16x16) macroblock SAD calculation without alpha
 *
 * Returns the sum of absolute differences between the MB_SIZE x MB_SIZE
 * area at ref (line pitch h_length) and the macroblock at cur (line pitch
 * MB_SIZE).  The running sum is checked after every line; as soon as it
 * exceeds Min_FRAME the routine gives up and returns MV_MAX_ERROR.
 */

Int
SAD_MacroblockB(
   SInt   *ref,       /* <-- Pointer to the upper-left pel of first MB */
   SInt   *cur,       /* <-- Id, second MB (width=16)                  */
   UInt   h_length,   /* <-- Width of first area                       */
   Int    Min_FRAME   /* <-- Minimum prediction error so far           */
   )
{
  Int    row, col, delta, sad = 0;

  for (row = 0; row < MB_SIZE; row++) {
    /*
     * One pel per statement.  Summing sixteen DIFF() expansions in a
     * single expression assigns the macro's shared temporary several
     * times without a sequence point, which is undefined behaviour.
     */
    for (col = 0; col < MB_SIZE; col++) {
      delta = (int)ref[col] - (int)cur[col];
      sad += (delta < 0) ? -delta : delta;
    }

    ref += h_length;
    cur += MB_SIZE;
    if (sad > Min_FRAME)
      return MV_MAX_ERROR;
  }
  return sad;
}

/*
 * Field (16x8) macroblock SAD calculation without alpha
 *
 * Compares every second line only: MB_SIZE/2 lines of MB_SIZE pels,
 * stepping ref by 2*h_length and cur by 2*MB_SIZE.  The caller selects the
 * field by pointing ref and cur at the first line of the wanted field.
 * The running sum is checked after every line; as soon as it exceeds
 * Min_FRAME the routine gives up and returns MV_MAX_ERROR.
 */

Int
SAD_Field16x8B(
   SInt   *ref,       /* <-- Pointer to the upper-left pel of first MB */
   SInt   *cur,       /* <-- Id, second MB (width=16)                  */
   UInt   h_length,   /* <-- Width of first area                       */
   Int    Min_FRAME   /* <-- Minimum prediction error so far           */
   )
{
  Int    row, col, delta, sad = 0;

  h_length <<= 1;                       /* Pitch between lines of a field */
  for (row = 0; row < MB_SIZE; row += 2) {
    /*
     * One pel per statement.  Summing sixteen DIFF() expansions in a
     * single expression assigns the macro's shared temporary several
     * times without a sequence point, which is undefined behaviour.
     */
    for (col = 0; col < MB_SIZE; col++) {
      delta = (int)ref[col] - (int)cur[col];
      sad += (delta < 0) ? -delta : delta;
    }

    ref += h_length;
    cur += 2*MB_SIZE;
    if (sad > Min_FRAME)
      return MV_MAX_ERROR;
  }
  return sad;
}

/*
 * Half-pel refinement of one reference picture's vectors for the current
 * B-macroblock.
 *
 * The 16x16 vector rp->mv[0..1] is refined by HalfPelB() against
 * rp->hp_ref and the SAD it returns is stored in rp->sad[0].  For
 * interlaced VOPs the four field vectors rp->mv[2..9] are refined as well,
 * in the order (cur=top,ref=top), (cur=top,ref=bot), (cur=bot,ref=top),
 * (cur=bot,ref=bot), with their SADs stored in rp->sad[1..4].
 */

Void
HalfPelMotEstMB_B(BState *b, RefPic *rp)
{
    SInt *top_ref = (SInt *)GetImageData(rp->hp_ref) + b->ref_yoff;
    SInt *cur_fld, *ref_fld;
    Int k;

    /* 16x16 frame vector */
    rp->sad[0] = HalfPelB(b, rp, b->mb, top_ref, &rp->mv[0], MB_SIZE);

    if (!b->interlaced)
        return;

    /*
     * Field vectors: bit 1 of k selects the current field, bit 0 the
     * reference field (0 = top, 1 = bottom, i.e. one line further down).
     */
    for (k = 0; k < 4; k++) {
        cur_fld = (k & 2) ? &b->mb[MB_SIZE] : b->mb;
        ref_fld = (k & 1) ? top_ref + b->ewidth : top_ref;
        rp->sad[1 + k] = HalfPelB(b, rp, cur_fld, ref_fld,
                 &rp->mv[2 + 2*k], MB_SIZE/2);
    }
}

Int
HalfPelB(BState *b, RefPic *rp, SInt *cur, SInt *ref, SInt *mv, Int h)
{
    Int rstride, cstride, dx0, dy0, i, bestdiff, sad, w, d, yinc;
    SInt *c, *r, *clast;

    dx0 = mv[0];
    dy0 = mv[1];
    ref += (dx0 >> 1) + (dy0 >> 1) * b->ewidth;
    if (h == MB_SIZE) {
        w = b->ewidth;
        cstride = 0;
        yinc = 1;
    } else {
        w = 2*b->ewidth;
        cstride = MB_SIZE;
        yinc = 2;
    }
    rstride = w - MB_SIZE;

    c = cur;                                            /* 1 (0.0, 0.0) */
    r = ref;
    for (sad = 0, i = 0; i < h; i++) {
        for (clast = c + MB_SIZE; c < clast; r++) {
            if ((d = r[0] - *c++) < 0)
                sad -= d;
            else
                sad += d;
        }
        c += cstride;
        r += rstride;
    }
    bestdiff = sad;
    if ((dx0 > rp->vxmin) && (dy0 > rp->vymin)) {       /* 2 (-0.5, -0.5) */
        c = cur;
        r = ref;
        for (sad = 0, i = 0; i < h; i++) {
            for (clast = c + MB_SIZE; c < clast; r++) {
                d = (r[-1] + r[0] + r[-w-1] + r[-w] + 2) >> 2;
                if ((d -= *c++) < 0) d = - d;
                sad += d;
            }
            c += cstride;
            r += rstride;
        }
        if ((sad < bestdiff) ||
            ((sad == bestdiff) &&
             (MVLEN(mv[0], mv[1]) > MVLEN(dx0 - 1, dy0 - yinc)))) {
            bestdiff = sad;
            mv[0] = (SInt)(dx0 - 1);
            mv[1] = (SInt)(dy0 - yinc);
        }
    }
    if (dx0 > rp->vxmin) {                              /* 3 (-0.5, 0.0) */
        c = cur;
        r = ref;
        for (sad = 0, i = 0; i < h; i++) {
            for (clast = c + MB_SIZE; c < clast; r++) {
                d = (r[-1] + r[0] + 1) >> 1;
                if ((d -= *c++) < 0) d = - d;
                sad += d;
            }
            c += cstride;
            r += rstride;
        }
        if ((sad < bestdiff) ||
            ((sad == bestdiff) &&
             (MVLEN(mv[0], mv[1]) > MVLEN(dx0 - 1, dy0)))) {
            bestdiff = sad;
            mv[0] = (SInt)(dx0 - 1);
            mv[1] = (SInt)dy0;
        }
    }
    if ((dx0 > rp->vxmin) && (dy0 < rp->vymax)) {       /* 4 (-0.5, 0.5) */
        c = cur;
        r = ref;
        for (sad = 0, i = 0; i < h; i++) {
            for (clast = c + MB_SIZE; c < clast; r++) {
                d = (r[-1] + r[0] + r[w-1] + r[w] + 2) >> 2;
                if ((d -= *c++) < 0) d = - d;
                sad += d;
            }
            c += cstride;
            r += rstride;
        }
        if ((sad < bestdiff) ||
            ((sad == bestdiff) &&
             (MVLEN(mv[0], mv[1]) > MVLEN(dx0 - 1, dy0 + yinc)))) {
            bestdiff = sad;
            mv[0] = (SInt)(dx0 - 1);
            mv[1] = (SInt)(dy0 + yinc);
        }
    }
    if (dy0 < rp->vymax) {                              /* 5 (0.0, 0.5) */
        c = cur;
        r = ref;
        for (sad = 0, i = 0; i < h; i++) {
            for (clast = c + MB_SIZE; c < clast; r++) {
                d = (r[0] + r[w] + 1) >> 1;
                if ((d -= *c++) < 0) d = - d;
                sad += d;
            }
            c += cstride;
            r += rstride;
        }
        if ((sad < bestdiff) ||
            ((sad == bestdiff) &&
             (MVLEN(mv[0], mv[1]) > MVLEN(dx0, dy0 + yinc)))) {
            bestdiff = sad;
            mv[0] = (SInt)dx0;
            mv[1] = (SInt)(dy0 + yinc);
        }
    }
    if ((dx0 < rp->vxmax) && (dy0 < rp->vymax)) {       /* 6 (0.5, 0.5) */
        c = cur;
        r = ref;
        for (sad = 0, i = 0; i < h; i++) {
            for (clast = c + MB_SIZE; c < clast; r++) {
                d = (r[1] + r[0] + r[w+1] + r[w] + 2) >> 2;
                if ((d -= *c++) < 0) d = - d;
                sad += d;
            }
            c += cstride;
            r += rstride;
        }
        if ((sad < bestdiff) ||
            ((sad == bestdiff) &&
             (MVLEN(mv[0], mv[1]) > MVLEN(dx0 + 1, dy0 + yinc)))) {
            bestdiff = sad;
            mv[0] = (SInt)(dx0 + 1);
            mv[1] = (SInt)(dy0 + yinc);
        }
    }
    if (dx0 < rp->vxmax) {                              /* 7 (0.5, 0.0) */
        c = cur;
        r = ref;
        for (sad = 0, i = 0; i < h; i++) {
            for (clast = c + MB_SIZE; c < clast; r++) {
                d = (r[1] + r[0] + 1) >> 1;
                if ((d -= *c++) < 0) d = - d;
                sad += d;
            }
            c += cstride;
            r += rstride;
        }
        if ((sad < bestdiff) ||
            ((sad == bestdiff) &&
             (MVLEN(mv[0], mv[1]) > MVLEN(dx0 + 1, dy0)))) {
            bestdiff = sad;
            mv[0] = (SInt)(dx0 + 1);
            mv[1] = (SInt)dy0;
        }
    }
    if ((dx0 < rp->vxmax) && (dy0 > rp->vymin)) {       /* 8 (0.5, -0.5) */
        c = cur;
        r = ref;
        for (sad = 0, i = 0; i < h; i++) {
            for (clast = c + MB_SIZE; c < clast; r++) {
                d = (r[1] + r[0] + r[-w+1] + r[-w] + 2) >> 2;
                if ((d -= *c++) < 0) d = - d;
                sad += d;
            }
            c += cstride;
            r += rstride;
        }
        if ((sad < bestdiff) ||
            ((sad == bestdiff) &&
             (MVLEN(mv[0], mv[1]) > MVLEN(dx0 + 1, dy0 - yinc)))) {
            bestdiff = sad;
            mv[0] = (SInt)(dx0 + 1);
            mv[1] = (SInt)(dy0 - yinc);
        }
    }
    if (dy0 > rp->vymin) {                              /* 9 (0.0, -0.5) */
        c = cur;
        r = ref;
        for (sad = 0, i = 0; i < h; i++) {
            for (clast = c + MB_SIZE; c < clast; r++) {
                d = (r[0] + r[-w] + 1) >> 1;
                if ((d -= *c++) < 0) d = - d;
                sad += d;
            }
            c += cstride;
            r += rstride;
        }
        if ((sad < bestdiff) ||
            ((sad == bestdiff) &&
             (MVLEN(mv[0], mv[1]) > MVLEN(dx0, dy0 - yinc)))) {
            bestdiff = sad;
            mv[0] = (SInt)dx0;
            mv[1] = (SInt)(dy0 - yinc);
        }
    }
    return(bestdiff);
}
    
/*
 * DirectSAD() searches for the direct mode delta vector (DMV) giving the
 * smallest SAD for the current B-VOP macroblock.
 *
 * Every (x, y) with |x|, |y| <= b->direct_rad is tried.  For each of the
 * four luminance blocks the vector read from b->Pmvx/b->Pmvy (doubled,
 * presumably to half pel units) is scaled by trb/trd and offset by the
 * candidate to give the forward vector; the backward vector is derived
 * from the forward one, or from the scaled vector alone when the
 * candidate component is zero.  A candidate is rejected as soon as one
 * of its vectors leaves the forward or backward search window.
 *
 * Results:  b->direct_sad, b->direct_dx and b->direct_dy describe the
 * best candidate; b->direct_sad stays at MV_MAX_ERROR when nothing was
 * accepted or when the mode stored in b->Pmode is not handled here.
 * Field coded macroblocks of the future anchor are handed over to
 * FieldDirectSAD().
 *
 * Side effect:  the vector limits in b->f and b->b are doubled in place
 * and are not restored on return.
 *
 * NOTE(review): trd is used as a divisor; this assumes the two anchors
 * never carry the same frame number -- confirm against the caller.
 */

Void
DirectSAD(BState *b)
{
    /*
     * Per-block corrections added to the window limits (block i = 0..3).
     * Presumably they account for the position of the 8x8 block inside
     * the macroblock.  The tables hold negative values, so they must be
     * "signed char": the signedness of plain "char" is implementation
     * defined and -2*B_SIZE would read back as a large positive number
     * on targets where char is unsigned.
     */
    static const signed char dxmin[] = { 0, 2*B_SIZE, 0, 2*B_SIZE };
    static const signed char dxmax[] = { -2*B_SIZE, 0, -2*B_SIZE, 0 };
    static const signed char dymin[] = { 0, 0, 2*B_SIZE, 2*B_SIZE };
    static const signed char dymax[] = { -2*B_SIZE, -2*B_SIZE, 0, 0 };
    static Float zero[] = { (Float)0.0 };
    Int mode, i, x, y, sad, trd, trb;
    SInt px, py, fx[4], fy[4], bx[4], by[4], dmv[4];
    Float *pmvx, *pmvy;

    b->direct_sad = MV_MAX_ERROR;
    /*
     * Convert vector min/max to half pel units.  Multiplication is used
     * instead of "<<= 1" because left shifting a negative value (the
     * minima normally are negative) is undefined behaviour in C.
     */
    b->f.vxmin *= 2;
    b->f.vxmax *= 2;
    b->f.vymin *= 2;
    b->f.vymax *= 2;
    b->b.vxmin *= 2;
    b->b.vxmax *= 2;
    b->b.vymin *= 2;
    b->b.vymax *= 2;
    if (b->Pmode == NULL) {       /* Future anchor is an I-VOP */
        /*
         * Trick code into using zero motion vectors
         */
        dmv[0] = dmv[1] = dmv[2] = dmv[3] = 0;
        pmvx = pmvy = zero;
    } else {
        /* First of the four vectors stored for this macroblock */
        i = 2 * b->mbx + 4 * b->mby * b->mb_width;
        pmvx = (Float *)GetImageData(b->Pmvx) + i;
        pmvy = (Float *)GetImageData(b->Pmvy) + i;
        mode = (Int)((SInt *)GetImageData(b->Pmode))[b->mb_ndx];
        switch (mode) {

        default:
            return;

        case MBM_FIELD00:
        case MBM_FIELD01:
        case MBM_FIELD10:
        case MBM_FIELD11:
#if 1                      /* #if 0 to disable field direct mode */
            FieldDirectSAD(b, mode - MBM_FIELD00,
                (SInt)(2*pmvx[0]), (SInt)(2*pmvy[0]),
                (SInt)(2*pmvx[1]), (SInt)(2*pmvy[1]));
#endif
            return;

        case MBM_INTRA:
        case MBM_SKIPPED:
        case MBM_INTER16:
        case MBM_INTER8:
            break;
        }
        /* Offsets of the four block vectors relative to pmvx/pmvy */
        dmv[0] = 0;
        dmv[1] = 1;
        dmv[2] = (SInt)(2*b->mb_width);
        dmv[3] = (SInt)(dmv[2] + 1);
    }
    /* Frame number distances: anchor to anchor, and current to fwd anchor */
    trd = b->bak_orig->frame - b->fwd_orig->frame;
    trb = b->cur_orig->frame - b->fwd_orig->frame;

    for (y = -b->direct_rad; y <= b->direct_rad; y++) {
        for (x = -b->direct_rad; x <= b->direct_rad; x++) {
            for (i = 0; i < 4; i++) {
                px = (SInt)(2 * pmvx[dmv[i]]);
                py = (SInt)(2 * pmvy[dmv[i]]);
                fx[i] = (SInt)((trb * px) / trd + x);
                fy[i] = (SInt)((trb * py) / trd + y);
                if ((fx[i] < (b->f.vxmin + dxmin[i])) ||
                    (fx[i] > (b->f.vxmax + dxmax[i])) ||
                    (fy[i] < (b->f.vymin + dymin[i])) ||
                    (fy[i] > (b->f.vymax + dymax[i])))
                    break;                          /* out-of-range */
                bx[i] = x ? (fx[i] - px) : (SInt)(((trb - trd) * px) / trd);
                by[i] = y ? (fy[i] - py) : (SInt)(((trb - trd) * py) / trd);
                if ((bx[i] < (b->b.vxmin + dxmin[i])) ||
                    (bx[i] > (b->b.vxmax + dxmax[i])) ||
                    (by[i] < (b->b.vymin + dymin[i])) ||
                    (by[i] > (b->b.vymax + dymax[i])))
                    break;                          /* out-of-range */
            }
            if (i < 4)                  /* some block was out-of-range */
                continue;
            for (sad = 0, i = 0; i < 4; i++)
                sad += DirectSAD8x8(b, i, fx[i], fy[i], bx[i], by[i]);
            if (sad < b->direct_sad) {
                b->direct_sad = sad;
                b->direct_dx = (SInt)x;
                b->direct_dy = (SInt)y;
            }
        }
    }
#if 0
    printf(
    "%2d,%2d: frameDirect %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f dmv=%d %d\n",
        b->mbx,         b->mby,         2*pmvx[dmv[0]], 2*pmvy[dmv[0]],
        2*pmvx[dmv[1]], 2*pmvy[dmv[1]], 2*pmvx[dmv[2]], 2*pmvy[dmv[2]],
        2*pmvx[dmv[3]], 2*pmvy[dmv[3]], b->direct_dx, b->direct_dy);
#endif
}

/*
 * DirectSAD8x8() returns the SAD between one 8x8 luminance block of the
 * current macroblock (b->mb[]) and the rounded average of its forward
 * and backward motion compensated predictions.
 *
 *  blk         block number 0..3; bit 0 selects the right hand column,
 *              bit 1 the lower row of blocks
 *  fx, fy      forward vector, handed unchanged to MotCompBlk()
 *  bx, by      backward vector, handed unchanged to MotCompBlk()
 */
Int
DirectSAD8x8(BState *b, Int blk, SInt fx, SInt fy, SInt bx, SInt by)
{
    SInt fpred[B_SIZE*B_SIZE], bpred[B_SIZE*B_SIZE];
    SInt *cur;
    Int xoff = (blk & 1) * 8;           /* 0 or 8 pels to the right */
    Int yoff = (blk & 2) * 4;           /* 0 or 8 lines down */
    Int ref, row, col, k, diff, total;

    /* Block origin inside the reference luminance planes */
    ref = b->ref_yoff + xoff + yoff * b->ewidth;

    /*
     * Forward and backward predictions, each stored as a compact
     * B_SIZE x B_SIZE array
     */
    MotCompBlk((SInt *)GetImageData(b->fwd_rec->y_chan) + ref,
        b->ewidth, fpred, B_SIZE, B_SIZE, B_SIZE, fx, fy);
    MotCompBlk((SInt *)GetImageData(b->bak_rec->y_chan) + ref,
        b->ewidth, bpred, B_SIZE, B_SIZE, B_SIZE, bx, by);

    /*
     * Walk the block row by row; a line of b->mb[] is 2*B_SIZE wide
     * while the predictions are stored contiguously.
     */
    total = 0;
    cur = &b->mb[xoff + yoff * 16];
    for (row = 0, k = 0; row < B_SIZE; row++, cur += 2*B_SIZE) {
        for (col = 0; col < B_SIZE; col++, k++) {
            diff = cur[col] - ((fpred[k] + bpred[k] + 1) >> 1);
            total += (diff < 0) ? -diff : diff;
        }
    }
    return(total);
}

/*
 * MotCompBlk() copies a w-wide block from sp to dp, displaced by the
 * half pel vector (dx, dy) and bilinearly interpolated where a vector
 * component has a half pel part.
 *
 * An odd h selects field mode: h >> 1 lines are produced, both line
 * pitches are doubled so that every second line is read and written,
 * the vertical half pel flag is taken from bit 1 of dy and the two low
 * bits of dy are dropped before the vertical offset is applied.
 *
 * Interpolation rounds upward: (a + b + 1) >> 1 for two taps and
 * (a + b + c + d + 2) >> 2 for four taps.  The interpolating paths read
 * one column and/or one line beyond the w x h source area.
 */

Void
MotCompBlk(SInt *sp, Int sw,    /* Source pointer & image width */
           SInt *dp, Int dw,    /* Destination pointer & image width */
           Int w, Int h,        /* width & height (h odd ==> field) */
           Int dx, Int dy)      /* half pel motion vector (frame coord) */
{
    const Int pitch = sw;       /* frame line pitch, before field doubling */
    const Int xhalf = dx & 1;   /* horizontal half pel position */
    Int yhalf;                  /* vertical half pel position */
    Int row, col;

    if (h & 1) {
        /* Field block */
        h >>= 1;
        yhalf = (dy & 2) != 0;
        dy &= ~3;
        sw <<= 1;
        dw <<= 1;
    } else {
        /* Frame block */
        yhalf = dy & 1;
    }

    /* Integer pel part of the displacement, in frame lines */
    sp += (dx >> 1) + pitch * (dy >> 1);

    if (xhalf && yhalf) {
        /* vert & horiz interp */
        for (row = 0; row < h; row++, dp += dw, sp += sw)
            for (col = 0; col < w; col++)
                dp[col] = (sp[col] + sp[col+1] +
                           sp[col+sw] + sp[col+sw+1] + 2) >> 2;
    } else if (yhalf) {
        /* vertical interp */
        for (row = 0; row < h; row++, dp += dw, sp += sw)
            for (col = 0; col < w; col++)
                dp[col] = (sp[col] + sp[col+sw] + 1) >> 1;
    } else if (xhalf) {
        /* horizontal interp */
        for (row = 0; row < h; row++, dp += dw, sp += sw)
            for (col = 0; col < w; col++)
                dp[col] = (sp[col] + sp[col+1] + 1) >> 1;
    } else {
        /* no interpolation */
        for (row = 0; row < h; row++, dp += dw, sp += sw)
            for (col = 0; col < w; col++)
                dp[col] = sp[col];
    }
}

/*
 * FieldDirectSAD() does the search for the DMV to minimize the SAD
 * for field predicted macroblocks.  This is an extension of progressive
 * direct mode.
 *
 *  code        field reference selection of the future anchor macroblock
 *              (0..3).  Bit 1 set: the top field is predicted from the
 *              second line of the forward reference (offset b->ewidth);
 *              bit 0 set: likewise for the bottom field.
 *  ptx, pty    top field vector of the future anchor
 *  pbx, pby    bottom field vector of the future anchor
 *
 * The backward predictions always start at line 0 for the top field and
 * at line 1 for the bottom field of the backward reference.
 *
 * b->direct_sad, b->direct_dx and b->direct_dy are updated whenever a
 * candidate beats the value already stored in b->direct_sad, so the
 * caller has to preset b->direct_sad (DirectSAD() uses MV_MAX_ERROR).
 */

Void
FieldDirectSAD(BState *b, Int code, SInt ptx, SInt pty, SInt pbx, SInt pby)
{
    /*
     * Corrections applied to the doubled frame distances for the top
     * (dtrt) and bottom (dtrb) field.  Entries 0..3 are used as is,
     * entries 4..7 when top_field_first is set.  The tables contain -1,
     * so they must be "signed char": plain "char" may be unsigned, which
     * would turn -1 into 255.
     */
    static const signed char dtrt[] = {  0, 0,  1, 1, 0, 0, -1, -1 };
    static const signed char dtrb[] = { -1, 0, -1, 0, 1, 0,  1,  0 };
    SInt fwd[MB_SIZE*MB_SIZE], bak[MB_SIZE*MB_SIZE];
    Int x, y, toff, boff, i, d, sad;
    SInt ftx, fty, btx, bty, trbt, trdt;
    SInt fbx, fby, bbx, bby, trbb, trdb;
    SInt *fy = (SInt *)GetImageData(b->fwd_rec->y_chan) + b->ref_yoff;
    SInt *by = (SInt *)GetImageData(b->bak_rec->y_chan) + b->ref_yoff;

    /* Line offset of the forward reference for each field */
    toff = (code & 2) ? b->ewidth : 0;
    boff = (code & 1) ? b->ewidth : 0;
    if (b->cur_orig->top_field_first) code += 4;
    trdt = 2*(b->bak_orig->frame - b->fwd_orig->frame) + dtrt[code];
    trbt = 2*(b->cur_orig->frame - b->fwd_orig->frame) + dtrt[code];
    trdb = 2*(b->bak_orig->frame - b->fwd_orig->frame) + dtrb[code];
    trbb = 2*(b->cur_orig->frame - b->fwd_orig->frame) + dtrb[code];
    for (y = -b->direct_rad; y <= b->direct_rad; y++) {
        for (x = -b->direct_rad; x <= b->direct_rad; x++) {
            /* Find MVs for the top field */
            ftx = (SInt)((trbt * ptx) / trdt + x);
            fty = (SInt)((trbt * pty) / trdt + y);
            if ((ftx < b->f.vxmin) || (ftx > b->f.vxmax) ||
                (fty < b->f.vymin) || (fty > b->f.vymax))
                continue;                           /* out-of-range */
            btx = x ? (ftx - ptx) : (SInt)(((trbt - trdt) * ptx) / trdt);
            bty = y ? (fty - pty) : (SInt)(((trbt - trdt) * pty) / trdt);
            if ((btx < b->b.vxmin) || (btx > b->b.vxmax) ||
                (bty < b->b.vymin) || (bty > b->b.vymax))
                continue;                           /* out-of-range */

            /* Find MVs for the bottom field */
            fbx = (SInt)((trbb * pbx) / trdb + x);
            fby = (SInt)((trbb * pby) / trdb + y);
            if ((fbx < b->f.vxmin) || (fbx > b->f.vxmax) ||
                (fby < b->f.vymin) || (fby > b->f.vymax))
                continue;                           /* out-of-range */
            bbx = x ? (fbx - pbx) : (SInt)(((trbb - trdb) * pbx) / trdb);
            bby = y ? (fby - pby) : (SInt)(((trbb - trdb) * pby) / trdb);
            if ((bbx < b->b.vxmin) || (bbx > b->b.vxmax) ||
                (bby < b->b.vymin) || (bby > b->b.vymax))
                continue;                           /* out-of-range */

            /*
             * The odd height (MB_SIZE + 1) asks MotCompBlk() for field
             * mode, so each call fills every second line of fwd/bak.
             */

            /* Obtain the forward and backward parts of the top field */
            MotCompBlk(fy + toff, b->ewidth, fwd, MB_SIZE,
                MB_SIZE, MB_SIZE + 1, ftx, fty);
            MotCompBlk(by,        b->ewidth, bak, MB_SIZE,
                MB_SIZE, MB_SIZE + 1, btx, bty);

            /* Obtain the forward and backward parts of the bottom field */
            MotCompBlk(fy + boff,      b->ewidth, fwd + MB_SIZE, MB_SIZE,
                MB_SIZE, MB_SIZE + 1, fbx, fby);
            MotCompBlk(by + b->ewidth, b->ewidth, bak + MB_SIZE, MB_SIZE,
                MB_SIZE, MB_SIZE + 1, bbx, bby);

            /* SAD against the rounded average of both predictions */
            for (sad = 0, i = 0; i < MB_SIZE*MB_SIZE; i++) {
                d = b->mb[i] - ((fwd[i] + bak[i] + 1) >> 1);
                if (d < 0) d = - d;
                sad += d;
            }
            if (sad < b->direct_sad) {
                b->direct_sad = sad;
                b->direct_dx = (SInt)x;
                b->direct_dy = (SInt)y;
            }
        }
    }
#if 0
    printf("%2d,%2d: fieldDirect %d top=%d %d bot=%d %d dmv=%d %d\n",
        b->mbx, b->mby, code, ptx, pty, pbx, pby, b->direct_dx, b->direct_dy);
#endif
}

/*
 * BVOPdecision() determines the prediction type for the current
 * macroblock
 *
 * The candidates are compared by luminance SAD:
 *   - frame forward and frame backward (b->f.sad[0], b->b.sad[0]),
 *   - frame bidirectional (average of both predictions, computed here),
 *   - if b->interlaced is set: field forward, backward and bidirectional,
 *   - direct mode (b->direct_sad).
 * Modes that need more motion vectors are penalised by adding a bias to
 * their SAD; direct mode is favoured by subtracting a bias from its SAD.
 *
 * On return b->mode holds the decision.  It is also stored at index
 * b->mb_ndx of the b->modes image, and the eight entries reserved for
 * this macroblock in b->motvec are filled with the selected vectors
 * (unused entries are zero).  If direct mode wins, b->quant is set to
 * b->prev_quant.
 */

Void
BVOPdecision(BState *b)
{
    SInt fwd[MB_SIZE*MB_SIZE], bak[MB_SIZE*MB_SIZE];
    SInt *fp = (SInt *)GetImageData(b->fwd_rec->y_chan) + b->ref_yoff;
    SInt *bp = (SInt *)GetImageData(b->bak_rec->y_chan) + b->ref_yoff;
    Int i, sad, d, frame_mode, frame_sad, field_mode, field_sad;
    Int fldfwd_sad, fldbak_sad;
    SInt *mv, *fmv[2], *bmv[2];

    b->mode = 0;
    /* Best single vector frame mode; a tie goes to forward prediction */
    if (b->f.sad[0] <= b->b.sad[0]) {
        frame_mode = MBM_B_FWDFRM;      /* Fwd is best 1 MV frame mode */
        frame_sad = b->f.sad[0];
    } else {
        frame_mode = MBM_B_BAKFRM;      /* Bak is best 1 MV frame mode */
        frame_sad = b->b.sad[0];
    }
    /*
     * Find the best frame predictor (minimum SAD).  To do this we
     * need the bidirectional SAD, which is computed below
     */
    MotCompBlk(fp, b->ewidth, fwd, MB_SIZE, MB_SIZE, MB_SIZE,
        b->f.mv[0], b->f.mv[1]);
    MotCompBlk(bp, b->ewidth, bak, MB_SIZE, MB_SIZE, MB_SIZE,
        b->b.mv[0], b->b.mv[1]);
    for (sad = 0, i = 0; i < MB_SIZE*MB_SIZE; i++) {
        d = b->mb[i] - ((fwd[i] + bak[i] + 1) >> 1);
        if (d < 0) d = - d;
        sad += d;
    }
    sad += (MB_SIZE*MB_SIZE/2) + 1;     /* Bias against AVE because more MVs */
    if (sad < frame_sad) {
        frame_mode = MBM_B_AVEFRM;      /* Bidirectional is best frame mode */
        frame_sad = sad;
    }

#if 0
    { static char *dir[] = { "Fwd", "Bak", "Ave" };
    printf("%2d,%2d: fwdsad=%4d %4d %4d %4d %4d baksad=%4d %4d %4d %4d %4d\n",
        b->mbx, b->mby,
        b->f.sad[0], b->f.sad[1], b->f.sad[2], b->f.sad[3], b->f.sad[4],
        b->b.sad[0], b->b.sad[1], b->b.sad[2], b->b.sad[3], b->b.sad[4]);
    printf("%2d,%2d: fwdmv=%d,%d %d,%d %d,%d %d,%d %d,%d bakmv=%d,%d %d,%d %d,%d %d,%d %d,%d\n",
        b->mbx, b->mby,         b->f.mv[0], b->f.mv[1], b->f.mv[2], b->f.mv[3],
        b->f.mv[4], b->f.mv[5], b->f.mv[6], b->f.mv[7], b->f.mv[8], b->f.mv[9],
        b->b.mv[0], b->b.mv[1], b->b.mv[2], b->b.mv[3], b->b.mv[4], b->b.mv[5],
        b->b.mv[6], b->b.mv[7], b->b.mv[8], b->b.mv[9]);
    printf("%2d,%2d: Frm%s=%4d", b->mbx, b->mby, dir[frame_mode],
        (frame_mode == MBM_B_AVEFRM) ?
        (frame_sad - (MB_SIZE*MB_SIZE/2) - 1) : frame_sad); }
#endif

    if (b->interlaced) {
        /* 
         * Determine best field predictor for each field and accumulate
         * the SADs for forward and backward field prediction.  Ties are
         * resolved in favor of same-parity prediction. Indexing conventions:
         *
         *      mv[]    sad[]       Predictor
         *      0,1     0           Frame prediction
         *      2,3     1           Field: cur=top, ref=top
         *      4,5     2           Field: cur=top, ref=bot
         *      6,7     3           Field: cur=bot, ref=top
         *      8,9     4           Field: cur=bot, ref=bot
         *
         * A set MBM_B_xxxTOP / MBM_B_xxxBOT bit in b->mode records that
         * the bottom reference field (fp/bp + b->ewidth) was selected
         * for the current top / bottom field.  The odd height
         * (MB_SIZE + 1) makes MotCompBlk() work in field mode; the top
         * field prediction goes to fwd/bak, the bottom field prediction
         * to &fwd[MB_SIZE] / &bak[MB_SIZE].  fmv[] and bmv[] remember
         * the selected vectors (index 0 = top field, 1 = bottom field).
         */
        if (b->f.sad[2] <  b->f.sad[1]) {       /* Choose best fwd top fld ref */
            b->mode |= MBM_B_FWDTOP;
            fldfwd_sad = b->f.sad[2];
            MotCompBlk(fp + b->ewidth, b->ewidth,
                fwd, MB_SIZE, MB_SIZE, MB_SIZE + 1,
                b->f.mv[4], b->f.mv[5]);
            fmv[0] = &b->f.mv[4];
        } else {
            fldfwd_sad = b->f.sad[1];
            MotCompBlk(fp, b->ewidth,
                fwd, MB_SIZE, MB_SIZE, MB_SIZE + 1,
                b->f.mv[2], b->f.mv[3]);
            fmv[0] = &b->f.mv[2];
        }
        if (b->f.sad[4] <= b->f.sad[3]) {       /* Choose best fwd bot fld ref */
            b->mode |= MBM_B_FWDBOT;
            fldfwd_sad += b->f.sad[4];
            MotCompBlk(fp + b->ewidth, b->ewidth,
                &fwd[MB_SIZE], MB_SIZE, MB_SIZE, MB_SIZE + 1,
                b->f.mv[8], b->f.mv[9]);
            fmv[1] = &b->f.mv[8];
        } else {
            fldfwd_sad += b->f.sad[3];
            MotCompBlk(fp, b->ewidth,
                &fwd[MB_SIZE], MB_SIZE, MB_SIZE, MB_SIZE + 1,
                b->f.mv[6], b->f.mv[7]);
            fmv[1] = &b->f.mv[6];
        }
        if (b->b.sad[2] <  b->b.sad[1]) {       /* Choose best bak top fld ref */
            b->mode |= MBM_B_BAKTOP;
            fldbak_sad = b->b.sad[2];
            MotCompBlk(bp + b->ewidth, b->ewidth,
                bak, MB_SIZE, MB_SIZE, MB_SIZE + 1,
                b->b.mv[4], b->b.mv[5]);
            bmv[0] = &b->b.mv[4];
        } else {
            fldbak_sad = b->b.sad[1];
            MotCompBlk(bp, b->ewidth,
                bak, MB_SIZE, MB_SIZE, MB_SIZE + 1,
                b->b.mv[2], b->b.mv[3]);
            bmv[0] = &b->b.mv[2];
        }
        if (b->b.sad[4] <= b->b.sad[3]) {       /* Choose best bak bot fld ref */
            b->mode |= MBM_B_BAKBOT;
            fldbak_sad += b->b.sad[4];
            MotCompBlk(bp + b->ewidth, b->ewidth,
                &bak[MB_SIZE], MB_SIZE, MB_SIZE, MB_SIZE + 1,
                b->b.mv[8], b->b.mv[9]);
            bmv[1] = &b->b.mv[8];
        } else {
            fldbak_sad += b->b.sad[3];
            MotCompBlk(bp, b->ewidth,
                &bak[MB_SIZE], MB_SIZE, MB_SIZE, MB_SIZE + 1,
                b->b.mv[6], b->b.mv[7]);
            bmv[1] = &b->b.mv[6];
        }
        
        /* Best one-direction field mode; a tie goes to backward here */
        if (fldbak_sad > fldfwd_sad) {
            field_mode = MBM_B_FWDFLD;      /* Fwd is best 1 dir field mode */
            field_sad = fldfwd_sad;
        } else {
            field_mode = MBM_B_BAKFLD;      /* Bak is best 1 dir field mode */
            field_sad = fldbak_sad;
        }

        /*
         * Compute the field bidirectional luminance macroblock in
         * order to compute its SAD.
         */
        for (sad = 0, i = 0; i < MB_SIZE*MB_SIZE; i++) {
            d = b->mb[i] - ((fwd[i] + bak[i] + 1) >> 1);
            if (d < 0) d = - d;
            sad += d;
        }
        /*
         * The field average bias is twice the bias given to the one
         * direction field modes and to the frame average.
         */
        sad += (MB_SIZE*MB_SIZE/1) + 1;     /* Bias against AVE because more MVs */
        field_sad += (MB_SIZE*MB_SIZE/2) + 1;
        if (sad < field_sad) {
            field_sad = sad;
            field_mode = MBM_B_AVEFLD;
        }

#if 0
        { static char *dir[] = { "Fwd", "Bak", "Ave" };
        printf(" fld%s%X=%4d", dir[field_mode - 4],
            (b->mode & MBM_B_REFFLDS) >> 4,
            (field_mode == MBM_B_AVEFLD) ?
            (field_sad - (MB_SIZE*MB_SIZE/1) - 1) :
            (field_sad - (MB_SIZE*MB_SIZE/2) - 1)); }
#endif

        /*
         * Compare frame & field SADs.  A field winner keeps the
         * reference field bits already collected in b->mode; a frame
         * winner overwrites them.
         */
        if (field_sad < frame_sad) {
            b->mode |= field_mode;
            sad = field_sad;
        } else {
            b->mode = (SInt)frame_mode;
            sad = frame_sad;
        }
    } else {
        sad = frame_sad;
        b->mode = (SInt)frame_mode;
    }
    #if 1           /* #if 0 to disable direct mode */
    /*
     * Compare to direct mode.  Direct mode is given a head start of
     * (MB_SIZE*MB_SIZE/2) + 1 and also wins a tie.
     */
    if (sad >= (b->direct_sad - (MB_SIZE*MB_SIZE/2) - 1))
        b->mode = MBM_B_DIRECT;
#endif
    /* Store mode and motion vectors */
    ((SInt *)GetImageData(b->modes))[b->mb_ndx] = b->mode;
    mv = (SInt *)GetImageData(b->motvec) + 8*b->mb_ndx;
    memset(mv, 0, 8*sizeof(SInt));
    /*
     * fmv[] and bmv[] are only assigned in the interlaced branch above;
     * the field modes that read them are only selected in that branch.
     */
    switch (b->mode & MBM_B_MODE) {

    case MBM_B_FWDFRM:
        mv[0] = b->f.mv[0];   mv[1] = b->f.mv[1];
        break;

    case MBM_B_BAKFRM:
        mv[0] = b->b.mv[0];   mv[1] = b->b.mv[1];
        break;

    case MBM_B_AVEFRM:
        mv[0] = b->f.mv[0];   mv[1] = b->f.mv[1];
        mv[2] = b->b.mv[0];   mv[3] = b->b.mv[1];
        break;

    case MBM_B_FWDFLD:
        mv[0] = fmv[0][0];    mv[1] = fmv[0][1];
        mv[2] = fmv[1][0];    mv[3] = fmv[1][1];
        break;

    case MBM_B_BAKFLD:
        mv[0] = bmv[0][0];    mv[1] = bmv[0][1];
        mv[2] = bmv[1][0];    mv[3] = bmv[1][1];
        break;

    case MBM_B_AVEFLD:
        mv[0] = fmv[0][0];    mv[1] = fmv[0][1];
        mv[2] = fmv[1][0];    mv[3] = fmv[1][1];
        mv[4] = bmv[0][0];    mv[5] = bmv[0][1];
        mv[6] = bmv[1][0];    mv[7] = bmv[1][1];
        break;

    case MBM_B_DIRECT:
        /* Only the delta vector is stored for direct mode */
        mv[0] = b->direct_dx;
        mv[1] = b->direct_dy;
        b->quant = b->prev_quant;
        break;
    }
#if 0
    { static char *mode[] = {
        "FrmFwd", "FrmBak", "FrmAve", "Direct", "??", "FldFwd", "FldBak", "FldAve" };
    printf(" direct[%2d,%2d]=%4d win=%s mvs=%d %d %d %d %d %d %d %d\n",
        b->direct_dx, b->direct_dy, b->direct_sad, mode[b->mode & MBM_B_MODE],
        mv[0], mv[1], mv[2], mv[3], mv[4], mv[5], mv[6], mv[7]); }
#endif
}

/*
 * BMotComp() builds the motion compensated prediction of the current
 * macroblock in b->mc[], according to b->mode and the vectors stored
 * for the macroblock in b->motvec, and leaves the prediction error
 * (original minus prediction) in b->diff[].  The differences are
 * organized as blocks (ie. Y0, Y1, Y2, Y3, Cb, Cr) of 64 entries.
 */

Void
BMotComp(BState *b)
{
    /*
     * Start of every run of eight luminance samples inside b->diff[],
     * indexed by (raster position in the macroblock) >> 3
     */
    static unsigned char mbtob[] = {
          0, 64,  8, 72, 16, 80, 24, 88, 32, 96, 40,104, 48,112, 56,120,
        128,192,136,200,144,208,152,216,160,224,168,232,176,240,184,248,
    };
    SInt bakpred[6*B_SIZE*B_SIZE];      /* backward half of an average */
    SInt *vec = (SInt *)GetImageData(b->motvec) + 8*b->mb_ndx;
    SInt *usrc, *vsrc;
    Int n, row, col;
    Int blend = 0;              /* set when b->mc and bakpred get averaged */

    switch (b->mode & MBM_B_MODE) {

    case MBM_B_DIRECT:
        DirectMotComp(b, b->mc, bakpred);
        break;

    case MBM_B_FWDFRM:
        BMBMotComp(b->fwd_rec, b, b->mc, -1, 0, vec[0], vec[1]);
        break;

    case MBM_B_BAKFRM:
        BMBMotComp(b->bak_rec, b, b->mc, -1, 0, vec[0], vec[1]);
        break;

    case MBM_B_AVEFRM:
        BMBMotComp(b->fwd_rec, b, b->mc,   -1, 0, vec[0], vec[1]);
        BMBMotComp(b->bak_rec, b, bakpred, -1, 0, vec[2], vec[3]);
        blend = 1;
        break;

    case MBM_B_FWDFLD:
        BMBMotComp(b->fwd_rec, b, b->mc, 0, b->mode & MBM_B_FWDTOP,
            vec[0], vec[1]);
        BMBMotComp(b->fwd_rec, b, b->mc, 1, b->mode & MBM_B_FWDBOT,
            vec[2], vec[3]);
        break;

    case MBM_B_BAKFLD:
        BMBMotComp(b->bak_rec, b, b->mc, 0, b->mode & MBM_B_BAKTOP,
            vec[0], vec[1]);
        BMBMotComp(b->bak_rec, b, b->mc, 1, b->mode & MBM_B_BAKBOT,
            vec[2], vec[3]);
        break;

    case MBM_B_AVEFLD:
        BMBMotComp(b->fwd_rec, b, b->mc,   0, b->mode & MBM_B_FWDTOP,
            vec[0], vec[1]);
        BMBMotComp(b->fwd_rec, b, b->mc,   1, b->mode & MBM_B_FWDBOT,
            vec[2], vec[3]);
        BMBMotComp(b->bak_rec, b, bakpred, 0, b->mode & MBM_B_BAKTOP,
            vec[4], vec[5]);
        BMBMotComp(b->bak_rec, b, bakpred, 1, b->mode & MBM_B_BAKBOT,
            vec[6], vec[7]);
        blend = 1;
        break;
    }

    /* Bidirectional modes: rounded average of both predictions */
    if (blend) {
        for (n = 0; n < 6*B_SIZE*B_SIZE; n++)
            b->mc[n] = (b->mc[n] + bakpred[n] + 1) >> 1;
    }

    /*
     * Luminance: subtract the prediction from the original macroblock
     * and re-order the difference from raster order (MB_SIZE x MB_SIZE)
     * into four B_SIZE x B_SIZE blocks
     */
    for (n = 0; n < MB_SIZE*MB_SIZE; n++)
        b->diff[mbtob[n >> 3] + (n & 7)] = b->mb[n] - b->mc[n];

    /*
     * Chrominance: the originals are read directly from the current
     * picture, one line of b->cwidth samples at a time
     */
    usrc = (SInt *)GetImageData(b->cur_orig->u_chan) + b->cur_coff;
    vsrc = (SInt *)GetImageData(b->cur_orig->v_chan) + b->cur_coff;
    for (row = 0, n = 0; row < B_SIZE; row++) {
        for (col = 0; col < B_SIZE; col++, n++) {
            b->diff[4*B_SIZE*B_SIZE + n] =
                usrc[col] - b->mc[4*B_SIZE*B_SIZE + n];
            b->diff[5*B_SIZE*B_SIZE + n] =
                vsrc[col] - b->mc[5*B_SIZE*B_SIZE + n];
        }
        usrc += b->cwidth;
        vsrc += b->cwidth;
    }
}

/*
 * BMBMotComp() motion compensates the luminance and both chrominance
 * components of the current macroblock from the reference VOP "vop",
 * using a single motion vector (dx, dy).  The luminance prediction is
 * written to out[], Cb to out[4*B_SIZE*B_SIZE] and Cr to
 * out[5*B_SIZE*B_SIZE].  curfld & reffld select the kind of
 * prediction:
 *
 *  curfld  reffld
 *    -1      X         Frame motion compensation
 *     0      0         Field motion compensation: cur=top ref=top
 *     0     !0         Field motion compensation: cur=top ref=bot
 *     1      0         Field motion compensation: cur=bot ref=top
 *     1     !0         Field motion compensation: cur=bot ref=bot
 *
 * The chrominance vector is derived from the luminance vector by
 * halving it; the lowest bit of the result is set when the discarded
 * part was non-zero.
 */

Void
BMBMotComp(Vop *vop, BState *b, SInt *out,
           Int curfld, Int reffld, SInt dx, SInt dy)
{
    SInt *yref = (SInt *)GetImageData(vop->y_chan) + b->ref_yoff;
    SInt *uref = (SInt *)GetImageData(vop->u_chan) + b->ref_coff;
    SInt *vref = (SInt *)GetImageData(vop->v_chan) + b->ref_coff;
    SInt *uout = &out[4*B_SIZE*B_SIZE];
    SInt *vout = &out[5*B_SIZE*B_SIZE];
    SInt cdx, cdy;                      /* chrominance vector */
    Int yline, cline;                   /* first output line of the field */

    /* Horizontal chrominance component, identical for frame & field */
    cdx = (dx & 3) ? ((dx >> 1) | 1) : (dx >> 1);

    if (curfld < 0) {
        /*
         * Frame motion compensation
         */
        cdy = (dy & 3) ? ((dy >> 1) | 1) : (dy >> 1);
        MotCompBlk(yref, b->ewidth, out, MB_SIZE,
            MB_SIZE, MB_SIZE, dx, dy);
        MotCompBlk(uref, b->ecwidth, uout, B_SIZE,
            B_SIZE, B_SIZE, cdx, cdy);
        MotCompBlk(vref, b->ecwidth, vout, B_SIZE,
            B_SIZE, B_SIZE, cdx, cdy);
        return;
    }

    /*
     * Field motion compensation: the bottom reference field starts one
     * line further down, the bottom output field one output line down.
     * The odd heights select the field mode of MotCompBlk().
     */
    if (reffld) {
        yref += b->ewidth;
        uref += b->ecwidth;
        vref += b->ecwidth;
    }
    yline = curfld << 4;
    cline = yline >> 1;
    cdy = (dy & 6) ? ((dy >> 1) | 2) : (dy >> 1);

    MotCompBlk(yref, b->ewidth, out + yline, MB_SIZE,
        MB_SIZE, MB_SIZE + 1, dx, dy);
    MotCompBlk(uref, b->ecwidth, uout + cline, B_SIZE,
        B_SIZE, B_SIZE + 1, cdx, cdy);
    MotCompBlk(vref, b->ecwidth, vout + cline, B_SIZE,
        B_SIZE, B_SIZE + 1, cdx, cdy);
}

/*
 * DirectMotComp() returns the motion compensated prediction for a
 * direct coded B-VOP macroblock.
 *
 *  tmp1        receives the final (averaged) prediction: 16x16 luminance
 *              followed by the Cb and Cr blocks
 *  tmp2        scratch area of the same layout for the backward part
 *
 * The vectors are rebuilt from the vectors of the future anchor and the
 * delta vector in b->direct_dx / b->direct_dy.  Field coded anchor
 * macroblocks are passed on to FieldDirectMotComp().  For an anchor
 * mode that is not listed below the function returns without writing
 * anything.
 */

Void
DirectMotComp(BState *b, SInt *tmp1, SInt *tmp2)
{
    /* Rounding term for the chrominance vector, indexed by (sum & 0xF) */
    static char cround[] = {0,0,0,1,1,1,1,1,1,1,1,1,1,1,2,2};
    static Float zero[] = { (Float)0.0 };
    Float *refx, *refy;                 /* vectors of the future anchor */
    SInt blkmv[4];                      /* index of each block's vector */
    SInt anchor_mode;
    SInt trd, trb;
    SInt px, py, fx, fy, bx, by;
    SInt sumfx = 0, sumfy = 0, sumbx = 0, sumby = 0;
    SInt *fluma, *bluma;
    Int blk, base, col, row, src_off, dst_off, n;

    if (b->Pmode != NULL) {
        base = 2 * b->mbx + 4 * b->mby * b->mb_width;
        refx = (Float *)GetImageData(b->Pmvx) + base;
        refy = (Float *)GetImageData(b->Pmvy) + base;
        anchor_mode = ((SInt *)GetImageData(b->Pmode))[b->mb_ndx];
        switch (anchor_mode) {

        case MBM_INTRA:
        case MBM_SKIPPED:
        case MBM_INTER16:
        case MBM_INTER8:
            break;

        case MBM_FIELD00:
        case MBM_FIELD01:
        case MBM_FIELD10:
        case MBM_FIELD11:
            FieldDirectMotComp(b, anchor_mode - MBM_FIELD00, tmp1, tmp2,
                (SInt)(2*refx[0]), (SInt)(2*refy[0]),
                (SInt)(2*refx[1]), (SInt)(2*refy[1]));
            return;

        default:
            return;
        }
        /* One vector per luminance block, two vectors per row of blocks */
        blkmv[0] = 0;
        blkmv[1] = 1;
        blkmv[2] = (SInt)(2*b->mb_width);
        blkmv[3] = (SInt)(blkmv[2] + 1);
    } else {
        /*
         * Future anchor is an I-VOP: point all four blocks at a single
         * zero vector
         */
        blkmv[0] = blkmv[1] = blkmv[2] = blkmv[3] = 0;
        refx = refy = zero;
    }

    /* Frame number distances: anchor to anchor, and current to fwd anchor */
    trd = (SInt)(b->bak_orig->frame - b->fwd_orig->frame);
    trb = (SInt)(b->cur_orig->frame - b->fwd_orig->frame);
    fluma = (SInt *)GetImageData(b->fwd_rec->y_chan) + b->ref_yoff;
    bluma = (SInt *)GetImageData(b->bak_rec->y_chan) + b->ref_yoff;

    for (blk = 0; blk < 4; blk++) {
        px = (SInt)(2 * refx[blkmv[blk]]);
        py = (SInt)(2 * refy[blkmv[blk]]);

        /* Forward vector: scaled anchor vector plus the delta vector */
        fx = (trb * px) / trd + b->direct_dx;
        fy = (trb * py) / trd + b->direct_dy;

        /* Backward vector, per component */
        if (b->direct_dx)
            bx = fx - px;
        else
            bx = ((trb - trd) * px) / trd;
        if (b->direct_dy)
            by = fy - py;
        else
            by = ((trb - trd) * py) / trd;

        /* Accumulate for the chrominance vectors */
        sumfx += fx;
        sumfy += fy;
        sumbx += bx;
        sumby += by;

        /*
         * Luminance prediction of this 8x8 block, written into its
         * place inside the 16 sample wide macroblock area
         */
        col = (blk & 1) ? 8 : 0;
        row = (blk & 2) ? 8 : 0;
        src_off = col + row * b->ewidth;
        dst_off = col + row * 16;
        MotCompBlk(fluma + src_off, b->ewidth, tmp1 + dst_off,
            MB_SIZE, B_SIZE, B_SIZE, fx, fy);
        MotCompBlk(bluma + src_off, b->ewidth, tmp2 + dst_off,
            MB_SIZE, B_SIZE, B_SIZE, bx, by);
    }

    /*
     * Chrominance vectors from the sums of the four luminance vectors,
     * then the chrominance block predictions
     */
    fx = ((sumfx >> 3) & ~1) + cround[sumfx & 0xF];
    fy = ((sumfy >> 3) & ~1) + cround[sumfy & 0xF];
    bx = ((sumbx >> 3) & ~1) + cround[sumbx & 0xF];
    by = ((sumby >> 3) & ~1) + cround[sumby & 0xF];
    MotCompBlk((SInt *)GetImageData(b->fwd_rec->u_chan) + b->ref_coff,
        b->ecwidth, &tmp1[4*B_SIZE*B_SIZE], B_SIZE, B_SIZE, B_SIZE, fx, fy);
    MotCompBlk((SInt *)GetImageData(b->fwd_rec->v_chan) + b->ref_coff,
        b->ecwidth, &tmp1[5*B_SIZE*B_SIZE], B_SIZE, B_SIZE, B_SIZE, fx, fy);
    MotCompBlk((SInt *)GetImageData(b->bak_rec->u_chan) + b->ref_coff,
        b->ecwidth, &tmp2[4*B_SIZE*B_SIZE], B_SIZE, B_SIZE, B_SIZE, bx, by);
    MotCompBlk((SInt *)GetImageData(b->bak_rec->v_chan) + b->ref_coff,
        b->ecwidth, &tmp2[5*B_SIZE*B_SIZE], B_SIZE, B_SIZE, B_SIZE, bx, by);

    /*
     * Rounded average of the forward and backward predictions
     */
    for (n = 0; n < 6*B_SIZE*B_SIZE; n++)
        tmp1[n] = (tmp1[n] + tmp2[n] + 1) >> 1;
}

/*
 * FieldDirectMotComp() returns the motion compensated prediction for a
 * direct coded B-VOP macroblock whose future anchor macroblock is field
 * predicted.  No search is done here: the delta vector already stored
 * in b->direct_dx / b->direct_dy is used.
 *
 *  code        field reference selection of the future anchor macroblock
 *              (0..3).  Bit 1 set: the top field is predicted from the
 *              bottom field of the forward reference; bit 0 set:
 *              likewise for the bottom field.
 *  fwd         receives the final (averaged) prediction
 *  bak         scratch area for the backward prediction
 *  ptx, pty    top field vector of the future anchor
 *  pbx, pby    bottom field vector of the future anchor
 *
 * The backward predictions always use the field of the same parity.
 */

Void
FieldDirectMotComp(BState *b, Int code, SInt *fwd, SInt *bak,
               SInt ptx, SInt pty, SInt pbx, SInt pby)
{
    /*
     * Corrections applied to the doubled frame distances for the top
     * (dtrt) and bottom (dtrb) field.  Entries 0..3 are used as is,
     * entries 4..7 when top_field_first is set.  The tables contain -1,
     * so they must be "signed char": plain "char" may be unsigned, which
     * would turn -1 into 255.
     */
    static const signed char dtrt[] = {  0, 0,  1, 1, 0, 0, -1, -1 };
    static const signed char dtrb[] = { -1, 0, -1, 0, 1, 0,  1,  0 };
    SInt ftx, fty, btx, bty, trbt, trdt;
    SInt fbx, fby, bbx, bby, trbb, trdb;
    Int i;

    /* Adding 4 leaves bits 0 and 1 (the reference selection) untouched */
    if (b->cur_orig->top_field_first) code += 4;
    trdt = 2*(b->bak_orig->frame - b->fwd_orig->frame) + dtrt[code];
    trbt = 2*(b->cur_orig->frame - b->fwd_orig->frame) + dtrt[code];
    trdb = 2*(b->bak_orig->frame - b->fwd_orig->frame) + dtrb[code];
    trbb = 2*(b->cur_orig->frame - b->fwd_orig->frame) + dtrb[code];

    /* Find MVs for the top field */
    ftx = (SInt)((trbt * ptx) / trdt + b->direct_dx);
    fty = (SInt)((trbt * pty) / trdt + b->direct_dy);
    btx = b->direct_dx ? (ftx - ptx) : (SInt)(((trbt - trdt) * ptx) / trdt);
    bty = b->direct_dy ? (fty - pty) : (SInt)(((trbt - trdt) * pty) / trdt);

    /* Find MVs for the bottom field */
    fbx = (SInt)((trbb * pbx) / trdb + b->direct_dx);
    fby = (SInt)((trbb * pby) / trdb + b->direct_dy);
    bbx = b->direct_dx ? (fbx - pbx) : (SInt)(((trbb - trdb) * pbx) / trdb);
    bby = b->direct_dy ? (fby - pby) : (SInt)(((trbb - trdb) * pby) / trdb);
    
    /* Obtain the forward and backward parts of the top field */
    BMBMotComp(b->fwd_rec, b, fwd, 0, code & 2, ftx, fty);
    BMBMotComp(b->bak_rec, b, bak, 0, 0,        btx, bty);

    /* Obtain the forward and backward parts of the bottom field */
    BMBMotComp(b->fwd_rec, b, fwd, 1, code & 1, fbx, fby);
    BMBMotComp(b->bak_rec, b, bak, 1, 1,        bbx, bby);
    /*
     * Average forward and backward predictions
     */
    for (i = 0; i < 6*B_SIZE*B_SIZE; i++)
        fwd[i] = (fwd[i] + bak[i] + 1) >> 1;
}


/*
 * PutBMB() reconstructs the current B-VOP macroblock and copies it into
 * the picture "vop".  The reconstruction is the motion compensated
 * macroblock (b->mc[]) plus the decoded error (b->diff[], organized as
 * six blocks of 64 entries), clamped to 0..255; it replaces the
 * prediction in b->mc[].
 */

Void
PutBMB(Vop *vop, BState *b)
{
    /* Start of each luminance block inside the raster ordered b->mc[] */
    static unsigned char blkloc[] = {
        0, B_SIZE, B_SIZE*MB_SIZE, B_SIZE*MB_SIZE + B_SIZE };
    SInt *src, *dst, *pix, sum;
    Int blk, row, col, n, *err;

#if 1           /* #if 0 to write prediction */
    /*
     * Luminance: b->diff[] is block ordered and is read sequentially,
     * b->mc[] is raster ordered with lines of 2*B_SIZE samples.
     */
    err = b->diff;
    for (blk = 0; blk < 4; blk++) {
        pix = &b->mc[blkloc[blk]];
        for (row = 0; row < B_SIZE; row++, pix += 2*B_SIZE) {
            for (col = 0; col < B_SIZE; col++) {
                sum = pix[col] + *err++;
                if (sum & ~0xFF)                /* outside 0..255 */
                    sum = (sum < 0) ? 0 : 255;
                pix[col] = sum;
            }
        }
    }
    /* Chrominance: both blocks are contiguous in b->mc[] and b->diff[] */
    pix = &b->mc[4*B_SIZE*B_SIZE];
    for (n = 0; n < 2*B_SIZE*B_SIZE; n++, pix++) {
        sum = *pix + *err++;
        if (sum & ~0xFF)
            sum = (sum < 0) ? 0 : 255;
        *pix = sum;
    }
#endif
    /*
     * Copy the macroblock line by line into the three planes of the
     * picture; src walks b->mc[] in the order Y, Cb, Cr
     */
    src = b->mc;
    dst = (SInt *)GetImageData(vop->y_chan) + b->cur_yoff;
    for (row = 0; row < MB_SIZE; row++, src += MB_SIZE, dst += vop->width)
        memcpy(dst, src, MB_SIZE*sizeof(SInt));

    dst = (SInt *)GetImageData(vop->u_chan) + b->cur_coff;
    for (row = 0; row < B_SIZE; row++, src += B_SIZE, dst += b->cwidth)
        memcpy(dst, src, B_SIZE*sizeof(SInt));

    dst = (SInt *)GetImageData(vop->v_chan) + b->cur_coff;
    for (row = 0; row < B_SIZE; row++, src += B_SIZE, dst += b->cwidth)
        memcpy(dst, src, B_SIZE*sizeof(SInt));
}

/*
 * BCodeMB() codes a B-VOP macroblock
 */

#ifdef BMINI

Int Bmbt_hist[21];

Void
BCodeMB(BState *b, Bits *bits)
{
    static Char nmvcount[]   = { -1, 0, 1, -1, 2 };
    static Char nmv[]        = { 1, 1, 2, 1, -1, 2, 2, 4 };
    static Char frame_mode[] = { 0, 1, 2, 3, -1, 0, 1, 2 };
    static Char bmt_stats[]  = { 1, 2, 3, 0, -1, 4, 5, 6 };
        static Int opaque[4] = { 0, 0, 0, 0 };
        static mbt_cwd[] = {
            7, 4, 5, 4, 4,
            6, 6, 7, 5, 3,
            3, 2, 3, 4, 3,
            5, 5, 6, 7, 2,
        };
        static mbt_len[] = {
            3, 7, 7, 6, 3,
            3, 7, 7, 6, 3,
            4, 8, 8, 8, 5,
            3, 8, 8, 8, 3,
        };

    static short pmvtab[] = {
    /*  ================= Current Macroblock =================  */
    /*  FWDFRM  BAKFRM  AVEFRM  DIRECT  xxxxxx  FWDFLD  BAKFLD  AVEFRM  */
        0x0001, 0x0003, 0x0031, 0x0000, 0x0000, 0x0021, 0x0043, 0x4321 };
    SInt *mv;
    Int fcode, bmt, i, j, n, pmv_select;

    bmt = b->mode & MBM_B_MODE;
    mv = (SInt *)GetImageData(b->motvec) + 8*b->mb_ndx;
    /* Check if current macroblock is skipped */
    if ((b->cbp == 0) && (b->mode == MBM_B_DIRECT) &&
        (mv[0] == 0) && (mv[1] == 0)) {
        bits->no_skipped++;
        b->prev_bmt = MBM_B_DIRECT;
        b->quant = b->prev_quant;
#if 1
        BitstreamPutBits(b->mottext, 2, 4);
        bits->MBTYPE += 4;
        Bmbt_hist[20]++;
        return;
    }
    i = 5*frame_mode[bmt] + (b->cbp ? (b->cbp & 3) : 4);
    BitstreamPutBits(b->mottext, mbt_cwd[i], mbt_len[i]);
    bits->MBTYPE += mbt_len[i];
    Bmbt_hist[i]++;
    if (b->cbp != 0) {
        bits->CBPB += PutCBPY(b->cbp >> 2, 0, opaque , b->mottext);
#else
        BitstreamPutBits(b->mottext, 0, 1);                              /*MODB*/
        bits->MODB++;
        return;                                           /* skipped macroblock */
    }
	send_MODB_MTYPE(frame_mode[bmt], b->cbp, 0, bits, b->mottext);
    if (b->cbp != 0) {
#if 1
        if (b->cbp & 3) {
            BitstreamPutBits(b->mottext, b->cbp & 3, 3);
            bits->CBPB += 3;
        } else {
            BitstreamPutBits(b->mottext, 1, 1);
            bits->CBPB++;
        }
        bits->CBPB += PutCBPY(b->cbp >> 2, 0, opaque , b->mottext);
#else
        BitstreamPutBits(b->mottext, b->cbp, 6);                          /*CBPB*/
        bits->CBPB += 6;
#endif
#endif
        if (bmt != MBM_B_DIRECT) {
#if 1
            switch (b->quant - b->prev_quant) {
                case -1:  BitstreamPutBits(b->mottext, 5, 3);          bits->DQUANT += 3; break;
                case  0:  BitstreamPutBits(b->mottext, 3, 2);          bits->DQUANT += 2; break;
                case  1:  BitstreamPutBits(b->mottext, 4, 3);          bits->DQUANT += 3; break;
                default:  BitstreamPutBits(b->mottext, b->quant/2, 5); bits->DQUANT += 5; break;
            }
#else
			send_B_DQUANT(0, b->quant - b->prev_quant, bits, b->mottext);
#endif
        }
    } else {
        b->quant = b->prev_quant;
        bits->no_noDCT++;
    }
    if (b->interlaced) {
        if (b->cbp) {
            BitstreamPutBits(b->mottext, b->fieldDCT, 1);                 /*DCT_TYPE*/
            bits->fieldDCT += b->fieldDCT;
            bits->interlaced += 1;
        }
        switch (bmt) {

        default:
            BitstreamPutBits(b->mottext, 0, 1);                    /*field_prediction*/
            bits->interlaced += 1;
            break;

        case MBM_B_FWDFLD:
            i = 4;
            if (b->mode & MBM_B_FWDTOP) i |= 2;
            if (b->mode & MBM_B_FWDBOT) i |= 1;
            BitstreamPutBits(b->mottext, i, 3);
            bits->interlaced += 3;
            bits->no_field++;
            break;

        case MBM_B_BAKFLD:
            i = 4;
            if (b->mode & MBM_B_BAKTOP) i |= 2;
            if (b->mode & MBM_B_BAKBOT) i |= 1;
            BitstreamPutBits(b->mottext, i, 3);
            bits->interlaced += 3;
            bits->no_field++;
            break;

        case MBM_B_AVEFLD:
            i = 16;
            if (b->mode & MBM_B_FWDTOP) i |= 8;
            if (b->mode & MBM_B_FWDBOT) i |= 4;
            if (b->mode & MBM_B_BAKTOP) i |= 2;
            if (b->mode & MBM_B_BAKBOT) i |= 1;
            BitstreamPutBits(b->mottext, i, 5);
            bits->interlaced += 5;
            bits->no_field++;
            break;

        case MBM_B_DIRECT:
            break;
        }
    }
    bits->Btype[bmt_stats[bmt]]++;
    pmv_select = pmvtab[bmt];
    n = nmv[bmt];
    bits->Nmvs[nmvcount[n]]++;
    n <<= 1;
    for (i = 0; i < n; i += 2, pmv_select >>= 4) {
        j = (pmv_select & 0xF) << 1;
        fcode = j ? ((j >= 6) ? b->cur_orig->fcode_back : b->cur_orig->fcode_for) : 1;
        bits->vec += WriteMVcomponent(fcode,
            mv[i+0] - b->pmv[j+0], b->mottext);
        bits->vec += WriteMVcomponent(fcode,
            (bmt >= MBM_B_FWDFLD) ? (mv[i+1]/2 - b->pmv[j+1]/2) :
            (mv[i+1] - b->pmv[j+1]), b->mottext);
        if (j) {
            b->pmv[j+0] = mv[i+0];
            b->pmv[j+1] = mv[i+1];
            if (bmt < MBM_B_DIRECT) {
                b->pmv[j+2] = mv[i+0];
                b->pmv[j+3] = mv[i+1];
            }
        }
    }
    b->prev_bmt = (SInt)bmt;
    MB_CodeCoeff(bits, b->coef, MODE_INTER,
        b->cbp, B_SIZE*B_SIZE, 1, b->mottext, b->mottext,
        NULL, NULL, 1, 0, 0, b->cur_orig->alternate_scan);
}

#else

/*
 * BCodeMB -- write one macroblock described by *b to the b->mottext
 * bitstream and account for the emitted bits in *bits.
 *
 * Emission order (must not change):
 *   1. MODB / macroblock type      (a single 0 bit when the block is skipped)
 *   2. CBPB and, for non-direct modes, the quantizer delta (only if cbp != 0)
 *   3. interlaced info: DCT_TYPE (only if cbp != 0), then field prediction bits
 *   4. motion vector differences against the predictors held in b->pmv[]
 *   5. texture coefficients via MB_CodeCoeff()
 *
 * Side effects on *b: prev_bmt is always updated, quant is reset to
 * prev_quant when no coefficients are coded, and the pmv[] predictors are
 * replaced by the vectors just written.
 */
Void
BCodeMB(BState *b, Bits *bits)
{
    /*
     * The next four tables are indexed by (b->mode & MBM_B_MODE).
     * Index 4 only carries placeholder entries.
     */
    static Char vecs_per_mode[] = { 1, 1, 2, 1, -1, 2, 2, 4 };
    static Char mtype_arg[]     = { 0, 1, 2, 3, -1, 0, 1, 2 };
    static Char btype_slot[]    = { 1, 2, 3, 0, -1, 4, 5, 6 };
    static SInt pred_nibbles[] = {
    /*  ================= Current Macroblock =================  */
    /*  FWDFRM  BAKFRM  AVEFRM  DIRECT  xxxxxx  FWDFLD  BAKFLD  AVEFLD  */
        0x0001, 0x0003, 0x0031, 0x0000, 0x0000, 0x0021, 0x0043, 0x4321 };
    /* Indexed by the vector count (1, 2 or 4): selects a bits->Nmvs[] counter */
    static Char nmvs_slot[]     = { -1, 0, 1, -1, 2 };

    SInt *vec, *cur;
    Int mode_ndx, is_field, fcode, sel, slot, count, flags, k;

    mode_ndx = b->mode & MBM_B_MODE;
    vec = (SInt *)GetImageData(b->motvec) + 8*b->mb_ndx;

    /*
     * Skipped macroblock: nothing coded, direct mode, zero first vector.
     * Only a single 0 bit is written.
     */
    if (b->cbp == 0 && b->mode == MBM_B_DIRECT && vec[0] == 0 && vec[1] == 0) {
        BitstreamPutBits(b->mottext, 0, 1);                              /*MODB*/
        bits->MODB += 1;
        bits->no_skipped++;
        b->quant = b->prev_quant;
        b->prev_bmt = MBM_B_DIRECT;
        return;
    }

    send_MODB_MTYPE(mtype_arg[mode_ndx], b->cbp, 0, bits, b->mottext);

    if (b->cbp == 0) {
        /* No coefficients: the quantizer cannot change for this block */
        bits->no_noDCT++;
        b->quant = b->prev_quant;
    } else {
        BitstreamPutBits(b->mottext, b->cbp, 6);                          /*CBPB*/
        bits->CBPB += 6;
        if (mode_ndx != MBM_B_DIRECT) {
            send_B_DQUANT(0, b->quant - b->prev_quant, bits, b->mottext);
        }
    }

    if (b->interlaced) {
        if (b->cbp) {
            BitstreamPutBits(b->mottext, b->fieldDCT, 1);                 /*DCT_TYPE*/
            bits->interlaced += 1;
            bits->fieldDCT += b->fieldDCT;
        }

        if (mode_ndx == MBM_B_DIRECT) {
            /* direct mode writes no field prediction information */
        } else if (mode_ndx == MBM_B_FWDFLD) {
            /* leading 1, then one bit each for the FWDTOP / FWDBOT flags */
            flags = 4 | ((b->mode & MBM_B_FWDTOP) ? 2 : 0)
                      | ((b->mode & MBM_B_FWDBOT) ? 1 : 0);
            BitstreamPutBits(b->mottext, flags, 3);
            bits->interlaced += 3;
            bits->no_field++;
        } else if (mode_ndx == MBM_B_BAKFLD) {
            /* leading 1, then one bit each for the BAKTOP / BAKBOT flags */
            flags = 4 | ((b->mode & MBM_B_BAKTOP) ? 2 : 0)
                      | ((b->mode & MBM_B_BAKBOT) ? 1 : 0);
            BitstreamPutBits(b->mottext, flags, 3);
            bits->interlaced += 3;
            bits->no_field++;
        } else if (mode_ndx == MBM_B_AVEFLD) {
            /* leading 1, then the forward pair followed by the backward pair */
            flags = 16 | ((b->mode & MBM_B_FWDTOP) ? 8 : 0)
                       | ((b->mode & MBM_B_FWDBOT) ? 4 : 0)
                       | ((b->mode & MBM_B_BAKTOP) ? 2 : 0)
                       | ((b->mode & MBM_B_BAKBOT) ? 1 : 0);
            BitstreamPutBits(b->mottext, flags, 5);
            bits->interlaced += 5;
            bits->no_field++;
        } else {
            /* every remaining mode: a single 0 bit */
            BitstreamPutBits(b->mottext, 0, 1);                    /*field_prediction*/
            bits->interlaced += 1;
        }
    }

    /* Statistics: one counter per mode, one per vector count */
    bits->Btype[btype_slot[mode_ndx]]++;
    count = vecs_per_mode[mode_ndx];
    bits->Nmvs[nmvs_slot[count]]++;

    /*
     * Motion vectors.  Each 4-bit nibble of sel (lowest first) names the
     * predictor pair used for the next vector; doubled, it is the offset of
     * that pair inside b->pmv[].  Offset 0 is coded with fcode 1 and is
     * never overwritten; offsets >= 6 use the backward fcode, the others
     * the forward fcode.
     */
    sel = pred_nibbles[mode_ndx];
    is_field = (mode_ndx >= MBM_B_FWDFLD);
    for (k = 0; k < count; k++) {
        cur = vec + 2*k;
        slot = (sel & 0xF) << 1;
        sel >>= 4;

        if (slot == 0) {
            fcode = 1;
        } else if (slot >= 6) {
            fcode = b->cur_orig->fcode_back;
        } else {
            fcode = b->cur_orig->fcode_for;
        }

        bits->vec += WriteMVcomponent(fcode,
            cur[0] - b->pmv[slot+0], b->mottext);
        if (is_field) {
            /* field modes difference the halved second components */
            bits->vec += WriteMVcomponent(fcode,
                cur[1]/2 - b->pmv[slot+1]/2, b->mottext);
        } else {
            bits->vec += WriteMVcomponent(fcode,
                cur[1] - b->pmv[slot+1], b->mottext);
        }

        if (slot != 0) {
            b->pmv[slot+0] = cur[0];
            b->pmv[slot+1] = cur[1];
            if (mode_ndx < MBM_B_DIRECT) {
                /* frame modes also refresh the following predictor pair */
                b->pmv[slot+2] = cur[0];
                b->pmv[slot+3] = cur[1];
            }
        }
    }

    b->prev_bmt = (SInt)mode_ndx;
    MB_CodeCoeff(bits, b->coef, MODE_INTER,
        b->cbp, B_SIZE*B_SIZE, 1, b->mottext, b->mottext,
        NULL, NULL, 1, 0, 0, b->cur_orig->alternate_scan);
}

#endif
