#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include "mrp.h"

extern POINT dyx[];
extern double sigma_h[], sigma_a[];

IMAGE *read_pgm(char *filename)
{
    int i, j, width, height, maxval;
    char tmp[256];
    IMAGE *img;
    FILE *fp;

    fp = fileopen(filename, "rb");
    fgets(tmp, 256, fp);
    if (tmp[0] != 'P' || tmp[1] != '5') {
	fprintf(stderr, "Not a PGM file!\n");
	exit(1);
    }
    while (*(fgets(tmp, 256, fp)) == '#');
    sscanf(tmp, "%d %d", &width, &height);
    while (*(fgets(tmp, 256, fp)) == '#');
    sscanf(tmp, "%d", &maxval);
    if ((width % BLOCK_SIZE) || (height % BLOCK_SIZE)) {
	fprintf(stderr, "Image width and height must be multiples of %d!\n",
		BLOCK_SIZE);
	exit(1);
    }
    if (maxval > 255) {
	fprintf(stderr, "Sorry, this version only supports 8bpp images!\n");
	exit(1);
    }
    img = alloc_image(width, height, maxval);
    for (i = 0; i < img->height; i++) {
	for (j = 0; j < img->width; j++) {
	    img->val[i][j] = (img_t)fgetc(fp);
	}
    }
    fclose(fp);
    return (img);
}

/*
 * Build, for every pixel (y, x) of `img`, a vector of `prd_order` reference
 * pixel values taken at the offsets listed in the global table dyx[].
 *
 * All vectors live in one contiguous buffer; the returned 2-D array holds a
 * pointer to the vector of each pixel.  Offsets that fall outside the image
 * (or outside the already available pixels on the first row / first column)
 * are clamped as follows:
 *   - pixel (0, 0):   every entry is (maxval + 1) >> 1;
 *   - first row:      source row is 0, source column is clamped to [0, x-1];
 *   - first column:   source row is clamped to [0, y-1], column only to >= 0;
 *   - elsewhere:      source row is clamped to >= 0, column to [0, width-1].
 */
img_t ***init_ref_pels(IMAGE *img, int prd_order)
{
    img_t ***ref_pels, *pool, *dst;
    int x, y, k, sx, sy;

    ref_pels = (img_t ***)alloc_2d_array(img->height, img->width,
                                         sizeof(img_t *));
    pool = (img_t *)alloc_mem(sizeof(img_t) * img->height * img->width
                              * prd_order);

    /* Hand out one prd_order-sized slice of the pool per pixel. */
    for (y = 0; y < img->height; y++) {
        for (x = 0; x < img->width; x++) {
            ref_pels[y][x] = pool;
            pool += prd_order;
        }
    }

    for (y = 0; y < img->height; y++) {
        for (x = 0; x < img->width; x++) {
            dst = ref_pels[y][x];
            if (y == 0 && x == 0) {
                /* No neighbour exists yet: use the mid-range value. */
                for (k = 0; k < prd_order; k++) {
                    dst[k] = ((img->maxval + 1) >> 1);
                }
                continue;
            }
            for (k = 0; k < prd_order; k++) {
                if (y == 0) {
                    sy = 0;
                    sx = x + dyx[k].x;
                    if (sx < 0) {
                        sx = 0;
                    } else if (sx >= x) {
                        sx = x - 1;
                    }
                } else if (x == 0) {
                    sy = y + dyx[k].y;
                    if (sy < 0) {
                        sy = 0;
                    } else if (sy >= y) {
                        sy = y - 1;
                    }
                    sx = x + dyx[k].x;
                    if (sx < 0) {
                        sx = 0;
                    }
                } else {
                    sy = y + dyx[k].y;
                    if (sy < 0) {
                        sy = 0;
                    }
                    sx = x + dyx[k].x;
                    if (sx < 0) {
                        sx = 0;
                    } else if (sx >= img->width) {
                        sx = img->width - 1;
                    }
                }
                dst[k] = img->val[sy][sx];
            }
        }
    }
    return (ref_pels);
}

/*
 * Allocate an ENCODER for `img` and fill in everything that depends only on
 * the image and on the coding parameters passed in:
 *   - copies of the parameters, plus maxprd = maxval << coef_precision;
 *   - per-class predictor and threshold tables (thresholds zeroed);
 *   - per-pixel work arrays (upara, prd, abs_e, idx_E, class, group);
 *   - reference-pixel vectors from init_ref_pels();
 *   - the aconv/econv lookup tables indexed by [pixel value][2x prediction];
 *   - bconv/fconv buffers (allocated here, filled by the set_cost_* routines);
 *   - the scratch probability model enc->spm and the cost tables
 *     coef_cost, th_cost and class_cost.
 *
 * Fields such as pmodels, vlcs and rc are not touched by this function.
 * Returns the new encoder.
 */
ENCODER *init_encoder(IMAGE *img, int num_class, int num_group,
		      int prd_order, int coef_precision,
		      int f_huffman, int num_pmodel, int pm_accuracy)
{
    ENCODER *enc;
    int x, y, cl, gr, org, prd2, half, diff, m, coef, n;
    double log_total;

    enc = (ENCODER *)alloc_mem(sizeof(ENCODER));

    /* Plain parameter copies. */
    enc->height = img->height;
    enc->width = img->width;
    enc->maxval = img->maxval;
    enc->num_class = num_class;
    enc->num_group = num_group;
    enc->prd_order = prd_order;
    enc->coef_precision = coef_precision;
    enc->f_huffman = f_huffman;
    enc->num_pmodel = num_pmodel;
    enc->pm_accuracy = pm_accuracy;
    enc->maxprd = enc->maxval << enc->coef_precision;

    /* Per-class tables. */
    enc->predictor = (int **)alloc_2d_array(enc->num_class, enc->prd_order,
                                            sizeof(int));
    enc->th = (int **)alloc_2d_array(enc->num_class, enc->num_group - 1,
                                     sizeof(int));
    for (cl = 0; cl < enc->num_class; cl++) {
        for (gr = 0; gr < enc->num_group - 1; gr++) {
            enc->th[cl][gr] = 0;
        }
    }

    /* Per-pixel arrays. */
    enc->upara = (int **)alloc_2d_array(enc->height, enc->width, sizeof(int));
    enc->prd = (int **)alloc_2d_array(enc->height, enc->width, sizeof(int));
    enc->ref_pels = init_ref_pels(img, enc->prd_order);
    enc->epara = NULL;
    enc->org = img->val;
    enc->abs_e = (img_t **)alloc_2d_array(enc->height, enc->width,
                                          sizeof(img_t));
    enc->idx_E = (img_t **)alloc_2d_array(enc->height, enc->width,
                                          sizeof(img_t));
    enc->class = (uchar **)alloc_2d_array(enc->height, enc->width,
                                          sizeof(uchar));
    enc->group = (uchar **)alloc_2d_array(enc->height, enc->width,
                                          sizeof(uchar));

    /*
     * Lookup tables indexed by the pixel value and by a prediction kept
     * with one fractional bit (0 .. 2*maxval).
     */
    enc->aconv = (img_t **)alloc_2d_array(enc->maxval+1, (enc->maxval<<1)+1,
                                          sizeof(img_t));
    enc->econv = (img_t **)alloc_2d_array(enc->maxval+1, (enc->maxval<<1)+1,
                                          sizeof(img_t));
    for (org = 0; org <= enc->maxval; org++) {
        for (prd2 = 0; prd2 <= (enc->maxval << 1); prd2++) {
            half = (prd2 + 1) >> 1;
            /* aconv: |org - rounded prediction| */
            if (org > half) {
                enc->aconv[org][prd2] = org - half;
            } else {
                enc->aconv[org][prd2] = half - org;
            }
            if (enc->pm_accuracy < 0) {
                enc->econv[org][prd2] = e2E(org - half, half, prd2 & 1,
                                            enc->maxval);
            } else {
                diff = (org << 1) - prd2;
                if (diff < 0) {
                    diff = -(diff + 1);
                }
                if (diff > enc->maxval) {
                    diff = enc->maxval;
                }
                enc->econv[org][prd2] = diff;
            }
        }
    }
    enc->bconv = (img_t *)alloc_mem((enc->maxprd + 1) * sizeof(img_t));
    enc->fconv = (img_t *)alloc_mem((enc->maxprd + 1) * sizeof(img_t));

    /* Every pixel starts in group 0. */
    for (y = 0; y < enc->height; y++) {
        for (x = 0; x < enc->width; x++) {
            enc->group[y][x] = 0;
        }
    }

    enc->pmlist = (PMODEL **)alloc_mem(enc->num_group * sizeof(PMODEL *));
    /* cumfreq shares the freq buffer, starting MAX_SYMBOL entries in. */
    enc->spm.freq = alloc_mem((MAX_SYMBOL * 2 + 1) * sizeof(uint));
    enc->spm.cumfreq = &(enc->spm.freq[MAX_SYMBOL]);
    enc->sigma = (enc->f_huffman == 1) ? sigma_h : sigma_a;

    enc->mtfbuf = (int *)alloc_mem(enc->num_class * sizeof(int));
    enc->coef_m = (int *)alloc_mem(enc->prd_order * sizeof(int));
    for (n = 0; n < enc->prd_order; n++) {
        enc->coef_m[n] = 0;
    }

    /* Cost of coding a coefficient magnitude for each of 16 parameters. */
    enc->coef_cost = (cost_t **)alloc_2d_array(16, MAX_COEF + 1,
                                               sizeof(cost_t));
    if (enc->f_huffman == 1) {
        for (m = 0; m < 16; m++) {
            for (coef = 0; coef <= MAX_COEF; coef++) {
                enc->coef_cost[m][coef] = ((coef >> m) + m + 1);
                /* non-zero coefficients carry one extra (sign) unit */
                if (coef > 0) {
                    enc->coef_cost[m][coef] += 1.0;
                }
            }
        }
    } else {
        for (m = 0; m < 16; m++) {
            set_spmodel(&enc->spm, MAX_COEF + 1, m);
            log_total = log(enc->spm.cumfreq[MAX_COEF + 1]);
            for (coef = 0; coef <= MAX_COEF; coef++) {
                enc->coef_cost[m][coef] =
                    (log_total - log(enc->spm.freq[coef])) / log(2.0);
                if (coef > 0) {
                    enc->coef_cost[m][coef] += 1.0;
                }
            }
        }
    }

    enc->th_cost = (cost_t *)alloc_mem((MAX_UPARA + 2) * sizeof(cost_t));
    for (n = 0; n < MAX_UPARA + 2; n++) {
        enc->th_cost[n] = 0;
    }
    enc->class_cost = (cost_t *)alloc_mem(enc->num_class * sizeof(cost_t));
    for (n = 0; n < enc->num_class; n++) {
        enc->class_cost[n] = 0;
    }
    return (enc);
}

/*
 * qsort() comparator for an array of pointers into the block-variance array.
 * Orders by the pointed-to value; equal values are ordered by address, i.e.
 * by block index, which gives exactly the order a stable sort would produce.
 */
static int compare_block_variance(const void *a, const void *b)
{
    const int *pa = *(int * const *)a;
    const int *pb = *(int * const *)b;

    if (*pa != *pb) {
        return (*pa < *pb) ? -1 : 1;
    }
    /* Both pointers refer to elements of the same array. */
    if (pa != pb) {
        return (pa < pb) ? -1 : 1;
    }
    return (0);
}

/*
 * Assign an initial class label to every BLOCK_SIZE x BLOCK_SIZE block.
 *
 * For each block the quantity  sum(v^2) - sum(v)^2 / BLOCK_SIZE^2  (a scaled
 * variance of the original pixel values) is computed.  Blocks are ranked by
 * this value in ascending order and the ranking is divided evenly among the
 * enc->num_class classes: the block of rank k gets class
 * (k * num_class) / num_block.  All pixels of a block receive its label in
 * enc->class.
 */
void init_class(ENCODER *enc)
{
    int k, x, y, i, j, v, cl, sum, num_block;
    int *var, **ptr;

    num_block = enc->height * enc->width / (BLOCK_SIZE * BLOCK_SIZE);
    var = (int *)alloc_mem(num_block * sizeof(int));
    ptr = (int **)alloc_mem(num_block * sizeof(int *));
    for (k = 0; k < num_block; k++) {
        /* top-left corner of block k (blocks are numbered in raster order) */
        y = (k / (enc->width / BLOCK_SIZE)) * BLOCK_SIZE;
        x = (k % (enc->width / BLOCK_SIZE)) * BLOCK_SIZE;
        var[k] = sum = 0;
        for (i = 0; i < BLOCK_SIZE; i++) {
            for (j = 0; j < BLOCK_SIZE; j++) {
                v = enc->org[y + i][x + j];
                sum += v;
                var[k] += v * v;
            }
        }
        var[k] -= sum * sum / (BLOCK_SIZE * BLOCK_SIZE);
        ptr[k] = &(var[k]);
    }
    /*
     * Rank the blocks.  qsort() with an address tie-break yields the same
     * order as the former stable bubble sort, in O(n log n) instead of
     * O(n^2) comparisons.
     */
    if (num_block > 1) {
        qsort(ptr, (size_t)num_block, sizeof(int *), compare_block_variance);
    }
    for (k = 0; k < num_block; k++) {
        cl = (k * enc->num_class) / num_block;
        v = (int)(ptr[k] - var);	/* original index of the k-th ranked block */
        y = (v / (enc->width / BLOCK_SIZE)) * BLOCK_SIZE;
        x = (v % (enc->width / BLOCK_SIZE)) * BLOCK_SIZE;
        for (i = 0; i < BLOCK_SIZE; i++) {
            for (j = 0; j < BLOCK_SIZE; j++) {
                enc->class[y + i][x + j] = cl;
            }
        }
    }
    free(ptr);
    free(var);
}

/*
 * Install a closed-form cost model for the early optimisation passes.
 *
 * enc->epara is pointed at enc->abs_e.  For every group the middle model
 * pmodels[gr][num_pmodel >> 1] is selected into pmlist[gr] and its cost
 * table is set to  a + b * k^2  for k = 0..maxval, where
 *   - f_mmse != 0:  a = 0, b = 1 (plain squared error);
 *   - otherwise:    a and b are derived from sigma[gr]^2 (the -log2 of a
 *                   Gaussian density with that variance).
 * subcost[0] of each selected model is cleared, and bconv/fconv are zeroed
 * for every prediction value 0..maxprd.
 */
void set_cost_model(ENCODER *enc, int f_mmse)
{
    int gr, k, p;
    double offset, scale, variance;
    PMODEL *model;

    enc->epara = enc->abs_e;
    /* parameter used for context modeling */
    for (gr = 0; gr < enc->num_group; gr++) {
        variance = enc->sigma[gr] * enc->sigma[gr];
        if (!f_mmse) {
            offset = 0.5 * log(2 * M_PI * variance) / log(2.0);
            scale = 1.0 / (2.0 * log(2.0) * variance);
        } else {
            offset = 0;
            scale = 1.0;
        }
        model = enc->pmodels[gr][enc->num_pmodel >> 1];
        enc->pmlist[gr] = model;
        for (k = 0; k <= enc->maxval; k++) {
            model->cost[k] = offset + scale * (double)(k * k);
        }
        model->subcost[0] = 0.0;
    }
    for (p = 0; p <= enc->maxprd; p++) {
        enc->bconv[p] = 0;
        enc->fconv[p] = 0;
    }
}

/*
 * Switch the cost tables of every probability model to actual code lengths.
 *
 * pm_accuracy < 0:  enc->epara points at idx_E and each model has a single
 *                   sub-model.
 * pm_accuracy >= 0: enc->epara points at the original pixels; bconv/fconv
 *                   are filled so that, for a prediction k, the value
 *                   q = (maxprd - k + half step) >> shift is split into
 *                   fconv[k] = low pm_accuracy bits and bconv[k] = the rest;
 *                   each model then has 1 << pm_accuracy consecutive
 *                   sub-models.
 *
 * Costs per model:
 *   - Huffman:      cost[k] = code length from enc->vlcs, subcost[0] = 0;
 *   - pm_accuracy<0: cost[k] = -log2(freq[k]),
 *                   subcost[0] = log2(cumfreq[maxval + 1]);
 *   - otherwise:    for each sub-model, cost[k] = -log2(freq[k]) and
 *                   subcost[k] = log2(cumfreq[k + maxval + 1] - cumfreq[k]).
 */
void set_cost_rate(ENCODER *enc)
{
    int gr, m, s, k, q, frac_mask, frac_shift, num_spm;
    double inv_ln2, total;
    PMODEL *pm;

    if (enc->pm_accuracy >= 0) {
        enc->epara = enc->org;
        frac_mask = (1 << enc->pm_accuracy) - 1;
        frac_shift = enc->coef_precision - enc->pm_accuracy;
        for (k = 0; k <= enc->maxprd; k++) {
            q = (enc->maxprd - k + (1 << frac_shift) / 2) >> frac_shift;
            enc->fconv[k] = (q & frac_mask);
            enc->bconv[k] = (q >> enc->pm_accuracy);
        }
        num_spm = 1 << enc->pm_accuracy;
    } else {
        enc->epara = enc->idx_E;
        num_spm = 1;
    }

    inv_ln2 = 1.0 / log(2.0);
    for (gr = 0; gr < enc->num_group; gr++) {
        for (m = 0; m < enc->num_pmodel; m++) {
            pm = enc->pmodels[gr][m];

            if (enc->f_huffman == 1) {
                for (k = 0; k < pm->size; k++) {
                    pm->cost[k] = enc->vlcs[gr][pm->id].len[k];
                }
                pm->subcost[0] = 0.0;
                continue;
            }

            if (enc->pm_accuracy < 0) {
                for (k = 0; k < pm->size; k++) {
                    pm->cost[k] = -inv_ln2 * log(pm->freq[k]);
                }
                total = pm->cumfreq[enc->maxval + 1];
                pm->subcost[0] = inv_ln2 * log(total);
                continue;
            }

            /* Sub-models are stored consecutively after pmodels[gr][m]. */
            for (s = 0; s < num_spm; s++, pm++) {
                for (k = 0; k < pm->size; k++) {
                    pm->cost[k] = -inv_ln2 * log(pm->freq[k]);
                }
                for (k = 0; k <= enc->maxval; k++) {
                    total = pm->cumfreq[k + enc->maxval + 1] - pm->cumfreq[k];
                    pm->subcost[k] = inv_ln2 * log(total);
                }
            }
        }
    }
}

/*
 * Recompute the prediction and the derived error measures for every pixel
 * of the rectangle rows [tly, bry) x columns [tlx, brx).
 *
 * For each pixel the predictor of its class is applied to its reference
 * vector; the sum is clipped to [0, maxprd] and stored in enc->prd.  The
 * prediction, reduced to one fractional bit, then indexes the aconv / econv
 * tables together with the original pixel value to fill abs_e and idx_E.
 */
void predict_region(ENCODER *enc, int tly, int tlx, int bry, int brx)
{
    int x, y, k, acc, pix;
    int *coef;
    img_t *ref;

    for (y = tly; y < bry; y++) {
        for (x = tlx; x < brx; x++) {
            coef = enc->predictor[enc->class[y][x]];
            ref = enc->ref_pels[y][x];
            pix = enc->org[y][x];

            acc = 0;
            for (k = 0; k < enc->prd_order; k++) {
                acc += coef[k] * ref[k];
            }
            if (acc < 0) {
                acc = 0;
            } else if (acc > enc->maxprd) {
                acc = enc->maxprd;
            }
            enc->prd[y][x] = acc;

            /* keep a single fractional bit for the table lookups */
            acc >>= (enc->coef_precision - 1);
            enc->abs_e[y][x] = enc->aconv[pix][acc];
            enc->idx_E[y][x] = enc->econv[pix][acc];
        }
    }
}

/*
 * Return the total coding cost of a rectangle and refresh the context
 * parameter and group of every pixel visited.
 *
 * The rectangle rows [tly, bry) x columns [tlx, brx) is first enlarged by
 * UPEL_DIST to the left, to the right and downwards (clipped to the image);
 * the top edge is left as given.  For each pixel:
 *   - upara is recomputed with calc_u() from enc->idx_E;
 *   - the group is the first index gr with upara < th[class][gr]
 *     (num_group - 1 when no threshold exceeds it);
 *   - the cost  cost[base + e] + subcost[base]  of the selected model is
 *     accumulated, where base/frac come from bconv/fconv of the prediction.
 */
cost_t calc_cost(ENCODER *enc, int tly, int tlx, int bry, int brx)
{
    cost_t total;
    int x, y, u, gr, prd, e, base;
    int *th;
    PMODEL *pm;

    bry += UPEL_DIST;
    if (bry > enc->height) {
        bry = enc->height;
    }
    tlx -= UPEL_DIST;
    if (tlx < 0) {
        tlx = 0;
    }
    brx += UPEL_DIST;
    if (brx > enc->width) {
        brx = enc->width;
    }

    total = 0;
    for (y = tly; y < bry; y++) {
        for (x = tlx; x < brx; x++) {
            u = calc_u(enc->idx_E, enc->width, y, x);
            enc->upara[y][x] = u;

            th = enc->th[enc->class[y][x]];
            gr = 0;
            while (gr < enc->num_group - 1 && u >= th[gr]) {
                gr++;
            }
            enc->group[y][x] = gr;

            e = enc->epara[y][x];
            prd = enc->prd[y][x];
            base = enc->bconv[prd];
            pm = enc->pmlist[gr] + enc->fconv[prd];
            total += pm->cost[base + e] + pm->subcost[base];
        }
    }
    return (total);
}

/*
 * Design a linear predictor for every class by weighted least squares.
 *
 * For each class the normal equations are accumulated over all pixels of
 * that class (each pixel weighted by 1 when f_mmse is set, otherwise by
 * 1 / sigma[group]^2), solved by Gauss-Jordan elimination with row pivoting,
 * and the solution is scaled by 2^coef_precision, floored to integers and
 * clamped to [-MAX_COEF, MAX_COEF].  The accumulated rounding loss is then
 * partly given back by incrementing the coefficients with the largest
 * fractional remainders.
 *
 * Afterwards the whole image is re-predicted and its cost is returned.
 */
cost_t design_predictor(ENCODER *enc, int f_mmse)
{
    double **mat, *weight, w, e, d, pivot;
    int x, y, i, j, k, cl, gr, pivpos, *index;
    img_t *ref_p;

    /* mat is the augmented matrix: prd_order rows, prd_order + 1 columns. */
    mat = (double **)alloc_2d_array(enc->prd_order, enc->prd_order + 1,
				    sizeof(double));
    index = (int *)alloc_mem(sizeof(int) * enc->prd_order);
    weight = (double *)alloc_mem(sizeof(double) * enc->num_group);
    for (gr = 0; gr < enc->num_group; gr++) {
	if (f_mmse) {
	    weight[gr] = 1.0;
	} else {
	    weight[gr] = 1.0 / (enc->sigma[gr] * enc->sigma[gr]);
	}
    }
    for (cl = 0; cl < enc->num_class; cl++) {
	for (i = 0; i < enc->prd_order; i++) {
	    for (j = 0; j <= enc->prd_order; j++) {
		mat[i][j] = 0.0;
	    }
	}
	/* Accumulate the upper triangle and the right-hand side. */
	for (y = 0; y < enc->height; y++) {
	    for (x = 0; x < enc->width; x++) {
		if (enc->class[y][x] != cl) {
		    /* skip ahead by one block width (together with x++) */
		    x += BLOCK_SIZE - 1;
		    continue;
		}
		gr = enc->group[y][x];
		ref_p = enc->ref_pels[y][x];
		for (i = 0; i < enc->prd_order; i++) {
		    w = weight[gr] * ref_p[i];
		    for (j = i; j < enc->prd_order; j++) {
			mat[i][j] += w * ref_p[j];
		    }
		    mat[i][enc->prd_order] += w * enc->org[y][x];
		}
	    }
	}
	/* Mirror the upper triangle and reset the row permutation. */
	for (i = 0; i < enc->prd_order; i++) {
	    index[i] = i;
	    for (j = 0; j < i; j++) {
		mat[i][j] = mat[j][i];
	    }
	}
	/* Gauss-Jordan elimination; rows are swapped through index[]. */
	for (i = 0; i < enc->prd_order; i++) {
	    pivpos = i;
	    pivot = fabs(mat[index[i]][i]);
	    for (k = i + 1; k < enc->prd_order; k++) {
		if (fabs(mat[index[k]][i]) > pivot) {
		    pivot = fabs(mat[index[k]][i]);
		    pivpos = k;
		}
	    }
	    k = index[i];
	    index[i] = index[pivpos];
	    index[pivpos] = k;
	    /* Columns with a (near-)zero pivot are left unreduced. */
	    if (pivot > 1E-10) {
		d = mat[index[i]][i];
		for (j = i; j <= enc->prd_order; j++) {
		    mat[index[i]][j] /= d;
		}
		for (k = 0; k < enc->prd_order; k++) {
		    if (k == i) continue;
		    d = mat[index[k]][i];
		    for (j = i; j <= enc->prd_order; j++) {
			mat[index[k]][j] -= d * mat[index[i]][j];
		    }
		}
	    }
	}
	/* Quantise the solution to integers scaled by 2^coef_precision. */
	w = (1 << enc->coef_precision);
	e = 0.0;
	for (i = 0; i < enc->prd_order; i++) {
	    if (fabs(mat[index[i]][i]) > 1E-10) {
                d = mat[index[i]][enc->prd_order] * w;
	    } else {
                d = 0.0;
	    }
	    /* k = floor(d): truncate, then step down for negative fractions */
            k = d;
            if (k > d) k--;
	    if (k < -MAX_COEF) {
		k = d = -MAX_COEF;
	    } else if (k > MAX_COEF) {
		k = d = MAX_COEF;
	    }
            enc->predictor[cl][i] = k;
	    /* keep the fractional remainder in the right-hand-side column */
	    d -= k;
	    e += d;
	    mat[index[i]][enc->prd_order] = d;
	}
	/* minimize mean rounding errors */
	/* round(e) coefficients, largest remainder first, are bumped by one */
	k = e + 0.5;
	for (;k > 0; k--) {
	    d = 0;
	    for (j = i = 0; i < enc->prd_order; i++) {
		if (mat[index[i]][enc->prd_order] > d) {
		    d = mat[index[i]][enc->prd_order];
		    j = i;
		}
	    }
	    if (enc->predictor[cl][j] < MAX_COEF) enc->predictor[cl][j]++;
	    mat[index[j]][enc->prd_order] = 0;
	}
    }
    free(weight);
    free(index);
    free(mat);

    predict_region(enc, 0, 0, enc->height, enc->width);
    return (calc_cost(enc, 0, 0, enc->height, enc->width));
}


/*
 * Re-optimise, for every class, the thresholds that map the context
 * parameter upara to a group, then (when applicable) the probability model
 * used by each group.
 *
 * Step 1: per class, a dynamic program over threshold positions
 *         0..MAX_UPARA+1 minimises the pixel costs plus the threshold costs
 *         in enc->th_cost; the result is written to enc->th[cl].
 * Step 2: every pixel's group is re-derived from the new thresholds and the
 *         total cost is summed.
 * Step 3: only when epara is not abs_e and more than one model exists, the
 *         cheapest model per group is selected into enc->pmlist and the
 *         cost is recomputed from those choices.
 *
 * Returns the resulting total cost.
 */
cost_t optimize_group(ENCODER *enc)
{
    cost_t cost, min_cost, **cbuf, *dpcost, *cbuf_p, *thc_p;
    int x, y, th1, th0, k, u, cl, gr, prd, e, base, frac;
    int **trellis, *tre_p;
    PMODEL *pm, **pm_p;

    trellis = (int **)alloc_2d_array(enc->num_group, MAX_UPARA + 2,
				     sizeof(int));
    dpcost = (cost_t *)alloc_mem((MAX_UPARA + 2) * sizeof(cost_t));
    cbuf = (cost_t **)alloc_2d_array(enc->num_group, MAX_UPARA + 2,
				     sizeof(cost_t));
    thc_p = enc->th_cost;
    for (k = 0; k < MAX_UPARA + 2; k++) trellis[0][k] = 0;
    /* Dynamic programming */
    for (cl = 0; cl < enc->num_class; cl++) {
	for (gr = 0; gr < enc->num_group; gr++) {
	    cbuf_p = cbuf[gr];
	    for (u = 0; u < MAX_UPARA + 2; u++) {
		cbuf_p[u] = 0;
	    }
	}
	/* cbuf[gr][u]: cost of coding all class-cl pixels with upara == u - 1
	   using the model of group gr */
	for (y = 0; y < enc->height; y++) {
	    for (x = 0; x < enc->width; x++) {
		if (enc->class[y][x] == cl) {
		    u = enc->upara[y][x] + 1;
		    e = enc->epara[y][x];
		    prd = enc->prd[y][x];
		    base = enc->bconv[prd];
		    frac = enc->fconv[prd];
		    pm_p = enc->pmlist;
		    for (gr = 0; gr < enc->num_group; gr++) {
			pm = (*pm_p++) + frac;
			cbuf[gr][u] += pm->cost[base + e] + pm->subcost[base];
		    }
		}
	    }
	}
	/* turn each row into a running (prefix) sum over u */
	for (gr = 0; gr < enc->num_group; gr++) {
	    cbuf_p = cbuf[gr];
	    for (u = 1; u < MAX_UPARA + 2; u++) {
		cbuf_p[u] += cbuf_p[u - 1];
	    }
	}
	cbuf_p = cbuf[0];
	for (u = 0; u < MAX_UPARA + 2; u++) {
	    dpcost[u] = cbuf_p[u];
	}
	for (gr = 1; gr < enc->num_group; gr++) {
	    cbuf_p = cbuf[gr];
	    tre_p = trellis[gr - 1];
	    /* minimize (cbuf_p[th1] - cbuf_p[th0] + dpcost[th0]) */
	    /* th1 runs downwards so dpcost[k] for k < th1 still holds the
	       values of the previous group while dpcost[th1] is overwritten */
	    for (th1 = MAX_UPARA + 1; th1 >= 0; th1--) {
		th0 = th1;
		min_cost = dpcost[th1] - cbuf_p[th1]
		         + thc_p[0] + thc_p[th0 - tre_p[th0]];
		for (k = 0; k < th1; k++) {
		    cost = dpcost[k] - cbuf_p[k]
			 + thc_p[th1 - k] + thc_p[k - tre_p[k]];
		    if (cost < min_cost) {
			min_cost = cost;
			th0 = k;
		    }
		}
		dpcost[th1] = min_cost + cbuf_p[th1];
		trellis[gr][th1] = th0;
		/* the last group only needs the entry for th1 == MAX_UPARA + 1 */
		if (gr == enc->num_group - 1) break;
	    }
	}
	/* trace the trellis back to recover the thresholds of this class */
	th1 = MAX_UPARA + 1;
	for (gr = enc->num_group - 1; gr > 0; gr--) {
	    th1 = trellis[gr][th1];
	    enc->th[cl][gr - 1] = th1;
	}
    }
    /* renew groups */
    cost = 0;
    pm_p = enc->pmlist;
    for (y = 0; y < enc->height; y++) {
	for (x = 0; x < enc->width; x++) {
            cl = enc->class[y][x];
            u = enc->upara[y][x];
	    for (gr = 0; gr < enc->num_group - 1; gr++) {
                if (u < enc->th[cl][gr]) break;
	    }
            enc->group[y][x] = gr;
	    e = enc->epara[y][x];
	    prd = enc->prd[y][x];
	    base = enc->bconv[prd];
	    pm = pm_p[gr] + enc->fconv[prd];
            cost += pm->cost[base + e] + pm->subcost[base];
	}
    }
    /* optimize probability models */
    if (enc->epara != enc->abs_e && enc->num_pmodel > 1) {
	/* cbuf is reused as [group][model]; grow it if it is too narrow */
	if (enc->num_pmodel > MAX_UPARA + 2) {
	    free(cbuf);
	    cbuf = (cost_t **)alloc_2d_array(enc->num_group, enc->num_pmodel,
					     sizeof(cost_t));
	}
	for (gr = 0; gr < enc->num_group; gr++) {
	    for (k = 0; k < enc->num_pmodel; k++) {
		cbuf[gr][k] = 0;
	    }
	}
	for (y = 0; y < enc->height; y++) {
	    for (x = 0; x < enc->width; x++) {
		gr = enc->group[y][x];
		e = enc->epara[y][x];
		prd = enc->prd[y][x];
		base = enc->bconv[prd];
		frac = enc->fconv[prd];
		for (k = 0; k < enc->num_pmodel; k++) {
		    pm = enc->pmodels[gr][k] + frac;
		    cbuf[gr][k] += pm->cost[base + e] + pm->subcost[base];
		}
	    }
	}
	/* pick the cheapest model of each group (first one wins on ties) */
	for (gr = 0; gr < enc->num_group; gr++) {
	    pm = enc->pmodels[gr][0];
	    cost = cbuf[gr][0];
	    for (k = 1; k < enc->num_pmodel; k++) {
		if (cost > cbuf[gr][k]) {
		    cost = cbuf[gr][k];
		    pm = enc->pmodels[gr][k];
		}
	    }
	    pm_p[gr] = pm;
	}
	cost = 0.0;
	/* NOTE(review): relies on pm->id being the model's index k within
	   pmodels[gr] -- confirm where the models are created */
	for (gr = 0; gr < enc->num_group; gr++) {
	    cost += cbuf[gr][pm_p[gr]->id];
	}
    }
    free(cbuf);
    free(dpcost);
    free(trellis);
    return (cost);
}

cost_t optimize_class(ENCODER *enc)
{
    cost_t cost, min_cost;
    int x, y, i, j, cl, min_cl;
    
    for (i = 0; i < enc->num_class; i++) {
        enc->mtfbuf[i] = i;
    }
    for (y = 0; y < enc->height; y += BLOCK_SIZE) {
	for (x = 0; x < enc->width; x += BLOCK_SIZE) {
	    min_cost = 1E8;
	    min_cl = 0;
	    mtf_classlabel(x, y, enc->class, enc->mtfbuf,
			   enc->width, enc->num_class);
	    for (cl = 0; cl < enc->num_class; cl++) {
		cost = enc->class_cost[enc->mtfbuf[cl]];
		for (i = y; i < y + BLOCK_SIZE; i++) {
		    for (j = x; j < x + BLOCK_SIZE; j++) {
			enc->class[i][j] = cl;
		    }
		}
		predict_region(enc, y, x, y+BLOCK_SIZE, x+BLOCK_SIZE);
		cost += calc_cost(enc, y, x, y+BLOCK_SIZE, x+BLOCK_SIZE);
		if (cost < min_cost) {
		    min_cost = cost;
		    min_cl = cl;
		}
	    }
	    for (i = y; i < y + BLOCK_SIZE; i++) {
		for (j = x; j < x + BLOCK_SIZE; j++) {
		    enc->class[i][j] = min_cl;
		}
	    }
	    predict_region(enc, y, x, y+BLOCK_SIZE, x+BLOCK_SIZE);
	}
    }
    return (calc_cost(enc, 0, 0, enc->height, enc->width));
}

/*
 * Jointly refine two coefficients (positions pos1 and pos2) of the
 * predictor of class `cl` by exhaustive search.
 *
 * All SEARCH_RANGE x SEARCH_RANGE combinations of offsets in
 * [-(SEARCH_RANGE >> 1), +(SEARCH_RANGE >> 1)] around the current pair are
 * evaluated: each candidate's cost is the side cost of the two coefficients
 * (enc->coef_cost) plus the coding cost of every pixel of the class under
 * the shifted prediction.  The cheapest candidate is applied (after clamping
 * to [-MAX_COEF, MAX_COEF]) to enc->predictor and, incrementally, to
 * enc->prd of the affected pixels.
 */
void optimize_coef(ENCODER *enc, int cl, int pos1, int pos2)
{
#define SEARCH_RANGE 11
    cost_t cbuf[SEARCH_RANGE * SEARCH_RANGE], *cbuf_p;
    int i, j, k, x, y, df1, df2, e, org, base, *ep;
    int prd, prd_f1, prd_f2, shift, maxprd;
    img_t *ref_p, *econv_p, *bconv_p, *fconv_p;
    PMODEL *pm, *pm_p;

    /* Initialise every candidate with the cost of coding its two
       coefficient magnitudes (clamped to MAX_COEF). */
    k = 0;
    for (i = 0; i < SEARCH_RANGE; i++) {
	y = enc->predictor[cl][pos1] + i - (SEARCH_RANGE >> 1);
	if (y < 0) y = -y;
	if (y > MAX_COEF) y = MAX_COEF;
	for (j = 0; j < SEARCH_RANGE; j++) {
	    x = enc->predictor[cl][pos2] + j - (SEARCH_RANGE >> 1);
	    if (x < 0) x = -x;
	    if (x > MAX_COEF) x = MAX_COEF;
	    cbuf[k++] = enc->coef_cost[enc->coef_m[pos1]][y]
		      + enc->coef_cost[enc->coef_m[pos2]][x];
	}
    }
    /* *ep is the symbol fed to the cost table: the converted error e when
       epara is idx_E, otherwise the original pixel value. */
    ep = (enc->epara == enc->idx_E)? &e : &org;
    bconv_p = enc->bconv;
    fconv_p = enc->fconv;
    shift = enc->coef_precision - 1;
    maxprd = enc->maxprd;
    for (y = 0; y < enc->height; y++) {
	for (x = 0; x < enc->width; x++) {
	    if (enc->class[y][x] != cl) {
		/* skip ahead by one block width (together with x++) */
		x += BLOCK_SIZE - 1;
		continue;
	    }
	    ref_p = enc->ref_pels[y][x];
	    org = enc->org[y][x];
	    econv_p = enc->econv[org];
	    df1 = ref_p[pos1];
	    df2 = ref_p[pos2];
	    pm_p = enc->pmlist[enc->group[y][x]];
	    /* prediction for the most negative offset pair */
	    prd_f1 = enc->prd[y][x] - (df1 + df2) * (SEARCH_RANGE >> 1);
	    cbuf_p = cbuf;
	    /* Slow path with per-candidate clipping when the candidate range
	       may leave [0, maxprd]; otherwise the unclipped fast path. */
	    if (prd_f1 < 0 || prd_f1 + (df1 + df2) * SEARCH_RANGE > maxprd) {
		for (i = 0; i < SEARCH_RANGE; i++, prd_f1 += df1) {
		    prd_f2 = prd_f1;
		    for (j = 0; j < SEARCH_RANGE; j++, prd_f2 += df2) {
			prd = prd_f2;
			if (prd < 0) prd = 0;
			else if (prd > maxprd) prd = maxprd;
			e = econv_p[prd >> shift];
			base = bconv_p[prd];
			pm = pm_p + fconv_p[prd];
			(*cbuf_p++) += pm->cost[*ep + base]
			            + pm->subcost[base];
		    }
		}
	    } else {
		for (i = 0; i < SEARCH_RANGE; i++, prd_f1 += df1) {
		    prd_f2 = prd_f1;
		    for (j = 0; j < SEARCH_RANGE; j++, prd_f2 += df2) {
			e = econv_p[prd_f2 >> shift];
			base = bconv_p[prd_f2];
			pm = pm_p + fconv_p[prd_f2];
			(*cbuf_p++) += pm->cost[*ep + base]
			            + pm->subcost[base];
		    }
		}
	    }
	}
    }
    /* Start from the centre candidate (no change) so that it wins ties. */
    j = (SEARCH_RANGE * SEARCH_RANGE) >> 1;
    for (i = 0; i < SEARCH_RANGE * SEARCH_RANGE; i++) {
	if (cbuf[i] < cbuf[j]) {
	    j = i;
	}
    }
    /* Decode the winning index into new coefficient values, clamp them,
       then convert back to deltas relative to the current coefficients. */
    i = (j / SEARCH_RANGE) - (SEARCH_RANGE >> 1) + enc->predictor[cl][pos1];
    j = (j % SEARCH_RANGE) - (SEARCH_RANGE >> 1) + enc->predictor[cl][pos2];
    if (i < -MAX_COEF) i = -MAX_COEF;
    else if (i > MAX_COEF) i = MAX_COEF;
    if (j < -MAX_COEF) j = -MAX_COEF;
    else if (j > MAX_COEF) j = MAX_COEF;
    i -= enc->predictor[cl][pos1];
    j -= enc->predictor[cl][pos2];
    if (i != 0 || j != 0) {
	/* update the stored predictions incrementally (no clipping here) */
	for (y = 0; y < enc->height; y++) {
	    for (x = 0; x < enc->width; x++) {
		if (enc->class[y][x] == cl) {
		    ref_p = enc->ref_pels[y][x];
		    enc->prd[y][x] += ref_p[pos1] * i + ref_p[pos2] * j;
		}
	    }
	}
	enc->predictor[cl][pos1] += i;
	enc->predictor[cl][pos2] += j;
    }
}

/*
 * Refine the predictor of every class by repeated pairwise coefficient
 * searches, then re-predict the whole image and return its cost.
 *
 * For each class, prd_order rounds are run; each round draws two distinct
 * coefficient positions with rand() and hands them to optimize_coef().
 * When the predictor has fewer than two coefficients no distinct pair
 * exists, so the search is skipped (drawing until the positions differ
 * would never terminate) and only the prediction/cost refresh is done.
 */
cost_t optimize_predictor(ENCODER *enc)
{
    int cl, k, pos1, pos2;
#ifndef RAND_MAX
#  define RAND_MAX 32767
#endif

    if (enc->prd_order >= 2) {
        for (cl = 0; cl < enc->num_class; cl++) {
            for (k = 0; k < enc->prd_order; k++) {
                /* redraw both positions until they differ */
                do {
                    pos1 = (int)(((double)rand() * enc->prd_order)
                                 / (RAND_MAX+1.0));
                    pos2 = (int)(((double)rand() * enc->prd_order)
                                 / (RAND_MAX+1.0));
                } while (pos1 == pos2);
                optimize_coef(enc, cl, pos1, pos2);
            }
        }
    }
    predict_region(enc, 0, 0, enc->height, enc->width);
    return (calc_cost(enc, 0, 0, enc->height, enc->width));
}

/*
 * Append the low `n` bits of `x` to the output bit stream, most significant
 * bit first.
 *
 * Bits are collected in a static one-byte buffer shared by all calls; a byte
 * is written to `fp` with putc() each time the buffer fills up.  Bits that
 * do not complete a byte stay pending until a later call.  When more than
 * 32 bits are requested, the positions above bit 31 are written as zeros.
 *
 * Returns n, or 0 (writing nothing) when n <= 0.
 */
int putbits(FILE *fp, int n, uint x)
{
    static int bitpos = 8;	/* free bit positions left in bitbuf */
    static uint bitbuf = 0;	/* byte being assembled */
    const int requested = n;

    if (requested <= 0) {
        return (0);
    }
    /* Complete and emit bytes for as long as enough bits are available. */
    while (n >= bitpos) {
        const uint fill_mask = 0xff >> (8 - bitpos);

        n -= bitpos;
        if (n < 32) {
            /* shifting by 32 or more would be undefined; those bits are 0 */
            bitbuf |= (x >> n) & fill_mask;
        }
        putc(bitbuf, fp);
        bitbuf = 0;
        bitpos = 8;
    }
    /* Park the remaining n (< bitpos) bits at the top of the free space. */
    bitpos -= n;
    bitbuf |= (x & (0xff >> (8 - n))) << bitpos;
    return (requested);
}

/*
 * Drop classes that no block uses and renumber the remaining ones densely.
 *
 * Usage is counted from the top-left pixel of every block.  Surviving
 * classes keep their relative order; their predictor coefficients and
 * thresholds are moved down to the new index, every pixel label in
 * enc->class is rewritten, and enc->num_class is reduced.  enc->mtfbuf is
 * used as scratch space (old label -> new label, -1 for removed classes).
 * Nothing but mtfbuf is changed when every class is in use.
 */
void remove_emptyclass(ENCODER *enc)
{
    int *remap = enc->mtfbuf;
    int src, dst, used, k, x, y;

    /* count the blocks of each class */
    for (src = 0; src < enc->num_class; src++) {
        remap[src] = 0;
    }
    for (y = 0; y < enc->height; y += BLOCK_SIZE) {
        for (x = 0; x < enc->width; x += BLOCK_SIZE) {
            remap[enc->class[y][x]]++;
        }
    }

    /* turn the counts into the old -> new label table */
    used = 0;
    for (src = 0; src < enc->num_class; src++) {
        if (remap[src] == 0) {
            remap[src] = -1;
        } else {
            remap[src] = used++;
        }
    }
    if (used == enc->num_class) {
        return;	/* no empty class */
    }

    for (y = 0; y < enc->height; y++) {
        for (x = 0; x < enc->width; x++) {
            enc->class[y][x] = remap[enc->class[y][x]];
        }
    }

    /* compact the per-class parameter tables */
    dst = 0;
    for (src = 0; src < enc->num_class; src++) {
        if (remap[src] < 0) {
            continue;
        }
        if (dst != src) {
            for (k = 0; k < enc->prd_order; k++) {
                enc->predictor[dst][k] = enc->predictor[src][k];
            }
            for (k = 0; k < enc->num_group - 1; k++) {
                enc->th[dst][k] = enc->th[src][k];
            }
        }
        dst++;
    }
    enc->num_class = dst;
}

/*
 * Write the fixed-layout stream header with putbits() and return the number
 * of bits written.  Fields, in order (width in bits):
 *   MAGIC_NUMBER (16), VERSION (8), image width (16), image height (16),
 *   maxval (16), number of components (6, always 1), num_class (6),
 *   num_group (6), prd_order (6), num_pmodel - 1 (6),
 *   coef_precision - 1 (4), pm_accuracy + 1 (3), f_huffman (1).
 */
int write_header(ENCODER *enc, FILE *fp)
{
    int total = 0;

    /* stream identification */
    total += putbits(fp, 16, MAGIC_NUMBER);
    total += putbits(fp, 8, VERSION);

    /* image geometry */
    total += putbits(fp, 16, enc->width);
    total += putbits(fp, 16, enc->height);
    total += putbits(fp, 16, enc->maxval);
    total += putbits(fp, 6, 1);	/* number of components (1 = monochrome) */

    /* coding parameters */
    total += putbits(fp, 6, enc->num_class);
    total += putbits(fp, 6, enc->num_group);
    total += putbits(fp, 6, enc->prd_order);
    total += putbits(fp, 6, enc->num_pmodel - 1);
    total += putbits(fp, 4, enc->coef_precision - 1);
    total += putbits(fp, 3, enc->pm_accuracy + 1);
    total += putbits(fp, 1, enc->f_huffman);

    return (total);
}

/*
 * Write `v` as a prefix code followed by `m` raw bits.
 *
 * The prefix consists of (v >> m) zero bits terminated by a one bit; it is
 * emitted in chunks of at most 32 bits.  The low m bits of v follow.
 * Returns the total number of bits, (v >> m) + 1 + m.
 */
int encode_golomb(FILE *fp, int m, int v)
{
    int prefix_len, pending;

    prefix_len = (v >> m) + 1;
    /* leading zeros that do not fit into the final chunk */
    for (pending = prefix_len; pending > 32; pending -= 32) {
        putbits(fp, 32, 0);
    }
    putbits(fp, pending, 1);	/* prefix code */
    putbits(fp, m, v);
    return (prefix_len + m);
}

/*
 * Code (fp != NULL) or only cost-estimate (fp == NULL) the class label of
 * every block.
 *
 * Each block's label is first converted to an index through enc->mtfbuf,
 * which mtf_classlabel() updates per block, and a histogram of those
 * indices is built.
 *
 * Huffman mode:    a code is built from the histogram with make_vlc().
 *                  With fp == NULL the code lengths are stored in
 *                  enc->class_cost and their sum is returned; otherwise the
 *                  lengths (4 bits each, minus one) and the code words are
 *                  written with putbits().
 * Arithmetic mode: each index probability is quantised on a log scale to
 *                  one of PMCLASS_LEVEL levels and enc->spm is built from
 *                  the quantised values.  With fp == NULL the resulting
 *                  costs go to enc->class_cost and their (truncated) sum is
 *                  returned; otherwise the levels and then the indices are
 *                  sent through rc_encode().
 *
 * Returns the number of bits (estimated or reported by the coder).
 */
int encode_class(FILE *fp, ENCODER *enc)
{
    int i, blk, x, y, num_block, bits;
    uint *hist, *index;

    num_block = enc->height * enc->width / (BLOCK_SIZE * BLOCK_SIZE);
    hist = (uint *)alloc_mem(enc->num_class * sizeof(uint));
    index = (uint *)alloc_mem(num_block * sizeof(uint));
    for (i = 0; i < enc->num_class; i++) {
	enc->mtfbuf[i] = i;
	hist[i] = 0;
    }
    blk = 0;
    for (y = 0; y < enc->height; y += BLOCK_SIZE) {
	for (x = 0; x < enc->width; x += BLOCK_SIZE) {
	     mtf_classlabel(x, y, enc->class, enc->mtfbuf,
			    enc->width, enc->num_class);
	    /* conversion */
	    i = enc->mtfbuf[enc->class[y][x]];
	    index[blk++] = i;
	    hist[i]++;
        }
    }
    bits = 0;
    if (enc->f_huffman == 1) {	/* Huffman */
	VLC *vlc;
	/* NOTE(review): the third argument (16) is presumably the maximum
	   code length, matching the 4-bit "len - 1" field below -- confirm */
	vlc = make_vlc(hist, enc->num_class, 16);
	if (fp == NULL) {
	    for (i = 0; i < enc->num_class; i++) {
		enc->class_cost[i] = vlc->len[i];
	    }
	    for (blk = 0; blk < num_block; blk++) {
		i = index[blk];
		bits += enc->class_cost[i];
	    }
	} else {	/* actually encode */
	    for (i = 0; i < enc->num_class; i++) {
		bits += putbits(fp, 4, vlc->len[i] - 1);
	    }
	    for (blk = 0; blk < num_block; blk++) {
		i = index[blk];
		bits += putbits(fp, vlc->len[i], vlc->code[i]);
	    }
	}
	free_vlc(vlc);
    } else {			/* Arithmetic */
	PMODEL *pm;
	double p, c;
	int l;

	pm = &enc->spm;
	pm->size = enc->num_class + PMCLASS_LEVEL;
	pm->cumfreq[0] = 0;
	/* quantization of log-transformed probability */
	for (i = 0; i < enc->num_class; i++) {
	    p = (double)hist[i] / num_block;
	    if (p > 0.0) {
		l = -log(p) / log(2.0) * (PMCLASS_LEVEL / PMCLASS_MAX);
		if (l >= PMCLASS_LEVEL) l = PMCLASS_LEVEL - 1;
	    } else {
		/* unused index: lowest representable probability */
		l = PMCLASS_LEVEL - 1;
	    }
	    /* mtfbuf is reused to remember the level of each index */
	    enc->mtfbuf[i] = l;
	    /* reconstruct the probability at the centre of level l */
	    p = exp(-log(2.0) * ((double)l+0.5) * PMCLASS_MAX/PMCLASS_LEVEL);
	    pm->freq[i] = p * (1 << 16);
	    if (pm->freq[i] == 0) pm->freq[i]++;
	    pm->cumfreq[i + 1] = pm->cumfreq[i] + pm->freq[i];
	}
	c = log((double)pm->cumfreq[enc->num_class]);
	if (fp == NULL) {
	    p = 1.0 / log(2.0);
	    for (i = 0; i < enc->num_class; i++) {
		enc->class_cost[i] = p * (c - log(pm->freq[i]));
	    }
	    p = 0.0;
	    for (blk = 0; blk < num_block; blk++) {
		i = index[blk];
		p += enc->class_cost[i];
	    }
	    bits = (int)p;
	} else {	/* actually encode */
	    range_t freqoff;

	    /* Entries num_class .. size-1 of the model code the levels
	       themselves; they start with frequency 1 each. */
	    for (i = enc->num_class; i < pm->size; i++) {
		pm->freq[i] = 1;
		pm->cumfreq[i + 1] = pm->cumfreq[i] + pm->freq[i];
	    }
	    freqoff = pm->cumfreq[enc->num_class];
	    for (i = 0; i < enc->num_class; i++) {
		l = enc->mtfbuf[i] + enc->num_class;
		rc_encode(fp, enc->rc, pm->cumfreq[l] - freqoff, pm->freq[l],
			  pm->cumfreq[pm->size] - freqoff);
		/* double the frequencies from the coded level upwards while
		   the total stays below MAX_TOTFREQ << 1 */
		if (pm->cumfreq[pm->size] - freqoff < (MAX_TOTFREQ << 1)) {
		    for (; l < pm->size; l++) {
			pm->freq[l] *= 2;
			pm->cumfreq[l + 1] = pm->cumfreq[l] + pm->freq[l];
		    }
		}
	    }
	    for (blk = 0; blk < num_block; blk++) {
		i = index[blk];
		rc_encode(fp, enc->rc, pm->cumfreq[i], pm->freq[i], freqoff);
	    }
	    /* take over and reset the coder's running counter */
	    bits += enc->rc->code;
	    enc->rc->code = 0;
	}
    }
    free(index);
    free(hist);
    return (bits);
}

/*
 * Code (fp != NULL) or only cost-estimate (fp == NULL) the predictor
 * coefficients of all classes.
 *
 * For every coefficient position k the parameter m in 0..15 that minimises
 * the summed enc->coef_cost over all classes is chosen and stored in
 * enc->coef_m[k] (lowest m on ties).
 *
 * fp == NULL:      returns the summed minimum cost, truncated to int.
 * Huffman mode:    per position writes m (4 bits), then for each class the
 *                  magnitude with encode_golomb() and, for non-zero values,
 *                  one sign bit; returns the bits written.
 * Arithmetic mode: per position sends m and then magnitude/sign symbols
 *                  through rc_encode(); returns enc->rc->code and resets it.
 */
int encode_predictor(FILE *fp, ENCODER *enc)
{
    int cl, k, m, best_m, mag, neg, bits;
    cost_t cost, best_cost, total_cost;
    PMODEL *pm;

    /* choose the coding parameter of every coefficient position */
    total_cost = 0.0;
    for (k = 0; k < enc->prd_order; k++) {
        best_cost = INT_MAX;
        best_m = 0;
        for (m = 0; m < 16; m++) {
            cost = 0.0;
            for (cl = 0; cl < enc->num_class; cl++) {
                mag = enc->predictor[cl][k];
                if (mag < 0) {
                    mag = -mag;
                }
                cost += enc->coef_cost[m][mag];
            }
            if (cost < best_cost) {
                best_cost = cost;
                best_m = m;
            }
        }
        total_cost += best_cost;
        enc->coef_m[k] = best_m;
    }

    bits = total_cost;
    if (fp == NULL) {
        return (bits);
    }

    if (enc->f_huffman == 1) {	/* Huffman */
        bits = 0;
        for (k = 0; k < enc->prd_order; k++) {
            bits += putbits(fp, 4, enc->coef_m[k]);
            for (cl = 0; cl < enc->num_class; cl++) {
                mag = enc->predictor[cl][k];
                neg = (mag < 0) ? 1 : 0;
                if (neg) {
                    mag = -mag;
                }
                bits += encode_golomb(fp, enc->coef_m[k], mag);
                if (mag != 0) {
                    bits += putbits(fp, 1, neg);
                }
            }
        }
        return (bits);
    }

    /* Arithmetic */
    pm = &enc->spm;
    for (k = 0; k < enc->prd_order; k++) {
        set_spmodel(pm, MAX_COEF + 1, enc->coef_m[k]);
        rc_encode(fp, enc->rc, enc->coef_m[k], 1, 16);
        for (cl = 0; cl < enc->num_class; cl++) {
            mag = enc->predictor[cl][k];
            neg = (mag < 0) ? 1 : 0;
            if (neg) {
                mag = -mag;
            }
            rc_encode(fp, enc->rc, pm->cumfreq[mag], pm->freq[mag],
                      pm->cumfreq[pm->size]);
            if (mag > 0) {
                rc_encode(fp, enc->rc, neg, 1, 2);
            }
        }
    }
    bits = enc->rc->code;
    enc->rc->code = 0;
    return (bits);
}

/*
 * Code (fp != NULL) or only cost-estimate (fp == NULL) the group thresholds
 * of every class and, when coding, the probability model chosen per group.
 *
 * Thresholds of a class are sent as successive differences; the list of a
 * class stops early once the running value exceeds MAX_UPARA.  A parameter
 * m in 0..15 minimising the total cost is searched first.
 *
 * fp == NULL: fills enc->th_cost[] with the cost of each possible
 *             difference under the chosen m and returns the minimum cost
 *             (converted to int).
 * fp != NULL: writes m, the differences and (if num_pmodel > 1) the model
 *             id of every group; returns the bits written (Huffman) or
 *             enc->rc->code, which is then reset (arithmetic).
 */
int encode_threshold(FILE *fp, ENCODER *enc)
{
    int cl, gr, i, k, m, min_m, bits;
    cost_t cost, min_cost;
    PMODEL *pm;

    if (enc->f_huffman == 1) {	/* Huffman */
	min_cost = INT_MAX;
	for (m = min_m = 0; m < 16; m++) {
	    bits = 0;
	    for (cl = 0; cl < enc->num_class; cl++) {
		k = 0;
		for (gr = 1; gr < enc->num_group; gr++) {
		    i = enc->th[cl][gr - 1] - k;
		    /* one flag bit, plus the prefix-coded (i - 1) if i > 0 */
		    bits++;
		    if (i > 0) {
			bits += ((i - 1) >> m) + m + 1;
		    }
		    k += i;
		    if (k > MAX_UPARA) break;
		}
	    }
	    if ((cost = bits) < min_cost) {
		min_cost = cost;
		min_m = m;
	    }
	}
	if (fp == NULL) {
	    enc->th_cost[0] = 1.0;
	    for (i = 1; i < MAX_UPARA + 2; i++) {
		enc->th_cost[i] =((i - 1) >> min_m) + min_m + 1 + 1;
	    }
	    bits = min_cost;
	} else {
	    bits = putbits(fp, 4, min_m);
	    for (cl = 0; cl < enc->num_class; cl++) {
		k = 0;
		for (gr = 1; gr < enc->num_group; gr++) {
		    i = enc->th[cl][gr - 1] - k;
		    if (i == 0) {
			bits += putbits(fp, 1, 0);
		    } else {
			bits += putbits(fp, 1, 1);
			bits += encode_golomb(fp, min_m, i - 1);
		    }
		    k += i;
		    if (k > MAX_UPARA) break;
		}
	    }
	    if (enc->num_pmodel > 1) {
		/* k = smallest bit count (at least 1) with 2^k >= num_pmodel */
		for (k = 1; (1 << k) < enc->num_pmodel; k++);
		for (gr = 0; gr < enc->num_group; gr++) {
		    pm = enc->pmlist[gr];
		    bits += putbits(fp, k, pm->id);
		}
	    }
	}
    } else {			/* Arithmetic */
	double p;
	pm = &enc->spm;
	min_cost = INT_MAX;
	for (m = min_m = 0; m < 16; m++) {
	    set_spmodel(pm, MAX_UPARA + 2, m);
	    cost = 0.0;
	    for (cl = 0; cl < enc->num_class; cl++) {
                k = 0;
		for (gr = 1; gr < enc->num_group; gr++) {
		    i = enc->th[cl][gr - 1] - k;
		    /* the total shrinks with k: only differences up to
		       size - k - 1 remain possible */
		    p = (double)pm->freq[i]
                        / (pm->cumfreq[pm->size - k]);
                    cost += -log(p);
                    k += i;
                    if (k > MAX_UPARA) break;
		}
	    }
	    cost /= log(2.0);
	    if (cost < min_cost) {
                min_cost = cost;
                min_m = m;
	    }
	}
	/* rebuild the model for the winning parameter */
	set_spmodel(pm, MAX_UPARA + 2, min_m);
	p = log(pm->cumfreq[MAX_UPARA + 2]);
	if (fp == NULL) {
	    for (i = 0; i < MAX_UPARA + 2; i++) {
		enc->th_cost[i] = (p - log(pm->freq[i])) / log(2.0);
	    }
	    bits = min_cost;
	} else {
	    rc_encode(fp, enc->rc, min_m, 1, 16);
	    for (cl = 0; cl < enc->num_class; cl++) {
		k = 0;
		for (gr = 1; gr < enc->num_group; gr++) {
		    i = enc->th[cl][gr - 1] - k;
		    rc_encode(fp, enc->rc, pm->cumfreq[i],  pm->freq[i],
			      pm->cumfreq[pm->size - k]);
		    k += i;
		    if (k > MAX_UPARA) break;
		}
	    }
	    if (enc->num_pmodel > 1) {
		for (gr = 0; gr < enc->num_group; gr++) {
		    pm = enc->pmlist[gr];
		    rc_encode(fp, enc->rc, pm->id, 1, enc->num_pmodel);
		}
	    }
	    bits = enc->rc->code;
	    enc->rc->code = 0;
	}
    }
    return (bits);
}

/*
 * Write the prediction-error symbol of every pixel to fp, scanning the
 * image row by row.
 *
 * When enc->f_huffman == 1 each symbol enc->epara[y][x] is emitted with
 * the VLC table chosen by the pixel's group and that group's probability
 * model id; otherwise it is passed to the range coder using the
 * cumulative frequencies of the model selected via enc->fconv[prd],
 * offset by enc->bconv[prd].
 *
 * Returns the accumulated putbits() results in the Huffman case (the
 * final flush is not counted), or enc->rc->code as read after
 * rc_finishenc() in the arithmetic case.
 */
int encode_error(FILE *fp, ENCODER *enc)
{
    int row, col;
    int total = 0;

    if (enc->f_huffman != 1) {
	/* Arithmetic */
	for (row = 0; row < enc->height; row++) {
	    for (col = 0; col < enc->width; col++) {
		int grp = enc->group[row][col];
		int pred = enc->prd[row][col];
		int err = enc->epara[row][col];
		int offset = enc->bconv[pred];
		PMODEL *model = enc->pmlist[grp] + enc->fconv[pred];
		int low = model->cumfreq[offset];

		/* Frequencies are taken relative to the window that
		   starts at `offset` and spans maxval + 1 symbols. */
		rc_encode(fp, enc->rc,
			  model->cumfreq[offset + err] - low,
			  model->freq[offset + err],
			  model->cumfreq[offset + enc->maxval + 1] - low);
	    }
	}
	rc_finishenc(fp, enc->rc);
	total += enc->rc->code;
	return (total);
    }

    /* Huffman */
    for (row = 0; row < enc->height; row++) {
	for (col = 0; col < enc->width; col++) {
	    int grp = enc->group[row][col];
	    int err = enc->epara[row][col];
	    PMODEL *model = enc->pmlist[grp];
	    VLC *table = &enc->vlcs[grp][model->id];

	    total += putbits(fp, table->len[err], table->code[err]);
	}
    }
    putbits(fp, 7, 0);	/* flush remaining bits */
    return (total);
}

/*
 * Fetch the integer argument that must follow the option at argv[*idx].
 * Advances *idx onto the consumed argument.  If the option is the last
 * word on the command line, report the problem and exit instead of
 * handing argv[argc] (a null pointer) to atoi().
 */
static int option_value(int argc, char **argv, int *idx)
{
    if (*idx + 1 >= argc) {
	fprintf(stderr, "Option %s requires an argument!\n", argv[*idx]);
	exit(1);
    }
    *idx += 1;
    return atoi(argv[*idx]);
}

/*
 * Encoder entry point.
 *
 * usage: encmrp [options] infile outfile
 *
 * Parses the command line, reads the input PGM image, runs two
 * iterative optimization loops (the first on predictors, groups and
 * classes; the second also accounting for side-information cost), then
 * writes header, class info, predictors, thresholds and prediction
 * errors to the output file while printing the size of each part.
 *
 * Out-of-range option values silently fall back to the compile-time
 * defaults.  An option that needs a value but has none, or an unknown
 * option, terminates the program with exit status 1.  Without both file
 * names the usage text is printed and the program exits with status 0.
 *
 * Returns 0 on success.
 */
int main(int argc, char **argv)
{
    cost_t cost, min_cost, side_cost;
    int i, j, k, cl, bits, **prd_save;
    double rate;
    IMAGE *img;
    ENCODER *enc;
    double elapse = 0.0;
    int f_mmse = 0;
    int f_optpred = 0;
    int f_huffman = 0;
    int num_class = NUM_CLASS;
    int num_group = NUM_GROUP;
    int prd_order = PRD_ORDER;
    int coef_precision = COEF_PRECISION;
    int num_pmodel = NUM_PMODEL;
    int pm_accuracy = PM_ACCURACY;
    int max_iteration = MAX_ITERATION;
    char *infile, *outfile;
    FILE *fp;

    cpu_time();
    setbuf(stdout, 0);
    infile = outfile = NULL;
    for (i = 1; i < argc; i++) {
	if (argv[i][0] == '-') {
	    switch (argv[i][1]) {
		case 'M':
		    num_class = option_value(argc, argv, &i);
		    if (num_class <= 0 || num_class > 63) {
			num_class = NUM_CLASS;
		    }
		    break;
		case 'K':
		    prd_order = option_value(argc, argv, &i);
		    if (prd_order <= 0 || prd_order > 56) {
			prd_order = PRD_ORDER;
		    }
		    break;
		case 'P':
		    coef_precision = option_value(argc, argv, &i);
		    if (coef_precision <= 0 || coef_precision > 16) {
			coef_precision = COEF_PRECISION;
		    }
		    break;
		case 'V':
		    num_pmodel = option_value(argc, argv, &i);
		    if (num_pmodel <= 0 || num_pmodel > 64) {
			num_pmodel = NUM_PMODEL;
		    }
		    break;
		case 'A':
		    pm_accuracy = option_value(argc, argv, &i);
		    if (pm_accuracy < -1 || pm_accuracy > 6) {
			pm_accuracy = PM_ACCURACY;
		    }
		    break;
		case 'I':
		    max_iteration = option_value(argc, argv, &i);
		    if (max_iteration <= 0) {
			max_iteration = MAX_ITERATION;
		    }
		    break;
		case 'm':
		    f_mmse = 1;
		    break;
		case 'o':
		    f_optpred = 1;
		    break;
		case 'h':
		    f_huffman = 1;
		    break;
		default:
		    fprintf(stderr, "Unknown option: %s!\n", argv[i]);
		    exit (1);
	    }
	} else {
	    /* First bare word is the input file; any later one
	       (re)sets the output file. */
	    if (infile == NULL) {
		infile = argv[i];
	    } else {
		outfile = argv[i];
	    }
	}
    }
    if (f_huffman == 1) pm_accuracy = -1;
    if (pm_accuracy > coef_precision) pm_accuracy = coef_precision;
    if (infile == NULL || outfile == NULL) {
	printf(BANNER"\n", 0.1 * VERSION);
	printf("usage: encmrp [options] infile outfile\n");
	printf("options:\n");
	printf("    -M num  Number of predictors [%d]\n", num_class);
	printf("    -K num  Prediction order [%d]\n", prd_order);
	printf("    -P num  Precision of prediction coefficients (fractional bits) [%d]\n", coef_precision);
	printf("    -V num  Number of probability models [%d]\n", num_pmodel);
	printf("    -A num  Accuracy of probability models [%d]\n", pm_accuracy);
	printf("    -I num  Maximum number of iterations [%d]\n", max_iteration);
	printf("    -m      Use MMSE predictors\n");
	printf("    -h      Use Huffman coding\n");
	printf("    -o      Further optimization of predictors (experimental)\n");
	printf("infile:     Input file (must be in a raw PGM format)\n");
	printf("outfile:    Output file\n");
	exit(0);
    }
    img = read_pgm(infile);
    fp = fileopen(outfile, "wb");
    k = img->width * img->height;
    /* A negative value can only come from the compile-time default
       (non-positive user values were reset to it above); in that case
       the setting is derived from the number of pixels. */
    if (num_class < 0) {
	num_class = 7.5E-5 * k + 10.5;
	if (num_class > 63) num_class = 63;
    }
    if (prd_order < 0) {
	prd_order = 6.5E-5 * k + 17;
	/* Snap down to a value of the form i * (i + 1), capped at 56. */
	for (i = 1; i < 7; i++) {
	    if (prd_order < (i+1) * (i+1)) {
		prd_order = i * (i+1);
		break;
	    }
	}
	if (i >= 7) prd_order = 56;
    }
    printf("%s -> %s (%dx%d)\n", infile, outfile, img->width, img->height);
    printf("M = %d, K = %d, P = %d, V = %d, A = %d\n",
	   num_class, prd_order, coef_precision, num_pmodel, pm_accuracy);
    enc = init_encoder(img, num_class, num_group, prd_order,
		       coef_precision, f_huffman, num_pmodel, pm_accuracy);
    enc->pmodels = init_pmodels(enc->num_group, enc->num_pmodel,
				enc->pm_accuracy, NULL, enc->sigma,
				enc->maxval + 1);
    if (enc->f_huffman == 1) {
	enc->vlcs = init_vlcs(enc->pmodels, enc->num_group, enc->num_pmodel);
    }
    set_cost_model(enc, f_mmse);
    init_class(enc);
    prd_save = (int **)alloc_2d_array(enc->num_class, enc->prd_order,
				      sizeof(int));
    /* 1st loop */
    min_cost = INT_MAX;
    for (i = j = 0; i < max_iteration; i++) {
	printf("[%2d] cost =", i);
	cost = design_predictor(enc, f_mmse);
	printf(" %d ->", (int)cost);
	cost = optimize_group(enc);
	printf(" %d ->", (int)cost);
	cost = optimize_class(enc);
	printf(" %d", (int)cost);
	if (cost < min_cost) {
	    /* New best: remember the iteration and its predictors. */
	    printf(" *\n");
	    min_cost = cost;
	    j = i;
	    for (cl = 0; cl < enc->num_class; cl++) {
		for (k= 0; k < enc->prd_order; k++) {
		    prd_save[cl][k] = enc->predictor[cl][k];
		}
	    }
	} else {
	    printf("\n");
	}
	/* Stop once EXTRA_ITERATION rounds passed without improvement. */
	if (i - j >= EXTRA_ITERATION) break;
	elapse += cpu_time();
    }
    /* Restore the best predictors found by the 1st loop. */
    for (cl = 0; cl < enc->num_class; cl++) {
	for (k= 0; k < enc->prd_order; k++) {
	    enc->predictor[cl][k] = prd_save[cl][k];
	}
    }
    set_cost_rate(enc);
    predict_region(enc, 0, 0, enc->height, enc->width);
    cost = calc_cost(enc, 0, 0, enc->height, enc->width);
    printf("cost = %d\n", (int)cost);

    /* 2nd loop */
    min_cost = INT_MAX;
    for (i = j = 0; i < max_iteration; i++) {
	printf("(%2d) cost =", i);
	if (f_optpred) {
	    cost = optimize_predictor(enc);
	    printf(" %d ->", (int)cost);
	}
	/* Passing NULL as the stream only estimates the side cost. */
	side_cost = encode_predictor(NULL, enc);
	cost = optimize_group(enc);
	side_cost += encode_threshold(NULL, enc);
	printf(" %d ->", (int)cost);
	cost = optimize_class(enc);
	side_cost += encode_class(NULL, enc);
	printf(" %d (%d)", (int)cost, (int)side_cost);
	cost += side_cost;
	if (cost < min_cost) {
	    printf(" *\n");
	    min_cost = cost;
	    j = i;
	    if (f_optpred) {
		for (cl = 0; cl < enc->num_class; cl++) {
		    for (k= 0; k < enc->prd_order; k++) {
			prd_save[cl][k] = enc->predictor[cl][k];
		    }
		}
	    }
	} else {
	    printf("\n");
	}
	if (f_optpred) {
	    if (i - j >= EXTRA_ITERATION) break;
	} else {
	    /* Without predictor optimization, stop at the first
	       iteration that fails to improve. */
	    if (i > j) break;
	}
	elapse += cpu_time();
    }
    if (f_optpred) {
	/* Restore the best predictors and re-derive dependent state. */
	for (cl = 0; cl < enc->num_class; cl++) {
	    for (k= 0; k < enc->prd_order; k++) {
		enc->predictor[cl][k] = prd_save[cl][k];
	    }
	}
	predict_region(enc, 0, 0, enc->height, enc->width);
	calc_cost(enc, 0, 0, enc->height, enc->width);
	optimize_group(enc);
	optimize_class(enc);
    }

    remove_emptyclass(enc);
    bits = k = write_header(enc, fp);
    printf("header info.\t:%10d bits\n", k);
    if (enc->f_huffman == 0) {
	enc->rc = rc_init();
	putbits(fp, 7, 0);	/* byte alignment for the rangecoder */
    }
    bits += k = encode_class(fp, enc);
    printf("class info.\t:%10d bits\n", k);
    bits += k = encode_predictor(fp, enc);
    printf("predictors\t:%10d bits\n", k);
    bits += k = encode_threshold(fp, enc);
    printf("thresholds\t:%10d bits\n", k);
    bits += k = encode_error(fp, enc);
    printf("pred. errors\t:%10d bits\n", k);
    printf("------------------------------\n");
    printf("total\t\t:%10d bits\n", bits);
    rate = (double)bits / (enc->height * enc->width);
    printf("coding rate\t:%10.5f b/p\n", rate);
    fclose(fp);
    elapse += cpu_time();
    printf("cpu time :%.2f sec.\n", elapse);
    return (0);
}
