/*
  class for decoders
  Copyright (C) 1999  Martin Vogt

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation.

  For more information look at the file COPYRIGHT in this package

 */


#include "decoderClass.h"



#define NDEBUG
#include <assert.h>

/*
 * Zigzag scan order for the plain (non-MMX) IDCT: entry k is the
 * offset (row*8 + column) inside an 8x8 block at which the k-th
 * coefficient of the scan is stored.
 */
static const int zigzag_direct_nommx[64] = {
   0,  1,  8, 16,  9,  2,  3, 10,   /* scan positions  0.. 7 */
  17, 24, 32, 25, 18, 11,  4,  5,   /* scan positions  8..15 */
  12, 19, 26, 33, 40, 48, 41, 34,   /* scan positions 16..23 */
  27, 20, 13,  6,  7, 14, 21, 28,   /* scan positions 24..31 */
  35, 42, 49, 56, 57, 50, 43, 36,   /* scan positions 32..39 */
  29, 22, 15, 23, 30, 37, 44, 51,   /* scan positions 40..47 */
  58, 59, 52, 45, 38, 31, 39, 46,   /* scan positions 48..55 */
  53, 60, 61, 54, 47, 55, 62, 63    /* scan positions 56..63 */
};


/*
 * Zigzag scan order used when the MMX IDCT is active.  Every entry is
 * the row/column transpose of the corresponding entry of the plain
 * zigzag table: a plain offset r*8+c becomes c*8+r here.  The trailing
 * comment on each entry is the plain (untransposed) offset.
 *
 * The table must be a permutation of 0..63.  Scan positions 17 and 56
 * used to duplicate the offsets of positions 30 and 61 (5*8+2 and
 * 7*8+6), which left offsets 3*8+2 and 5*8+6 unreachable and stored
 * two coefficients in the wrong slot; they are corrected below.
 */
static const int zigzag_direct_mmx[64] = {

    0*8+0/* 0*/, 1*8+0/* 1*/, 0*8+1/* 8*/, 0*8+2/*16*/, 1*8+1/* 9*/, 2*8+0/* 2*/, 3*8+0/* 3*/, 2*8+1/*10*/,
    1*8+2/*17*/, 0*8+3/*24*/, 0*8+4/*32*/, 1*8+3/*25*/, 2*8+2/*18*/, 3*8+1/*11*/, 4*8+0/* 4*/, 5*8+0/* 5*/,
    4*8+1/*12*/, 3*8+2/*19*/, 2*8+3/*26*/, 1*8+4/*33*/, 0*8+5/*40*/, 0*8+6/*48*/, 1*8+5/*41*/, 2*8+4/*34*/,
    3*8+3/*27*/, 4*8+2/*20*/, 5*8+1/*13*/, 6*8+0/* 6*/, 7*8+0/* 7*/, 6*8+1/*14*/, 5*8+2/*21*/, 4*8+3/*28*/,
    3*8+4/*35*/, 2*8+5/*42*/, 1*8+6/*49*/, 0*8+7/*56*/, 1*8+7/*57*/, 2*8+6/*50*/, 3*8+5/*43*/, 4*8+4/*36*/,
    5*8+3/*29*/, 6*8+2/*22*/, 7*8+1/*15*/, 7*8+2/*23*/, 6*8+3/*30*/, 5*8+4/*37*/, 4*8+5/*44*/, 3*8+6/*51*/,
    2*8+7/*58*/, 3*8+7/*59*/, 4*8+6/*52*/, 5*8+5/*45*/, 6*8+4/*38*/, 7*8+3/*31*/, 7*8+4/*39*/, 6*8+5/*46*/,
    5*8+6/*53*/, 4*8+7/*60*/, 5*8+7/*61*/, 6*8+6/*54*/, 7*8+5/*47*/, 7*8+6/*55*/, 6*8+7/*62*/, 7*8+7/*63*/
};



/*
 * Remembers the video stream, determines whether the MMX code path is
 * used (lmmx) and fills the zigzag_direct lookup table accordingly.
 * Only the first 64 slots carry scan offsets; slots 64..255 are zeroed.
 */
DecoderClass::DecoderClass(VidStream* vid_stream) {
  this->vid_stream=vid_stream;
#ifdef INTEL
  cout << "check mm support"<<endl;
  lmmx=mm_support();
  cout << (lmmx ? "Jau! Intel MMX" : "Sorry folks, no MMX.") << endl;
#else
  lmmx=false;
  cout << "no INTEL arch- disable MMX"<<endl;
#endif

  /* Pick the scan table that matches the IDCT that will be used. */
  const int* scanTable = zigzag_direct_nommx;
  if (lmmx) {
    scanTable = zigzag_direct_mmx;
  }

  int slot = 0;
  while (slot < 64) {
    zigzag_direct[slot] = scanTable[slot];
    ++slot;
  }
  /* The remaining slots up to 255 are cleared. */
  while (slot < 256) {
    zigzag_direct[slot] = 0;
    ++slot;
  }
}
 

/* Empty destructor: this class releases nothing here. */
DecoderClass::~DecoderClass() {}
 


int DecoderClass::decodeDCTDCSizeLum() {
  unsigned int macro_val;
  unsigned int index;
  index=(vid_stream->bitwindow)->showBits(5);
  if (index < 31) {
    macro_val = dct_dc_size_luminance[index].value;
    (vid_stream->bitwindow)->flushBits(dct_dc_size_luminance[index].num_bits);
  } else {
    index=(vid_stream->bitwindow)->showBits(9);	
    index -= 0x1f0;
    macro_val = dct_dc_size_luminance1[index].value;
    (vid_stream->bitwindow)->flushBits(dct_dc_size_luminance1[index].num_bits);
  }	
  return macro_val;
}


int DecoderClass::decodeDCTDCSizeChrom() {
  unsigned int macro_val;
  unsigned int index;
  index=(vid_stream->bitwindow)->showBits(5);
  if (index < 31) {
    macro_val = dct_dc_size_chrominance[index].value;
    (vid_stream->bitwindow)->flushBits(dct_dc_size_chrominance[index].num_bits);
  }else {
    index=(vid_stream->bitwindow)->showBits(10);
    index -= 0x3e0;
    macro_val = dct_dc_size_chrominance1[index].value;
    (vid_stream->bitwindow)->flushBits(dct_dc_size_chrominance1[index].num_bits);
  }
  return macro_val;
}

/*
 *--------------------------------------------------------------
 *
 * DecodeMBAddrInc --
 *
 *      Huffman DecoderClass for macro_block_address_increment; the location
 *      in which the result will be placed is being passed as argument.
 *      The decoded value is obtained by doing a table lookup on
 *      mb_addr_inc.
 *
 * Results:
 *      The decoded value for macro_block_address_increment or ERROR
 *      for unbound values will be placed in the location specified.
 *
 * Side effects:
 *      Bit stream is irreversibly parsed.
 *
 *--------------------------------------------------------------
 */
int DecoderClass::decodeMBAddrInc() {
  unsigned int index;
  int val;
  index=(vid_stream->bitwindow)->showBits(11);
  val = mb_addr_inc[index].value;
  (vid_stream->bitwindow)->flushBits(mb_addr_inc[index].num_bits);
  if (mb_addr_inc[index].num_bits==0) {
    printf("num_bits==0\n");
    val=1;
  }
  if (val==MB_ESCAPE) {
    vid_stream->mblock.mb_address += 33;
    val=MB_STUFFING;
  }
  if (val == -1) {
    cout <<"EROR: decodeMBAddrInc"<<endl;
    val=MB_STUFFING;
  }
  return val;
}


/*
 *--------------------------------------------------------------
 *
 * DecodeMotionVectors --
 *
 *      Huffman DecoderClass for the various motion vectors, including
 *      motion_horizontal_forward_code, motion_vertical_forward_code,
 *      motion_horizontal_backward_code, motion_vertical_backward_code.
 *      Location where the decoded result will be placed is being passed
 *      as argument. The decoded values are obtained by doing a table
 *      lookup on motion_vectors.
 *
 * Results:
 *      The decoded value for the motion vector or ERROR for unbound
 *      values will be placed in the location specified.
 *
 * Side effects:
 *      Bit stream is irreversibly parsed.
 *
 *--------------------------------------------------------------
 */
int DecoderClass::decodeMotionVectors() {
  unsigned int index;
  int value;
  index=(vid_stream->bitwindow)->showBits(11);
  value = motion_vectors[index].code;

  (vid_stream->bitwindow)->flushBits(motion_vectors[index].num_bits);
  return value;
}


/*
 *--------------------------------------------------------------
 *
 * DecodeCBP --
 *
 *      Huffman DecoderClass for coded_block_pattern; location in which the
 *      decoded result will be placed is being passed as argument. The
 *      decoded values are obtained by doing a table lookup on
 *      coded_block_pattern.
 *
 * Results:
 *      The decoded value for coded_block_pattern or ERROR for unbound
 *      values will be placed in the location specified.
 *
 * Side effects:
 *      Bit stream is irreversibly parsed.
 *
 *--------------------------------------------------------------
 */
int DecoderClass::decodeCBP() {
  unsigned int index;
  unsigned int coded_bp;
  index=(vid_stream->bitwindow)->showBits(9);
  coded_bp = coded_block_pattern[index].cbp;
  (vid_stream->bitwindow)->flushBits(coded_block_pattern[index].num_bits);
  return coded_bp;
}







/*
 *--------------------------------------------------------------
 *
 * DecodeMBTypeB --
 *
 *      Huffman Decoder for macro_block_type in bidirectionally-coded
 *      pictures;locations in which the decoded results: macroblock_quant,
 *      macroblock_motion_forward, macro_block_motion_backward,
 *      macroblock_pattern, macro_block_intra, will be placed are
 *      being passed as argument. The decoded values are obtained by
 *      doing a table lookup on mb_type_B.
 *
 * Results:
 *      The various decoded values for macro_block_type in
 *      bidirectionally-coded pictures or ERROR for unbound values will
 *      be placed in the locations specified.
 *
 * Side effects:
 *      Bit stream is irreversibly parsed.
 *
 *--------------------------------------------------------------
 */
void DecoderClass::decodeMBTypeB(int& quant,int& motion_fwd,
				 int& motion_bwd,int& pat,int& intra){
  unsigned int index;							
									
  index=(vid_stream->bitwindow)->showBits(6);				
									
  quant = mb_type_B[index].mb_quant;					
  motion_fwd = mb_type_B[index].mb_motion_forward;			
  motion_bwd = mb_type_B[index].mb_motion_backward;			
  pat = mb_type_B[index].mb_pattern;					
  intra = mb_type_B[index].mb_intra;					
  if (index == 0) {
    cout << "error in decodeMBTypeB"<<endl;
  }
  (vid_stream->bitwindow)->flushBits(mb_type_B[index].num_bits);	
}


/*
 *--------------------------------------------------------------
 *
 * DecodeMBTypeI --
 *
 *      Huffman Decoder for macro_block_type in intra-coded pictures;
 *      locations in which the decoded results: macroblock_quant,
 *      macroblock_motion_forward, macro_block_motion_backward,
 *      macroblock_pattern, macro_block_intra, will be placed are
 *      being passed as argument.
 *
 * Results:
 *      The various decoded values for macro_block_type in intra-coded
 *      pictures or ERROR for unbound values will be placed in the
 *      locations specified.
 *
 * Side effects:
 *      Bit stream is irreversibly parsed.
 *
 *--------------------------------------------------------------
 */
void DecoderClass::decodeMBTypeI(int& quant,int& motion_fwd,
				 int& motion_bwd,int& pat,int& intra) {

  unsigned int index;							
  static int quantTbl[4] = {ERROR, 1, 0, 0};				
									
  index=(vid_stream->bitwindow)->showBits(2);				
									
  motion_fwd = 0;							
  motion_bwd = 0;							
  pat = 0;								
  intra = 1;								
  quant = quantTbl[index];						
  if (quant == ERROR) {
    cout << "decodeMBTypeI Error"<<endl;
  }
  if (index) {								
    (vid_stream->bitwindow)->flushBits (1 + quant);			
  }									

}



/*
 *--------------------------------------------------------------
 *
 * DecodeMBTypeP --
 *
 *      Huffman Decoder for macro_block_type in predictive-coded pictures;
 *      locations in which the decoded results: macroblock_quant,
 *      macroblock_motion_forward, macro_block_motion_backward,
 *      macroblock_pattern, macro_block_intra, will be placed are
 *      being passed as argument. The decoded values are obtained by
 *      doing a table lookup on mb_type_P.
 * 
 * Results:
 *      The various decoded values for macro_block_type in
 *      predictive-coded pictures or ERROR for unbound values will be
 *      placed in the locations specified.
 *
 * Side effects:
 *      Bit stream is irreversibly parsed.
 *
 *--------------------------------------------------------------
 */
void DecoderClass::decodeMBTypeP(int& quant,int& motion_fwd,
				 int& motion_bwd,int& pat,int& intra) {

  unsigned int index;							
  
  index=(vid_stream->bitwindow)->showBits(6);				
  
  quant = mb_type_P[index].mb_quant;					
  motion_fwd = mb_type_P[index].mb_motion_forward;			
  motion_bwd = mb_type_P[index].mb_motion_backward;			
  pat = mb_type_P[index].mb_pattern;					
  intra = mb_type_P[index].mb_intra;					
  if (index == 0) {
    cout << "error in decodeMBTypeP"<<endl;
  }
  (vid_stream->bitwindow)->flushBits(mb_type_P[index].num_bits);	
}

/*
 *--------------------------------------------------------------
 *
 * decodeDCTCoeff --
 *
 *      Huffman Decoder for dct_coeff_first and dct_coeff_next;
 *      the table to use (dct_coeff_first or dct_coeff_next) is passed
 *      in dct_coeff_tbl, and the decoded run and level are written to
 *      the reference arguments.
 *      
 *      The decoder first examines the next 8 bits in the input stream,
 *      and perform according to the following cases:
 *      
 *      '0000 0000' - examine 8 more bits (i.e. 16 bits total) and
 *                    perform a table lookup on dct_coeff_tbl_0.
 *                    One more bit is then examined to determine the sign
 *                    of level.
 *
 *      '0000 0001' - examine 4 more bits (i.e. 12 bits total) and 
 *                    perform a table lookup on dct_coeff_tbl_1.
 *                    One more bit is then examined to determine the sign
 *                    of level.
 *      
 *      '0000 0010' - examine 2 more bits (i.e. 10 bits total) and
 *                    perform a table lookup on dct_coeff_tbl_2.
 *                    One more bit is then examined to determine the sign
 *                    of level.
 *
 *      '0000 0011' - examine 2 more bits (i.e. 10 bits total) and 
 *                    perform a table lookup on dct_coeff_tbl_3.
 *                    One more bit is then examined to determine the sign
 *                    of level.
 *
 *      otherwise   - perform a table lookup on dct_coeff_tbl. If the
 *                    value of run is not ESCAPE, extract one more bit
 *                    to determine the sign of level; otherwise 6 more
 *                    bits will be extracted to obtain the actual value 
 *                    of run , and then 8 or 16 bits to get the value of level.
 *
 *      Note on scaling: in the ESCAPE paths the level is explicitly
 *      doubled (level += level, or a sign-extending shift by 23
 *      instead of 24).  The table paths shift by LEVEL_SHIFT-1, which
 *      presumably yields the same doubled scale (assumes LEVEL_SHIFT
 *      is the position of the level field -- TODO confirm against the
 *      table definitions).
 *
 * Results:
 *      The decoded values of run and level are placed in the
 *      locations specified.  When run is END_OF_BLOCK (only detected
 *      on the dct_coeff_tbl path) level is left untouched.
 *
 * Side effects:
 *      Bit stream is irreversibly parsed, except for END_OF_BLOCK,
 *      where no bits are flushed by this function.
 *
 *--------------------------------------------------------------
 */

void DecoderClass::decodeDCTCoeff(unsigned short int* dct_coeff_tbl,
				  unsigned RUNTYPE& run,
				  int& level) {


  unsigned int temp, index;
  unsigned int value, next32bits, flushed;
  MpegPlayBitWindow* bitwindow=vid_stream->bitwindow;
  /*
   * Grab the next 32 bits and use it to improve performance of
   * getting the bits to parse. Thus, calls are translated as:
   *
   *	show_bitsX  <-->   next32bits >> (32-X)
   *	get_bitsX   <-->   val = next32bits >> (32-flushed-X);
   *			   flushed += X;
   *			   next32bits &= bitMask[flushed];
   *	flush_bitsX <-->   flushed += X;
   *			   next32bits &= bitMask[flushed];
   *
   * I've streamlined the code a lot, so that we don't have to mask
   * out the low order bits and a few of the extra adds are removed.
   */

  next32bits=bitwindow->showBits32(); 
  /* show_bits8(index); */
  /* The top 8 bits select which lookup table applies. */
  index = next32bits >> 24;

  if (index > 3) {
    /* Common case: the code is resolved by the caller supplied table. */
    value = dct_coeff_tbl[index];
    run = value >> RUN_SHIFT;
    if (run != END_OF_BLOCK) {
      /* num_bits = (value & NUM_MASK) + 1; */
      /* flush_bits(num_bits); */
      if (run != ESCAPE) {
	 /* get_bits1(value); */
	 /* if (value) level = -level; */
	 /* code length plus one extra bit for the sign */
	 flushed = (value & NUM_MASK) + 2;
         level = (value & LEVEL_MASK) >> (LEVEL_SHIFT-1);
	 /* the sign bit is now the lowest bit of value */
	 value = next32bits >> (32-flushed);

	 if (value&0x1) level *= -1;


	 /* next32bits &= ((~0) >> flushed);  last op before update */
       }
       else {    /* run == ESCAPE */
	 /* Get the next six into run, and next 8 into temp */
         /* get_bits14(temp); */
	 flushed = (value & NUM_MASK) + 1;
	 /* 18-flushed == 32-flushed-14: the 14 bits after the escape code */
	 temp = next32bits >> (18-flushed);
	 /* Normally, we'd ad 14 to flushed, but I've saved a few
	  * instr by moving the add below */
	 temp &= 0x3fff;
	 run = temp >> 8;
	 temp &= 0xff;
	 if (temp == 0) {
            /* First level byte is 0: the level is in the following
	     * 8 bits. */
            /* get_bits8(level); */
	    level = next32bits >> (10-flushed);
	    level &= 0xff;
	    /* double the level */
	    level +=level;

	    /* 14 bits (run + first level byte) + 8 bits (second byte) */
	    flushed += 22;
	    /* compiled out: NDEBUG is defined at the top of this file */
 	    assert(level >= 128);
	 } else if (temp != 128) {
	    /* Grab sign bit */
	    flushed += 14;
	    /* "24-1" binds tighter than ">>", so this shifts right by
	     * 23: temp's low 8 bits are sign extended and the result
	     * is left doubled. */
	    level = ((int) (temp << 24)) >> 24-1;


	 } else {
            /* First level byte is 128: the following 8 bits, less
	     * 256, give a negative level. */
            /* get_bits8(level); */
	    level = next32bits >> (10-flushed);
	    level &= 0xff;
	    flushed += 22;
	    level -= 256;
	    /* double the level */
	    level +=level;

	 }
       }
       /* Update bitstream... */
      bitwindow->flushBitsDirect(flushed);
    }
  }
  else {
   /* The first 8 bits are 0..3: long codes, resolved by the
    * dct_coeff_tbl_0 .. dct_coeff_tbl_3 tables. */
   switch (index) {
    case 2: {
      /* show_bits10(index); */
      index = next32bits >> 22;
      value = dct_coeff_tbl_2[index & 3];
      break;
    }
    case 3: {
      /* show_bits10(index); */
      index = next32bits >> 22;
      value = dct_coeff_tbl_3[index & 3];
      break;
    }
    case 1: {
      /* show_bits12(index); */
      index = next32bits >> 20;
      value = dct_coeff_tbl_1[index & 15];
      break;
    }
    default: { /* index == 0 */
      /* show_bits16(index); */
      index = next32bits >> 16;
      value = dct_coeff_tbl_0[index & 255];
    }}
    run = value >> RUN_SHIFT;
    level = (value & LEVEL_MASK) >> (LEVEL_SHIFT-1);

    /*
     * Fold these operations together to make it fast...
     */
    /* num_bits = (value & NUM_MASK) + 1; */
    /* flush_bits(num_bits); */
    /* get_bits1(value); */
    /* if (value) level = -level; */

    /* code length plus one extra bit for the sign */
    flushed = (value & NUM_MASK) + 2;
    value = next32bits >> (32-flushed);

    if (value&0x1) level *= -1;

    /* Update bitstream ... */
    bitwindow->flushBitsDirect(flushed);
  }

}




/*
 *--------------------------------------------------------------
 *
 * ParseReconBlock --
 *
 *    Parse values for block structure from bitstream.
 *      n is an indication of the position of the block within
 *      the macroblock (i.e. 0-5) and indicates the type of 
 *      block: n < 4 is parsed with the luminance tables, n == 4
 *      and n == 5 with the chrominance tables. Reconstructs
 *      coefficients from values parsed and puts them in the
 *      block.dct_recon array in vid stream structure, then runs
 *      the inverse DCT on that array in place.
 *      When exactly one coefficient was counted (coeffCount == 1)
 *      the non-MMX path uses the sparse IDCT.
 *
 * Results:
 *      None returned.  If fewer than 512 bytes are available in the
 *      bit window the function reports this on cout and returns
 *      without touching the block.
 *
 * Side effects:
 *      Bit stream irreversibly parsed.  The DC predictors
 *      (dct_dc_y_past, dct_dc_cr_past, dct_dc_cb_past) of the block
 *      are updated for intra macroblocks.
 *
 *--------------------------------------------------------------
 */

/* Shorthands for the DC predictors stored in the block structure. */
#define DCT_dc_y_past blockPtr->dct_dc_y_past
#define DCT_dc_cr_past blockPtr->dct_dc_cr_past
#define DCT_dc_cb_past blockPtr->dct_dc_cb_past


void DecoderClass::ParseReconBlock(int n) {

   

  Block *blockPtr = &vid_stream->block;
  MpegPlayBitWindow* bitwindow=vid_stream->bitwindow;
  /* number of coefficients counted for this block; selects the IDCT */
  int coeffCount=0;
  if (bitwindow->hasBytes(512) == false) {
    cout << "cannot get 512 raw bytes"<<endl;
    return;
  }

  {
    /*
     * Working variables for the parse.  (The original mpeg_play code
     * copied bit stream state into locals at this point; this version
     * goes through the bitwindow object instead.)
     */
    int diff;
    int size,  pos, coeff;
    int level=0;
    unsigned RUNTYPE run;
    unsigned RUNTYPE i;
   
    short int *reconptr;
    unsigned char* iqmatrixptr;
    unsigned char* niqmatrixptr;
    int qscale;

    reconptr = blockPtr->dct_recon[0];

    /* 
     * Clear the 64 reconstructed coefficients before parsing.
     */
    memset((char *) blockPtr->dct_recon, 0, 64*sizeof(short int));

    if (vid_stream->mblock.mb_intra) {
          
      if (n < 4) {
	
	/*
	 * Get the luminance bits.  This code has been hand optimized to
	 * get by the normal bit parsing routines.  We get some speedup
	 * by grabbing the next 16 bits and parsing things locally.
	 * Thus, calls are translated as:
	 *
	 *    show_bitsX  <-->   next16bits >> (16-X)
	 *    get_bitsX   <-->   val = next16bits >> (16-flushed-X);
	 *               flushed += X;
	 *               next16bits &= bitMask[flushed];
	 *    flush_bitsX <-->   flushed += X;
	 *               next16bits &= bitMask[flushed];
	 *
	 * I've streamlined the code a lot, so that we don't have to mask
	 * out the low order bits and a few of the extra adds are removed.
	 *    bsmith
	 */
	unsigned int next16bits, index, flushed;
        next16bits=bitwindow->showBits(16);

	/* Same table lookup as decodeDCTDCSizeLum(), done on the
	 * local 16 bit copy. */
        index = next16bits >> (16-5);
        if (index < 31) {
          size = dct_dc_size_luminance[index].value;
          flushed = dct_dc_size_luminance[index].num_bits;
        } else {
          index = next16bits >> (16-9);
          index -= 0x1f0;
          size = dct_dc_size_luminance1[index].value;
          flushed = dct_dc_size_luminance1[index].num_bits;
        }
	/* presumably strips the size code bits already consumed --
	 * depends on the bitMask table defined elsewhere */
        next16bits &= bitMask[(16+flushed)&0x1f];
        if (size != 0) {
	  /* the DC differential occupies the next `size` bits */
          flushed += size;
          diff = next16bits >> (16-flushed);
	  /* presumably: leading bit clear means a negative
	   * differential -- depends on bitTest/rBitMask tables
	   * defined elsewhere */
          if (!(diff & bitTest[32-size])) {
            diff = rBitMask[size&0x1f] | (diff + 1);
          }
        } else {
          diff = 0;
        }
        bitwindow->flushBitsDirect(flushed);
	
        if (n == 0) {
          coeff = diff << 3;
	  /* First luminance block: if the previous intra macroblock
	   * is not the directly preceding one, predict from the
	   * constant 1024 instead of the stored predictor. */
          if (vid_stream->mblock.mb_address -
              vid_stream->mblock.past_intra_addr > 1) {
            coeff += 1024;
          } else {
	    coeff += DCT_dc_y_past;
          }
          DCT_dc_y_past = coeff;
        } else {
	  /* blocks 1..3 always predict from the previous luminance DC */
          coeff = DCT_dc_y_past + (diff << 3);
          DCT_dc_y_past = coeff;
        }

      } else { /* n = 4 or 5 */
	/*
	 * Get the chrominance bits.  This code has been hand optimized to
	 * as described above
	 */
	
	unsigned int next16bits, index, flushed;
        next16bits=bitwindow->showBits(16);
	
	/* Same table lookup as decodeDCTDCSizeChrom(), done on the
	 * local 16 bit copy. */
        index = next16bits >> (16-5);
        if (index < 31) {
          size = dct_dc_size_chrominance[index].value;
          flushed = dct_dc_size_chrominance[index].num_bits;
        } else {
          index = next16bits >> (16-10);
          index -= 0x3e0;
          size = dct_dc_size_chrominance1[index].value;
          flushed = dct_dc_size_chrominance1[index].num_bits;
        }
        next16bits &= bitMask[(16+flushed)&0x1f];

        if (size != 0) {
          flushed += size;
          diff = next16bits >> (16-flushed);
          if (!(diff & bitTest[32-size])) {
            diff = rBitMask[size&0x1f] | (diff + 1);
          }
        } else {
          diff = 0;
        }
        bitwindow->flushBitsDirect(flushed);

	/* We test 5 first; a result of the mixup of Cr and Cb */
        if (n == 5) {
          coeff = diff << 3;
	  
	  /* same predictor reset rule as for the first luminance block */
          if (vid_stream->mblock.mb_address -
              vid_stream->mblock.past_intra_addr > 1) {
            coeff += 1024;
          } else {
            coeff += DCT_dc_cr_past;
          }
          DCT_dc_cr_past = coeff;
        } else {
          coeff = diff << 3;
          if (vid_stream->mblock.mb_address -
              vid_stream->mblock.past_intra_addr > 1) {
            coeff += 1024;
          } else {
            coeff += DCT_dc_cb_past;
          }
          DCT_dc_cb_past = coeff;
        }
      }
    
      /* store the DC coefficient; the MMX path keeps it scaled by 16 */
      *reconptr = coeff;
      if (lmmx) {
	*reconptr <<= 4;
      }


      i = 0; 
      pos = 0;
      coeffCount = (coeff != 0);
      
      /* AC coefficients are parsed for every picture type but D_TYPE */
      if (vid_stream->picture.code_type != D_TYPE) {
        qscale = (vid_stream->slice)->getQuantScale();
        iqmatrixptr = vid_stream->sequence.intra_quant_matrix[0];

        while(1) {
	  decodeDCTCoeff(dct_coeff_next,run,level);

          if (run >= END_OF_BLOCK) {
	    break;
	  }
	  
	  /* advance along the zigzag scan; the mask keeps the
	   * index inside the 64 entry scan */
          i += run + 1;
          pos = zigzag_direct[i&0x3f];

	  // MMX check
	  if (lmmx) {
	    /* MMX path: no division by 16, the value stays scaled */
	    coeff = (level*qscale*iqmatrixptr[pos]);
	    if (level < 0) {
	      coeff -=16;
	    } else {
	      coeff +=16;
	    }
	    coeff |=16;
	  } else {
	    coeff = (level*qscale*((int) (iqmatrixptr[pos]))) / 16; 
	    /* force the result to be odd */
	    if (level < 0) {
	      coeff = (coeff-1) | 1;
	    } else {
	      coeff = (coeff+1) | 1;
	    }
	  }



	  reconptr[pos] = coeff;
          coeffCount++;
	  
        }
	/* decodeDCTCoeff() does not consume the end-of-block code;
	 * skip it here (presumably a 2 bit code) */
        bitwindow->flushBitsDirect(2);

	goto end;
      }
    
    } else { /* non-intra-coded macroblock */
      niqmatrixptr = vid_stream->sequence.non_intra_quant_matrix[0];
      qscale = (vid_stream->slice)->getQuantScale();

      /* the first coefficient uses its own table and its run is the
       * scan position itself (no +1) */
      decodeDCTCoeff(dct_coeff_first,run,level);
      i = run;

      pos = zigzag_direct[i&0x3f];
      
      /* quantizes and oddifies each coefficient */
      if (lmmx) {
	if (level < 0) {
	  coeff = ((level - 1) * qscale * ((int) (niqmatrixptr[pos]))) ; 
	  coeff -=16;
	} else {
	  coeff = ((level + 1) * qscale * ((int) (niqmatrixptr[pos]))) ; 
	  coeff +=16;
	}
	coeff |=16;
      } else {
	if (level < 0) {
	  coeff = ((level - 1) * qscale *  ((int) (niqmatrixptr[pos]))) / 16; 
	  coeff = (coeff-1) | 1;
	} else {
	  coeff = ((level + 1) * qscale *  ((int) (niqmatrixptr[pos]))) / 16; 
	  coeff = (coeff+1) | 1;

	}
      }

      
      reconptr[pos] = coeff;
      if (coeff) {
	coeffCount = 1;
      }
      
      if (vid_stream->picture.code_type != D_TYPE) {

        while(1) {
	  decodeDCTCoeff(dct_coeff_next,run,level);

          if (run >= END_OF_BLOCK) {
	    break;
          }

          i+=run+1;
          pos = zigzag_direct[i&0x3f];
	  /* same dequantisation as for the first coefficient above */
	  if (lmmx) {
	    if (level < 0) {
	      coeff = ((level - 1) * qscale * ((int) (niqmatrixptr[pos]))) ; 
	      coeff -=16;
	    } else {
	      coeff = ((level + 1) * qscale * ((int) (niqmatrixptr[pos]))) ; 
	      coeff +=16;
	    }
	    coeff |=16;
	  } else {
	    if (level < 0) {
	      coeff = ((level - 1) * qscale * ((int) (niqmatrixptr[pos])))/16; 
	      coeff = (coeff-1) | 1;
	    } else {
	      coeff = ((level + 1) * qscale * 
		       ((int) (niqmatrixptr[pos]))) / 16; 
	      coeff = (coeff+1) | 1;
	      
	    }
	  }
	  reconptr[pos] = coeff;
	  coeffCount++;
        } /* end while */

	/* skip the end-of-block code, as in the intra case */
        bitwindow->flushBitsDirect(2);
	
        goto end;
      } /* end if (vid_stream->picture.code_type != D_TYPE) */
    }
  
  end:
    /* Inverse DCT, in place.  With exactly one counted coefficient
     * the non-MMX path uses the sparse variant, passing the offset
     * of the last coefficient stored. */
    if (coeffCount == 1) {

      if (lmmx) {
	IDCT_mmx(reconptr);
      } else {
	j_rev_dct_sparse (reconptr, pos);
      }

    } else {

      if (lmmx) {
	IDCT_mmx(reconptr);
      } else {
	j_rev_dct(reconptr);
      }

    }
  }
  /* called after the MMX IDCT; presumably resets the MMX/FPU state */
  if (lmmx) {
    emms();
  }

  return;

}

    
#undef DCT_recon 
#undef DCT_dc_y_past 
#undef DCT_dc_cr_past 
#undef DCT_dc_cb_past 



/*
 * Decodes macroblock address increments until one is found that is
 * not MB_STUFFING, and returns it.  decodeMBAddrInc() is always
 * called at least once.
 */
int DecoderClass::ParseStuffing() {
  int increment = decodeMBAddrInc();
  while (increment == MB_STUFFING) {
    increment = decodeMBAddrInc();
  }
  return increment;
}



/*
 * Debug helper: writes the first 64 entries of zigzag_direct to
 * stdout on one line, each surrounded by single spaces.
 */
void DecoderClass::print() {
  int slot = 0;
  while (slot < 64) {
    printf(" %d ",zigzag_direct[slot]);
    ++slot;
  }
  printf("\n");
}


















