/*Web-listing residue (not part of the original source file):
  Logo Search packages:
  Sourcecode: libtheora version File versions  Download package

  decode.c*/

/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
 * by the Xiph.Org Foundation http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

  function:
    last mod: $Id: decode.c 13911 2007-09-28 01:39:26Z tterribe $
  
 ********************************************************************/

#include <stdlib.h>
#include <string.h>
#include <ogg/ogg.h>
#include "decint.h"
#if defined(OC_DUMP_IMAGES)
# include <stdio.h>
# include "png.h"
#endif



/*Post-processing levels, numbered from 0 (OC_PP_LEVEL_DISABLED) up to
   OC_PP_LEVEL_MAX.
  NOTE(review): the numbering suggests each level also enables everything
   below it; confirm against the code that consumes pp_level.*/

/*No post-processing.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Deblock the luma plane.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Dering the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Dering the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Stronger chroma plane deringing.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*Maximum valid post-processing level.*/
#define OC_PP_LEVEL_MAX       (7)

/*Read 32 bits as two consecutive 16-bit reads; the first read supplies the
   high half of the result.
  _opb: The pack buffer to read from.
  _ret: Receives the assembled value.
        *_ret is set to 0 on failure.
  Return: 0 on success, or a negative value on failure.*/
int theora_read32(oggpack_buffer *_opb,long *_ret){
  long hi;
  long lo;
  hi=oggpackB_read(_opb,16);
  lo=oggpackB_read(_opb,16);
  /*A failed read is reported as a negative value.
    Check for that before combining the halves: left-shifting a negative
     value is undefined behavior, even if the result is masked off
     afterwards.*/
  if(hi<0||lo<0){
    *_ret=0;
    return -1;
  }
  /*Assemble in unsigned arithmetic so that the shift cannot overflow a signed
     long on platforms where long is only 32 bits wide.*/
  *_ret=(long)(((unsigned long)hi<<16)|(unsigned long)lo);
  return 0;
}

/*Read _nbits bits from the pack buffer (_nbits <= 31 for libogg1).
  _opb:   The pack buffer to read from.
  _nbits: The number of bits to read.
  _ret:   Receives the bits read.
          *_ret is set to 0 on failure.
  Return: 0 on success, or a negative value on failure.*/
int theora_read(oggpack_buffer *_opb,int _nbits,long *_ret){
  long value;
  long fail;
  value=oggpackB_read(_opb,_nbits);
  /*Shifting by 31 leaves zero for any non-negative value of at most 31 bits,
     and a non-zero mask for the negative value a failed read produces.*/
  fail=value>>31;
  *_ret=value&~fail;
  return (int)fail;
}

/*Read a single bit from the pack buffer.
  _opb: The pack buffer to read from.
  _ret: Receives the bit read.
        *_ret is set to 0 on failure.
  Return: 0 on success, or a negative value on failure.*/
int theora_read1(oggpack_buffer *_opb,long *_ret){
  long bit;
  int  fail;
  bit=oggpackB_read1(_opb);
  /*Zero for a bit value of 0 or 1, non-zero for the negative value a failed
     read produces.*/
  fail=((int)bit)>>31;
  *_ret=bit&~fail;
  return fail;
}

/*Peek at the next _nbits bits without consuming them (_nbits <= 31 for
   libogg1).
  When some bits remain, but fewer than _nbits, the remaining bits are
   returned in the high positions with the missing low bits set to 0, and the
   call still succeeds.
  The stream can be advanced afterwards with oggpackB_adv().
  _opb:   The pack buffer to look into.
  _nbits: The number of bits to look ahead.
  _ret:   Receives the bits.
          *_ret is set to 0 on failure.
  Return: 0 on success, or a negative value on failure.*/
int theora_look(oggpack_buffer *_opb,int _nbits,long *_ret){
  long bits;
  int  navail;
  bits=oggpackB_look(_opb,_nbits);
  if(bits>=0){
    *_ret=bits;
    return 0;
  }
  /*libogg1 refuses to look past the end of the stream, even when we asked for
     more bits than we actually need.
    There is no accessor for the storage field, so read it directly to work
     out how many bits really are left.*/
  navail=(_opb->storage<<3)-oggpackB_bits(_opb);
  if(navail<=0){
    /*Nothing left at all: this is a genuine failure.*/
    *_ret=0;
    return -1;
  }
  /*Hand back what is there, aligned as if the full _nbits had been read.
    This shorter look should always succeed.*/
  *_ret=oggpackB_look(_opb,navail)<<(_nbits-navail);
  return 0;
}


/*The mode alphabets for the various mode coding schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table, so row i
   holds the alphabet for scheme i+1.
  Within a row, entry k is the macro block mode selected by decoded mode index
   k.
  The last row is also the order in which the entries of scheme 0's custom
   alphabet are read from the packet.*/
static const int OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Last MV dominates */
  /*Scheme 1.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 2.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 3.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 4.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  /*No MV dominates.*/
  /*Scheme 5.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 6.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Default ordering.*/
  /*Scheme 7.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  }
};



/*Decodes one run length using the "long" run-length code.
  Coding scheme:
     Codeword            Run Length
   0                       1
   10x                     2-3
   110x                    4-5
   1110xx                  6-9
   11110xxx                10-17
   111110xxxx              18-33
   111111xxxxxxxxxxxx      34-4129
  Failed reads yield 0 bits, so a truncated packet still produces a run length
   in the valid range.
  _opb: The pack buffer to read from.
  Return: The run length, from 1 to 4129.*/
static int oc_sb_run_unpack(oggpack_buffer *_opb){
  long code;
  long low;
  theora_read1(_opb,&code);
  if(!code)return 1;
  /*The next two bits cover the 10x and 110 prefixes.*/
  theora_read(_opb,2,&code);
  if(!(code&2))return (int)code+2;
  if(!(code&1)){
    theora_read1(_opb,&code);
    return (int)code+4;
  }
  /*Three more bits cover 1110xx, 11110x, 111110 and 111111.*/
  theora_read(_opb,3,&code);
  if(!(code&4))return (int)code+6;
  if(!(code&2)){
    /*The x already read is the top bit of the 3-bit offset.*/
    int base;
    base=10+((code&1)<<2);
    theora_read(_opb,2,&low);
    return base+(int)low;
  }
  if(!(code&1)){
    theora_read(_opb,4,&low);
    return 18+(int)low;
  }
  theora_read(_opb,12,&low);
  return 34+(int)low;
}

/*Decodes one run length using the "short" run-length code.
  Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30
  _opb: The pack buffer to read from.
  Return: The run length, from 1 to 30.*/
static int oc_block_run_unpack(oggpack_buffer *_opb){
  long code;
  long low;
  /*Two bits cover the 0x and 10 prefixes.*/
  theora_read(_opb,2,&code);
  if(!(code&2))return (int)code+1;
  if(!(code&1)){
    theora_read1(_opb,&code);
    return (int)code+3;
  }
  /*Two more bits cover the 110x and 1110 prefixes.*/
  theora_read(_opb,2,&code);
  if(!(code&2))return (int)code+5;
  if(!(code&1)){
    theora_read(_opb,2,&code);
    return (int)code+7;
  }
  /*Three more bits: the prefix bit (0 or 1) plus the first two x bits.*/
  theora_read(_opb,3,&code);
  if(!(code&4))return 11+code;
  /*The two x bits already read are the top half of the 4-bit offset.*/
  theora_read(_opb,2,&low);
  return 15+((code&3)<<2)+low;
}



/*Initializes a decoder context from the stream info and setup headers.
  This sets up the shared codec state, copies the Huffman trees, builds the
   dequantization tables and the per-qi sharpening modifier, allocates the
   token and extra-bits storage, and leaves post-processing disabled.
  _dec:   The decoder context to initialize.
  _info:  The stream info passed on to oc_state_init().
  _setup: The setup info supplying the Huffman tables, quantization
           parameters and loop filter limits.
  Return: 0 on success, or the negative value returned by oc_state_init().
  NOTE(review): the results of oc_calloc_2d() are not checked here.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int err;
  int qti;
  int pli;
  int qi;
  err=oc_state_init(&_dec->state,_info);
  if(err<0)return err;
  oc_huff_trees_copy(_dec->huff_tables,_setup->huff_tables);
  /*Point each dequantization table at its backing storage.*/
  for(qti=0;qti<2;qti++){
    for(pli=0;pli<3;pli++){
      _dec->state.dequant_tables[qti][pli]=
       _dec->state.dequant_table_data[qti][pli];
    }
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Derive the sharpening modifier for each qi from four quantizers of every
     table, with plane 0 counted twice.*/
  for(qi=0;qi<64;qi++){
    int qsum;
    qsum=0;
    for(qti=0;qti<2;qti++){
      for(pli=0;pli<3;pli++){
        qsum+=(_dec->state.dequant_tables[qti][pli][qi][18]+
         _dec->state.dequant_tables[qti][pli][qi][19]+
         _dec->state.dequant_tables[qti][pli][qi][26]+
         _dec->state.dequant_tables[qti][pli][qi][27])<<(pli==0);
      }
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  /*One row of token storage per coefficient index, one slot per fragment.*/
  _dec->dct_tokens=(unsigned char **)oc_calloc_2d(64,
   _dec->state.nfrags,sizeof(_dec->dct_tokens[0][0]));
  _dec->extra_bits=(ogg_uint16_t **)oc_calloc_2d(64,
   _dec->state.nfrags,sizeof(_dec->extra_bits[0][0]));
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  /*Post-processing starts out disabled, with none of its buffers allocated
     and no stripe callback registered.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
  return 0;
}

/*Releases everything owned by a decoder context: the post-processing
   buffers, the token and extra-bits storage, the Huffman trees and the shared
   codec state.
  _dec: The decoder context to clear.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
  /*Post-processing buffers.*/
  _ogg_free(_dec->dc_qis);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->pp_frame_data);
  /*Token storage.*/
  oc_free_2d(_dec->dct_tokens);
  oc_free_2d(_dec->extra_bits);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}


/*Unpacks the frame header: the packet type bit, the frame type, and the list
   of between one and three qi values for the frame.
  _dec: The decoder context; its pack buffer must be positioned at the start
         of the packet.
  Return: 0 on success, TH_EBADPACKET if this is not a data packet, or
           TH_EIMPL if the reserved keyframe bits are non-zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  long bits;
  int  nqis;
  /*The first bit must be clear for a data packet.*/
  theora_read1(&_dec->opb,&bits);
  if(bits)return TH_EBADPACKET;
  /*The frame type (I or P).*/
  theora_read1(&_dec->opb,&bits);
  _dec->state.frame_type=(int)bits;
  /*Each 6-bit qi is followed by a "more" flag, except the third and last.*/
  nqis=0;
  for(;;){
    theora_read(&_dec->opb,6,&bits);
    _dec->state.qis[nqis++]=(int)bits;
    if(nqis>=3)break;
    theora_read1(&_dec->opb,&bits);
    if(!bits)break;
  }
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes carry 3 unused configuration bits, holdovers from VP3 days.
      Anything other than zero is something we do not implement.*/
    theora_read(&_dec->opb,3,&bits);
    if(bits)return TH_EIMPL;
  }
  return 0;
}

/*Marks every fragment as coded, with mode OC_MODE_INTRA.
  The coded fragment list is rebuilt in coded order (plane by plane, super
   block by super block) and the uncoded fragment counts are reset to zero.
  The coded macro block list is not touched, as it is not used when decoding
   INTRA frames.
  _dec: The decoder context.*/
static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
  oc_sb *sb;
  int    pli;
  int    total;
  total=0;
  sb=_dec->state.sbs;
  for(pli=0;pli<3;pli++){
    oc_sb *plane_end;
    int    plane_start;
    plane_start=total;
    /*The super blocks of each plane directly follow those of the previous
       one.*/
    plane_end=sb+_dec->state.fplanes[pli].nsbs;
    while(sb<plane_end){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int bi;
        if(!(sb->quad_valid&(1<<quadi)))continue;
        for(bi=0;bi<4;bi++){
          oc_fragment *frag;
          int          fragi;
          fragi=sb->map[quadi][bi];
          /*Negative entries mark positions with no fragment.*/
          if(fragi<0)continue;
          frag=_dec->state.frags+fragi;
          frag->coded=1;
          frag->mbmode=OC_MODE_INTRA;
          _dec->state.coded_fragis[total++]=fragi;
        }
      }
      sb++;
    }
    _dec->state.ncoded_fragis[pli]=total-plane_start;
    _dec->state.nuncoded_fragis[pli]=0;
  }
}

/*Decodes the run-length coded flags that say whether each super block is
   partially coded.
  Every super block also has its coded_fully flag cleared.
  _dec: The decoder context.
  Return: The number of partially coded super blocks.*/
static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb *sb;
  oc_sb *sb_end;
  long   bit;
  int    flag;
  int    npartial;
  theora_read1(&_dec->opb,&bit);
  flag=(int)bit;
  sb=_dec->state.sbs;
  sb_end=sb+_dec->state.nsbs;
  npartial=0;
  while(sb<sb_end){
    int run;
    int long_run;
    run=oc_sb_run_unpack(&_dec->opb);
    long_run=run>=4129;
    do{
      sb->coded_partially=flag;
      sb->coded_fully=0;
      npartial+=flag;
      sb++;
      run--;
    }
    while(run>0&&sb<sb_end);
    /*Runs normally alternate; only a maximum-length run is followed by an
       explicit flag for the next run.*/
    if(!long_run||sb>=sb_end)flag=!flag;
    else{
      theora_read1(&_dec->opb,&bit);
      flag=(int)bit;
    }
  }
  /*TODO: the last run should end exactly at the last super block.
    If it does not, we should issue a warning of some kind.*/
  return npartial;
}

/*Decodes the run-length coded flags that say whether each non-partially-coded
   super block is fully coded.
  Partially coded super blocks are skipped and do not count towards the run
   lengths.
  This must only be called when at least one super block is not partially
   coded, since the initial skip is not bounded by the end of the list.
  _dec: The decoder context.*/
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb *sb;
  oc_sb *sb_end;
  long   bit;
  int    flag;
  sb=_dec->state.sbs;
  sb_end=sb+_dec->state.nsbs;
  /*Skip ahead to the first super block these flags apply to.*/
  while(sb->coded_partially)sb++;
  theora_read1(&_dec->opb,&bit);
  flag=(int)bit;
  while(sb<sb_end){
    int run;
    int long_run;
    run=oc_sb_run_unpack(&_dec->opb);
    long_run=run>=4129;
    while(sb<sb_end){
      if(!sb->coded_partially){
        if(run<=0)break;
        run--;
        sb->coded_fully=flag;
      }
      sb++;
    }
    /*Runs normally alternate; only a maximum-length run is followed by an
       explicit flag for the next run.*/
    if(!long_run||sb>=sb_end)flag=!flag;
    else{
      theora_read1(&_dec->opb,&bit);
      flag=(int)bit;
    }
  }
  /*TODO: the last run should be used up exactly.
    If it is not, we should issue a warning of some kind.*/
}

/*Decodes the coded flag of every fragment in an INTER frame.
  The super block level flags are unpacked first; fragments of fully coded
   super blocks are coded, fragments of entirely uncoded super blocks are not,
   and fragments of partially coded super blocks take their flags from a
   second run-length coded list.
  Coded fragments are appended to the coded fragment list, uncoded fragments
   are stored backwards from the uncoded fragment list pointer, and the
   per-plane counts of both are updated.
  _dec: The decoder context.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  oc_sb *sb;
  long   bit;
  int    npartial;
  int    pli;
  int    flag;
  int    run;
  int    ncoded;
  int    nuncoded;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  /*The block-level flag is toggled before its first use, so start from the
     inverse of the bit in the packet.*/
  flag=0;
  if(npartial>0){
    theora_read1(&_dec->opb,&bit);
    flag=!(int)bit;
  }
  run=0;
  ncoded=nuncoded=0;
  sb=_dec->state.sbs;
  for(pli=0;pli<3;pli++){
    oc_sb *plane_end;
    int    plane_ncoded0;
    int    plane_nuncoded0;
    plane_ncoded0=ncoded;
    plane_nuncoded0=nuncoded;
    plane_end=sb+_dec->state.fplanes[pli].nsbs;
    for(;sb<plane_end;sb++){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int bi;
        if(!(sb->quad_valid&(1<<quadi)))continue;
        for(bi=0;bi<4;bi++){
          oc_fragment *frag;
          int          fragi;
          fragi=sb->map[quadi][bi];
          if(fragi<0)continue;
          frag=_dec->state.frags+fragi;
          if(sb->coded_fully)frag->coded=1;
          else if(sb->coded_partially){
            /*Start a new run, with the opposite flag, whenever the current
               one is used up.*/
            if(run<=0){
              run=oc_block_run_unpack(&_dec->opb);
              flag=!flag;
            }
            run--;
            frag->coded=flag;
          }
          else frag->coded=0;
          if(frag->coded)_dec->state.coded_fragis[ncoded++]=fragi;
          else{
            nuncoded++;
            _dec->state.uncoded_fragis[-nuncoded]=fragi;
          }
        }
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded-plane_ncoded0;
    _dec->state.nuncoded_fragis[pli]=nuncoded-plane_nuncoded0;
  }
  /*TODO: the last block run should be used up exactly.
    If it is not, we should issue a warning of some kind.*/
}



/*Reads one coded macro block mode index from the pack buffer.
  _opb: The pack buffer to read from.
  Return: The mode index, used to look up the mode in the current alphabet.*/
typedef int (*oc_mode_unpack_func)(oggpack_buffer *_opb);

/*Reads a mode index coded as a run of 1 bits ended by a 0 bit.
  The terminating 0 is not present after seven 1 bits.
  _opb: The pack buffer to read from.
  Return: The number of 1 bits read, from 0 to 7.*/
static int oc_vlc_mode_unpack(oggpack_buffer *_opb){
  long bit;
  int  nones;
  nones=0;
  while(nones<7){
    theora_read1(_opb,&bit);
    if(!bit)break;
    nones++;
  }
  return nones;
}

/*Reads a mode index coded as a plain 3-bit value.
  _opb: The pack buffer to read from.
  Return: The mode index, from 0 to 7.*/
static int oc_clc_mode_unpack(oggpack_buffer *_opb){
  long index;
  theora_read(_opb,3,&index);
  return (int)index;
}

/*Unpacks the list of macro block modes for INTER frames.
  A 3-bit scheme number selects the mode alphabet: scheme 0 reads a custom
   alphabet from the packet, schemes 1 through 7 use the fixed rows of
   OC_MODE_ALPHABETS.
  Scheme 7 codes each mode index with 3 bits; every other scheme uses the
   variable-length code.
  A mode is only read for macro blocks with at least one coded luma fragment;
   the rest are set to OC_MODE_INTER_NOMV.
  Macro blocks marked OC_MODE_INVALID are left untouched.
  _dec: The decoder context; fragment coded flags must already be decoded.*/
void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
  oc_mode_unpack_func  unpack;
  const int           *alphabet;
  oc_mb               *mb;
  oc_mb               *mb_end;
  int                  custom_alphabet[8];
  long                 bits;
  int                  scheme;
  theora_read(&_dec->opb,3,&bits);
  scheme=(int)bits;
  if(scheme!=0)alphabet=OC_MODE_ALPHABETS[scheme-1];
  else{
    int mi;
    /*Pre-fill the custom alphabet, just in case.
      A packet that does not name every index exactly once is most likely
       corrupt, but this way we still decode SOMETHING without reading
       uninitialized entries.*/
    for(mi=0;mi<OC_NMODES;mi++)custom_alphabet[mi]=OC_MODE_INTER_NOMV;
    /*The packet gives, for each mode in the default ordering, the index it
       is assigned.*/
    for(mi=0;mi<OC_NMODES;mi++){
      theora_read(&_dec->opb,3,&bits);
      custom_alphabet[bits]=OC_MODE_ALPHABETS[6][mi];
    }
    alphabet=custom_alphabet;
  }
  if(scheme==7)unpack=oc_clc_mode_unpack;
  else unpack=oc_vlc_mode_unpack;
  mb_end=_dec->state.mbs+_dec->state.nmbs;
  for(mb=_dec->state.mbs;mb<mb_end;mb++){
    int has_coded_luma;
    int bi;
    if(mb->mode==OC_MODE_INVALID)continue;
    has_coded_luma=0;
    for(bi=0;bi<4&&!has_coded_luma;bi++){
      int fragi;
      fragi=mb->map[0][bi];
      has_coded_luma=fragi>=0&&_dec->state.frags[fragi].coded;
    }
    if(has_coded_luma)mb->mode=alphabet[(*unpack)(&_dec->opb)];
    else mb->mode=OC_MODE_INTER_NOMV;
  }
}



/*Reads one signed motion vector component from the pack buffer.
  _opb: The pack buffer to read from.
  Return: The decoded component.*/
typedef int (*oc_mv_comp_unpack_func)(oggpack_buffer *_opb);

/*Reads one motion vector component using the variable-length code.
  A 3-bit prefix either gives the value directly (0, 1 or -1) or selects a
   magnitude range; the bits that follow refine the magnitude and end with a
   sign bit (1 for negative).
  _opb: The pack buffer to read from.
  Return: The decoded component, from -31 to 31.*/
static int oc_vlc_mv_comp_unpack(oggpack_buffer *_opb){
  long bits;
  int  nextra;
  int  mag;
  theora_read(_opb,3,&bits);
  switch(bits){
    case 0:return 0;
    case 1:return 1;
    case 2:return -1;
    /*Fixed magnitudes followed by just a sign bit.*/
    case 3:nextra=1;mag=2;break;
    case 4:nextra=1;mag=3;break;
    /*Magnitude ranges 4-7, 8-15 and 16-31.*/
    case 5:nextra=3;mag=4;break;
    case 6:nextra=4;mag=8;break;
    default:nextra=5;mag=16;break;
  }
  if(nextra==1)theora_read1(_opb,&bits);
  else theora_read(_opb,nextra,&bits);
  /*Everything above the sign bit is the offset into the range.*/
  mag+=(int)(bits>>1);
  return (bits&1)?-mag:mag;
}

/*Reads one motion vector component using the fixed-length code: a 5-bit
   magnitude followed by a sign bit (1 for negative).
  _opb: The pack buffer to read from.
  Return: The decoded component, from -31 to 31.*/
static int oc_clc_mv_comp_unpack(oggpack_buffer *_opb){
  long bits;
  int  mag;
  theora_read(_opb,6,&bits);
  mag=bits>>1;
  return (bits&1)?-mag:mag;
}

/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
   block modes and motion vectors to the individual fragments.
  A single leading bit selects how every motion vector component in the frame
   is coded: 1 selects oc_clc_mv_comp_unpack(), 0 selects
   oc_vlc_mv_comp_unpack().
  Macro blocks whose mode is OC_MODE_INVALID, or which have no coded
   fragments, are skipped entirely: no bits are read and no fragments are
   written for them.
  _dec: The decoder context; the macro block modes and the fragment coded
         flags must already be decoded.*/
static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
  oc_set_chroma_mvs_func  set_chroma_mvs;
  oc_mv_comp_unpack_func  mv_comp_unpack;
  oc_mb                  *mb;
  oc_mb                  *mb_end;
  const int              *map_idxs;
  long                    val;
  int                     map_nidxs;
  /*The motion vector predictors: last_mv[0] is used by OC_MODE_INTER_MV_LAST
     and last_mv[1] by OC_MODE_INTER_MV_LAST2.*/
  oc_mv                   last_mv[2];
  /*The chroma motion vectors derived from the four luma vectors in 4MV
     mode.*/
  oc_mv                   cbmvs[4];
  /*The chroma vector derivation and the list of macro block map indices to
     visit both depend on the pixel format.*/
  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
  theora_read1(&_dec->opb,&val);
  mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
  /*Both predictors start out as the zero vector.*/
  memset(last_mv,0,sizeof(last_mv));
  mb=_dec->state.mbs;
  mb_end=mb+_dec->state.nmbs;
  for(;mb<mb_end;mb++)if(mb->mode!=OC_MODE_INVALID){
    oc_fragment *frag;
    oc_mv        mbmv;
    /*The map indices of this macro block's coded fragments, in map_idxs
       order, with room for the -1 terminator appended in 4MV mode.
      NOTE(review): this assumes map_nidxs never exceeds 12; confirm against
       OC_MB_MAP_NIDXS.*/
    int          coded[13];
    int          codedi;
    int          ncoded;
    int          mapi;
    int          mapii;
    int          fragi;
    int          mb_mode;
    /*Search for at least one coded fragment.*/
    ncoded=mapii=0;
    do{
      mapi=map_idxs[mapii];
      /*A map index packs the plane in its upper bits and the block within
         the macro block in its low two bits.*/
      fragi=mb->map[mapi>>2][mapi&3];
      if(fragi>=0&&_dec->state.frags[fragi].coded)coded[ncoded++]=mapi;
    }
    while(++mapii<map_nidxs);
    if(ncoded<=0)continue;
    mb_mode=mb->mode;
    switch(mb_mode){
      case OC_MODE_INTER_MV_FOUR:{
        oc_mv       lbmvs[4];
        int         bi;
        /*Mark the tail of the list, so we don't accidentally go past it.*/
        coded[ncoded]=-1;
        /*Walk the four luma blocks; map indices 0-3 address plane 0, so the
           coded ones appear first in the list.
          Each coded luma block reads its own vector, and each uncoded one
           contributes a zero vector to the chroma derivation.*/
        for(bi=codedi=0;bi<4;bi++){
          if(coded[codedi]==bi){
            codedi++;
            frag=_dec->state.frags+mb->map[0][bi];
            frag->mbmode=mb_mode;
            frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
            frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
          }
          else lbmvs[bi][0]=lbmvs[bi][1]=0;
        }
        /*If any luma vector was read, the last one read becomes the new
           predictor.*/
        if(codedi>0){
          last_mv[1][0]=last_mv[0][0];
          last_mv[1][1]=last_mv[0][1];
          last_mv[0][0]=lbmvs[coded[codedi-1]][0];
          last_mv[0][1]=lbmvs[coded[codedi-1]][1];
        }
        /*Any entries left in the list are coded chroma fragments, which take
           the vectors derived from the luma ones.*/
        if(codedi<ncoded){
          (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
          for(;codedi<ncoded;codedi++){
            mapi=coded[codedi];
            bi=mapi&3;
            frag=_dec->state.frags+mb->map[mapi>>2][bi];
            frag->mbmode=mb_mode;
            frag->mv[0]=cbmvs[bi][0];
            frag->mv[1]=cbmvs[bi][1];
          }
        }
      }break;
      case OC_MODE_INTER_MV:{
        /*A new vector is read and becomes the first predictor; the old first
           predictor becomes the second.*/
        last_mv[1][0]=last_mv[0][0];
        last_mv[1][1]=last_mv[0][1];
        mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
        mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
      }break;
      case OC_MODE_INTER_MV_LAST:{
        /*Reuse the first predictor; the predictors are unchanged.*/
        mbmv[0]=last_mv[0][0];
        mbmv[1]=last_mv[0][1];
      }break;
      case OC_MODE_INTER_MV_LAST2:{
        /*Reuse the second predictor, and swap the two predictors.*/
        mbmv[0]=last_mv[1][0];
        mbmv[1]=last_mv[1][1];
        last_mv[1][0]=last_mv[0][0];
        last_mv[1][1]=last_mv[0][1];
        last_mv[0][0]=mbmv[0];
        last_mv[0][1]=mbmv[1];
      }break;
      case OC_MODE_GOLDEN_MV:{
        /*A new vector is read, but the predictors are left alone.*/
        mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
        mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
      }break;
      /*All remaining modes use the zero vector.*/
      default:mbmv[0]=mbmv[1]=0;break;
    }
    /*4MV mode fills in the fragments itself.
      For all other modes we can use this common code.*/
    if(mb_mode!=OC_MODE_INTER_MV_FOUR){
      for(codedi=0;codedi<ncoded;codedi++){
        mapi=coded[codedi];
        fragi=mb->map[mapi>>2][mapi&3];
        frag=_dec->state.frags+fragi;
        frag->mbmode=mb_mode;
        frag->mv[0]=mbmv[0];
        frag->mv[1]=mbmv[1];
      }
    }
  }
}

/*Unpacks the quantization index (qi) of every coded fragment.
  If the frame has a single qi value, it is copied into every coded fragment
   and nothing is read from the packet.
  Otherwise, an index into the frame's qi list is decoded for each coded
   fragment and then replaced by the qi value it selects.
  Nothing is done if the frame has no coded fragments.
  _dec: The decoder context; the coded fragment list and the frame's qi list
         must already be decoded.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment *frag;
  int         *coded_fragi;
  int         *coded_fragi_end;
  int          ncoded_fragis;
  /*The coded fragments of all three planes form one contiguous list.*/
  ncoded_fragis=_dec->state.ncoded_fragis[0]+
   _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
  if(ncoded_fragis<=0)return;
  coded_fragi=_dec->state.coded_fragis;
  coded_fragi_end=coded_fragi+ncoded_fragis;
  if(_dec->state.nqis==1){
    /*If this frame has only a single qi value, then just set it in all coded
       fragments.*/
    while(coded_fragi<coded_fragi_end){
      _dec->state.frags[*coded_fragi++].qi=_dec->state.qis[0];
    }
  }
  else{
    long val;
    int  flag;
    /*The number of fragments whose qii is 0 after the first pass.*/
    int  nqi0;
    int  run_count;
    /*Otherwise, we decode a qi index for each fragment, using two passes of
      the same binary RLE scheme used for super-block coded bits.
     The first pass marks each fragment as having a qii of 0 or greater than
      0, and the second pass (if necessary), distinguishes between a qii of
      1 and 2.
     At first we just store the qii in the fragment.
     After all the qii's are decoded, we make a final pass to replace them
      with the corresponding qi's for this frame.*/
    theora_read1(&_dec->opb,&val);
    flag=(int)val;
    run_count=nqi0=0;
    while(coded_fragi<coded_fragi_end){
      int full_run;
      run_count=oc_sb_run_unpack(&_dec->opb);
      /*Only a maximum-length run is followed by an explicit flag for the
         next run; after any other run the flag simply toggles.*/
      full_run=run_count>=4129;
      do{
        _dec->state.frags[*coded_fragi++].qi=flag;
        nqi0+=!flag;
      }
      while(--run_count>0&&coded_fragi<coded_fragi_end);
      if(full_run&&coded_fragi<coded_fragi_end){
        theora_read1(&_dec->opb,&val);
        flag=(int)val;
      }
      else flag=!flag;
    }
    /*TODO: run_count should be 0 here.
      If it's not, we should issue a warning of some kind.*/
    /*If we have 3 different qi's for this frame, and there was at least one
       fragment with a non-zero qi, make the second pass.*/
    if(_dec->state.nqis==3&&nqi0<ncoded_fragis){
      /*Skip qii==0 fragments.
        This is not bounded by the end of the list, but the test above
         guarantees that a fragment with a non-zero qii exists.*/
      for(coded_fragi=_dec->state.coded_fragis;
       _dec->state.frags[*coded_fragi].qi==0;coded_fragi++);
      theora_read1(&_dec->opb,&val);
      flag=(int)val;
      while(coded_fragi<coded_fragi_end){
        int full_run;
        run_count=oc_sb_run_unpack(&_dec->opb);
        full_run=run_count>=4129;
        for(;coded_fragi<coded_fragi_end;coded_fragi++){
          /*This declaration shadows the function-level frag.*/
          oc_fragment *frag;
          frag=_dec->state.frags+*coded_fragi;
          /*Fragments with a qii of 0 do not count towards the run.*/
          if(frag->qi==0)continue;
          if(run_count--<=0)break;
          /*A set flag turns a qii of 1 into a qii of 2.*/
          frag->qi+=flag;
        }
        if(full_run&&coded_fragi<coded_fragi_end){
          theora_read1(&_dec->opb,&val);
          flag=(int)val;
        }
        else flag=!flag;
      }
      /*TODO: run_count should be 0 here.
        If it's not, we should issue a warning of some kind.*/
    }
    /*Finally, translate qii's to qi's.*/
    for(coded_fragi=_dec->state.coded_fragis;coded_fragi<coded_fragi_end;
     coded_fragi++){
      frag=_dec->state.frags+*coded_fragi;
      frag->qi=_dec->state.qis[frag->qi];
    }
  }
}



/*The signature shared by the handlers that decode the value of a single
   token.
  A handler CANNOT be called for any of the EOB tokens.
  _token:      The token value to decode.
  _extra_bits: The extra bits attached to this token.
  Return: The decoded coefficient value.*/
typedef int (*oc_token_dec1val_func)(int _token,int _extra_bits);

/*Handles zero run tokens: the first coefficient of a zero run is always 0.
  This takes the full (token, extra bits) parameter list even though it needs
   neither argument, because it is only ever called through a pointer of type
   int (*)(int,int), and calling a function through a pointer to an
   incompatible function type is undefined behavior.
  _token:      The token value (unused).
  _extra_bits: The extra bits attached to this token (unused).
  Return: 0.*/
static int oc_token_dec1val_zrl(int _token,int _extra_bits){
  (void)_token;
  (void)_extra_bits;
  return 0;
}

/*Handles 1, -1, 2 and -2 tokens, whose value is fixed by the token alone.
  This takes the full (token, extra bits) parameter list even though it does
   not use the extra bits, because it is only ever called through a pointer of
   type int (*)(int,int), and calling a function through a pointer to an
   incompatible function type is undefined behavior.
  _token:      The token value; must be one of the four tokens starting at
                OC_NDCT_ZRL_TOKEN_MAX.
  _extra_bits: The extra bits attached to this token (unused).
  Return: The decoded coefficient value.*/
static int oc_token_dec1val_const(int _token,int _extra_bits){
  static const int CONST_VALS[4]={1,-1,2,-2};
  (void)_extra_bits;
  return CONST_VALS[_token-OC_NDCT_ZRL_TOKEN_MAX];
}

/*Handles DCT value tokens category 2.
  The token alone fixes the magnitude (3 for OC_DCT_VAL_CAT2, one more for
   each following token), and the single extra bit is the sign.
  _token:      The token value.
  _extra_bits: The sign bit: 0 for positive, 1 for negative.
  Return: The decoded coefficient value.*/
static int oc_token_dec1val_cat2(int _token,int _extra_bits){
  int mag;
  mag=_token-OC_DCT_VAL_CAT2+3;
  return _extra_bits?-mag:mag;
}

/*Handles DCT value tokens categories 3 through 8.
  The low extra bits are an offset added to the smallest magnitude of the
   category, and the bit just above them is the sign.
  _token:      The token value.
  _extra_bits: The extra bits attached to this token.
  Return: The decoded coefficient value.*/
static int oc_token_dec1val_cati(int _token,int _extra_bits){
  /*The smallest magnitude in each category.*/
  static const int CAT_BASES[6]={
    OC_NDCT_VAL_CAT2_SIZE+3,
    OC_NDCT_VAL_CAT2_SIZE+5,
    OC_NDCT_VAL_CAT2_SIZE+9,
    OC_NDCT_VAL_CAT2_SIZE+17,
    OC_NDCT_VAL_CAT2_SIZE+33,
    OC_NDCT_VAL_CAT2_SIZE+65
  };
  /*The mask selecting the magnitude offset bits of each category.*/
  static const int CAT_OFFSET_MASKS[6]={
    0x001,0x003,0x007,0x00F,0x01F,0x1FF
  };
  /*The position of the sign bit in each category.*/
  static const int CAT_SIGN_SHIFTS[6]={1,2,3,4,5,9};
  int cati;
  int mag;
  int negative;
  cati=_token-OC_NDCT_VAL_CAT2_MAX;
  mag=CAT_BASES[cati]+(_extra_bits&CAT_OFFSET_MASKS[cati]);
  negative=(_extra_bits>>CAT_SIGN_SHIFTS[cati])&1;
  return negative?-mag:mag;
}

/*A jump table for computing the first coefficient value the given token value
   represents.
  It is indexed by the token value minus OC_NDCT_EOB_TOKEN_MAX, so the EOB
   tokens have no entries.
  The handlers that were declared with fewer parameters are cast to the common
   function pointer type.*/
static const oc_token_dec1val_func OC_TOKEN_DEC1VAL_TABLE[TH_NDCT_TOKENS-
 OC_NDCT_EOB_TOKEN_MAX]={
  /*Two entries using the zero run handler.*/
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  /*The 1, -1, 2 and -2 tokens.*/
  (oc_token_dec1val_func)oc_token_dec1val_const,
  (oc_token_dec1val_func)oc_token_dec1val_const,
  (oc_token_dec1val_func)oc_token_dec1val_const,
  (oc_token_dec1val_func)oc_token_dec1val_const,
  /*The four category 2 value tokens.*/
  oc_token_dec1val_cat2,
  oc_token_dec1val_cat2,
  oc_token_dec1val_cat2,
  oc_token_dec1val_cat2,
  /*The value tokens of categories 3 through 8.*/
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  /*The remaining nine tokens also use the zero run handler, so their first
     coefficient is 0.
    NOTE(review): presumably these are the combined zero run and value
     tokens; confirm against the token definitions.*/
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl
};

/*Returns the decoded value of the given token.
  It CANNOT be called for any of the EOB tokens.
  _token:      The token value to skip.
  _extra_bits: The extra bits attached to this token.
  Return: The decoded coefficient value.*/
int oc_dct_token_dec1val(int _token,int _extra_bits){
  return (*OC_TOKEN_DEC1VAL_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
   _extra_bits);
}

/*Unpacks the DC coefficient tokens.
  Unlike when unpacking the AC coefficient tokens, we actually need to decode
   the DC coefficient values now so that we can do DC prediction.
  The tokens and their extra bits are stored in row 0 of the decoder's token
   and extra-bits lists, and the decoded value is stored in the dc field of
   each coded fragment (0 for fragments covered by an EOB run).
  _dec:        The decoder context.
  _huff_idxs:  The index of the Huffman table to use for each color plane.
  _ntoks_left: The number of tokens left to be decoded in each color plane for
                each coefficient.
               This is updated as EOB tokens and zero run tokens are decoded.
  Return: The length of any outstanding EOB run.*/
static int oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[3],
 int _ntoks_left[3][64]){
  long  val;
  int  *coded_fragi;
  int  *coded_fragi_end;
  /*run_counts[k] counts the tokens of the current plane for which
     oc_dct_token_skip() returned k+1, with EOB coverage counted in
     run_counts[63].*/
  int   run_counts[64];
  /*The number of fragments of the current plane accounted for so far.*/
  int   cfi;
  int   eobi;
  int   eobs;
  /*The write positions in the token and extra-bits lists; these carry on
     from one plane to the next.*/
  int   ti;
  int   ebi;
  int   pli;
  int   rli;
  eobs=0;
  ti=ebi=0;
  coded_fragi_end=coded_fragi=_dec->state.coded_fragis;
  for(pli=0;pli<3;pli++){
    coded_fragi_end+=_dec->state.ncoded_fragis[pli];
    memset(run_counts,0,sizeof(run_counts));
    /*Record the EOB run carried into this plane.*/
    _dec->eob_runs[pli][0]=eobs;
    /*Continue any previous EOB run, if there was one.*/
    for(eobi=eobs;eobi-->0&&coded_fragi<coded_fragi_end;){
      _dec->state.frags[*coded_fragi++].dc=0;
    }
    cfi=0;
    /*Keep decoding tokens until the pending EOB run covers every fragment of
       this plane that is still unaccounted for.*/
    while(eobs<_ntoks_left[pli][0]-cfi){
      int token;
      int neb;
      int eb;
      int skip;
      /*The previous EOB run, if any, lies entirely within this plane.*/
      cfi+=eobs;
      run_counts[63]+=eobs;
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli]]);
      _dec->dct_tokens[0][ti++]=(unsigned char)token;
      neb=OC_DCT_TOKEN_EXTRA_BITS[token];
      if(neb){
        theora_read(&_dec->opb,neb,&val);
        eb=(int)val;
        _dec->extra_bits[0][ebi++]=(ogg_uint16_t)eb;
      }
      else eb=0;
      skip=oc_dct_token_skip(token,eb);
      /*A negative skip is an EOB run of length -skip; every fragment it
         covers in this plane gets a DC value of 0.*/
      if(skip<0){
        eobs=eobi=-skip;
        while(eobi-->0&&coded_fragi<coded_fragi_end){
          _dec->state.frags[*coded_fragi++].dc=0;
        }
      }
      else{
        /*NOTE(review): skip is presumably the number of coefficients the
           token covers, and is assumed to lie between 1 and 64; confirm
           against oc_dct_token_skip().*/
        run_counts[skip-1]++;
        cfi++;
        eobs=0;
        _dec->state.frags[*coded_fragi++].dc=oc_dct_token_dec1val(token,eb);
      }
    }
    /*Remember where this plane's tokens and extra bits end.*/
    _dec->ti0[pli][0]=ti;
    _dec->ebi0[pli][0]=ebi;
    /*Set the EOB count to the portion of the last EOB run which extends past
       this coefficient.*/
    eobs=eobs+cfi-_ntoks_left[pli][0];
    /*Add the portion of the last EOB which was included in this coefficient to
       to the longest run length.*/
    run_counts[63]+=_ntoks_left[pli][0]-cfi;
    /*And convert the run_counts array to a moment table.
      Afterwards run_counts[k] is the total of the original counts at indices
       k and above.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
  }
  return eobs;
}

/*Unpacks the AC coefficient tokens.
  This can completely discard coefficient values while unpacking, and so is
   somewhat simpler than unpacking the DC coefficient tokens.
  The tokens and their extra bits are stored in row _zzi of the decoder's
   token and extra-bits lists.
  _dec:        The decoder context.
  _zzi:        The zig-zag index of the coefficient to unpack tokens for.
  _huff_idxs:  The index of the Huffman table to use for each color plane.
  _ntoks_left: The number of tokens left to be decoded in each color plane for
                each coefficient.
               This is updated as EOB tokens and zero run tokens are decoded.
  _eobs:       The length of any outstanding EOB run from previous
                coefficients.
  Return: The length of any outstanding EOB run.*/
static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[3],
 int _ntoks_left[3][64],int _eobs){
  long val;
  /*run_counts[k] counts the tokens of the current plane for which
     oc_dct_token_skip() returned k+1, with EOB coverage counted in
     run_counts[63].*/
  int  run_counts[64];
  /*The number of fragments of the current plane accounted for so far.*/
  int  cfi;
  /*The write positions in the token and extra-bits lists; these carry on
     from one plane to the next.*/
  int  ti;
  int  ebi;
  int  pli;
  int  rli;
  ti=ebi=0;
  for(pli=0;pli<3;pli++){
    memset(run_counts,0,sizeof(run_counts));
    /*Record the EOB run carried into this plane.*/
    _dec->eob_runs[pli][_zzi]=_eobs;
    cfi=0;
    /*Keep decoding tokens until the pending EOB run covers every fragment of
       this plane that is still unaccounted for.*/
    while(_eobs<_ntoks_left[pli][_zzi]-cfi){
      int token;
      int neb;
      int eb;
      int skip;
      /*The previous EOB run, if any, lies entirely within this plane.*/
      cfi+=_eobs;
      run_counts[63]+=_eobs;
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli]]);
      _dec->dct_tokens[_zzi][ti++]=(unsigned char)token;
      neb=OC_DCT_TOKEN_EXTRA_BITS[token];
      if(neb){
        theora_read(&_dec->opb,neb,&val);
        eb=(int)val;
        _dec->extra_bits[_zzi][ebi++]=(ogg_uint16_t)eb;
      }
      else eb=0;
      skip=oc_dct_token_skip(token,eb);
      /*A negative skip is an EOB run of length -skip.*/
      if(skip<0)_eobs=-skip;
      else{
        /*NOTE(review): skip is presumably the number of coefficients the
           token covers, and is assumed to lie between 1 and 64; confirm
           against oc_dct_token_skip().*/
        run_counts[skip-1]++;
        cfi++;
        _eobs=0;
      }
    }
    /*Remember where this plane's tokens and extra bits end.*/
    _dec->ti0[pli][_zzi]=ti;
    _dec->ebi0[pli][_zzi]=ebi;
    /*Set the EOB count to the portion of the last EOB run which extends past
       this coefficient.*/
    _eobs=_eobs+cfi-_ntoks_left[pli][_zzi];
    /*Add the portion of the last EOB which was included in this coefficient to
       to the longest run length.*/
    run_counts[63]+=_ntoks_left[pli][_zzi]-cfi;
    /*And convert the run_counts array to a moment table.
      Afterwards run_counts[k] is the total of the original counts at indices
       k and above.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
  }
  return _eobs;
}

/*Tokens describing the DCT coefficients that belong to each fragment are
   stored in the bitstream grouped by coefficient, not by fragment.
  This means that we either decode all the tokens in order, building up a
   separate coefficient list for each fragment as we go, and then go back and
   do the iDCT on each fragment, or we have to create separate lists of tokens
   for each coefficient, so that we can pull the next token required off the
   head of the appropriate list when decoding a specific fragment.
  The former was VP3's choice, and it meant 2*w*h extra storage for all the
   decoded coefficient values.
  We take the second option, which lets us store just one or three bytes per
   token (generally far fewer than the number of coefficients, due to EOB
   tokens and zero runs), and which requires us to only maintain a counter for
   each of the 64 coefficients, instead of a counter for every fragment to
   determine where the next token goes.
  Actually, we use 3 counters per coefficient, one for each color plane, so we
   can decode all color planes simultaneously.
  This lets us color conversion, etc., be done as soon as a full MCU (one or
   two super block rows) is decoded, while the image data is still in cache.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  /*The (exclusive) upper zig-zag index of each Huffman table group.
    Entry 0 covers only the DC coefficient, which is unpacked separately.*/
  static const int OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  int  ntoks_left[3][64];
  int  huff_idxs[3];
  long bits;
  int  luma_huffi;
  int  chroma_huffi;
  int  eob_carry;
  int  group;
  int  plane;
  int  coeff;
  /*Every coefficient list of a plane starts out expecting one token for each
     coded fragment in that plane.*/
  for(plane=0;plane<3;plane++){
    int ncoded;
    ncoded=_dec->state.ncoded_fragis[plane];
    for(coeff=0;coeff<64;coeff++)ntoks_left[plane][coeff]=ncoded;
  }
  /*Read the two 4-bit Huffman table indices used for the DC tokens: the first
     is used for plane 0, the second for planes 1 and 2.*/
  theora_read(&_dec->opb,4,&bits);
  luma_huffi=(int)bits;
  theora_read(&_dec->opb,4,&bits);
  chroma_huffi=(int)bits;
  huff_idxs[0]=luma_huffi;
  huff_idxs[1]=chroma_huffi;
  huff_idxs[2]=chroma_huffi;
  _dec->eob_runs[0][0]=0;
  eob_carry=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
  /*Read the two 4-bit base indices used for the AC tokens.*/
  theora_read(&_dec->opb,4,&bits);
  luma_huffi=(int)bits;
  theora_read(&_dec->opb,4,&bits);
  chroma_huffi=(int)bits;
  /*Each group of AC coefficients offsets the base indices by a further 16
     tables.
    Any EOB run left over from one coefficient is carried into the next.*/
  coeff=1;
  for(group=1;group<5;group++){
    int table_offset;
    int group_end;
    table_offset=group<<4;
    huff_idxs[0]=luma_huffi+table_offset;
    huff_idxs[1]=chroma_huffi+table_offset;
    huff_idxs[2]=huff_idxs[1];
    group_end=OC_HUFF_LIST_MAX[group];
    while(coeff<group_end){
      eob_carry=oc_dec_ac_coeff_unpack(_dec,coeff,huff_idxs,ntoks_left,
       eob_carry);
      coeff++;
    }
  }
  /*TODO: The left-over EOB count should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is INT_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}



/*The signature shared by every token expansion handler stored in
   OC_TOKEN_EXPAND_TABLE.
  A handler expands a single token into the given coefficient list.
  This fills in the zeros for zero runs as well as coefficient values, and
   updates the index of the current coefficient.
  It CANNOT be called for any of the EOB tokens.
  _token:      The token value to expand.
  _extra_bits: The extra bits associated with the token.
  _dct_coeffs: The current list of coefficients, in zig-zag order.
  _zzi:        A pointer to the zig-zag index of the next coefficient to write
                to.
               This is updated before the function returns.*/
typedef void (*oc_token_expand_func)(int _token,int _extra_bits,
 ogg_int16_t _dct_coeffs[128],int *_zzi);

/*Expands a zero run token.
  One zero is always written, followed by one more for each unit of
   _extra_bits; _token itself is not examined.*/
void oc_token_expand_zrl(int _token,int _extra_bits,
 ogg_int16_t _dct_coeffs[128],int *_zzi){
  int pos;
  pos=*_zzi;
  _dct_coeffs[pos++]=0;
  for(;_extra_bits>0;_extra_bits--)_dct_coeffs[pos++]=0;
  *_zzi=pos;
}

/*Expands a constant, single-value token.
  The value written is whatever oc_token_dec1val_const() yields for the token;
   _extra_bits is not examined.*/
void oc_token_expand_const(int _token,int _extra_bits,
 ogg_int16_t _dct_coeffs[128],int *_zzi){
  int pos;
  pos=*_zzi;
  _dct_coeffs[pos]=(ogg_int16_t)oc_token_dec1val_const(_token);
  *_zzi=pos+1;
}

/*Expands category 2 single-valued tokens.
  The value written is whatever oc_token_dec1val_cat2() yields for the token
   and its extra bits.*/
void oc_token_expand_cat2(int _token,int _extra_bits,
 ogg_int16_t _dct_coeffs[128],int *_zzi){
  int pos;
  pos=*_zzi;
  _dct_coeffs[pos]=(ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits);
  *_zzi=pos+1;
}

/*Expands category 3 through 8 single-valued tokens.
  The value written is whatever oc_token_dec1val_cati() yields for the token
   and its extra bits.*/
void oc_token_expand_cati(int _token,int _extra_bits,
 ogg_int16_t _dct_coeffs[128],int *_zzi){
  int pos;
  pos=*_zzi;
  _dct_coeffs[pos]=(ogg_int16_t)oc_token_dec1val_cati(_token,_extra_bits);
  *_zzi=pos+1;
}

/*Expands a category 1a zero run/value combo token.
  A run of (_token-OC_DCT_RUN_CAT1A+1) zeros is written, followed by a single
   value of 1-2*_extra_bits (i.e., +1 when _extra_bits is 0 and -1 when it is
   1).*/
void oc_token_expand_run_cat1a(int _token,int _extra_bits,
 ogg_int16_t _dct_coeffs[128],int *_zzi){
  int nzeros;
  int pos;
  pos=*_zzi;
  nzeros=_token-OC_DCT_RUN_CAT1A+1;
  while(nzeros>0){
    _dct_coeffs[pos++]=0;
    nzeros--;
  }
  _dct_coeffs[pos++]=(ogg_int16_t)(1-(_extra_bits<<1));
  *_zzi=pos;
}

/*Expands all other zero run/value combo tokens.
  The run length, the magnitude of the value and its sign are all packed into
   _extra_bits; the tables below, indexed by (_token-OC_DCT_RUN_CAT1B), say
   where each field lives and what constant is added to it.*/
void oc_token_expand_run(int _token,int _extra_bits,
 ogg_int16_t _dct_coeffs[128],int *_zzi){
  /*The minimum run length of each token.*/
  static const int NZEROS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
    6,10,1,2
  };
  /*The extra bits that are added to the minimum run length.*/
  static const int NZEROS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
    3,7,0,1
  };
  /*The position of the magnitude field in the extra bits.*/
  static const int VALUE_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
    0,0,0,1
  };
  /*The width mask of the magnitude field, after shifting.*/
  static const int VALUE_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
    0,0,1,1
  };
  /*The minimum magnitude of each token.*/
  static const int VALUE_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
    1,1,2,2
  };
  /*The position of the sign bit in the extra bits.*/
  static const int SIGN_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
    2,3,1,2
  };
  int which;
  int nzeros;
  int magnitude;
  int negative;
  int pos;
  which=_token-OC_DCT_RUN_CAT1B;
  nzeros=NZEROS_ADJUST[which]+(_extra_bits&NZEROS_MASK[which]);
  magnitude=VALUE_ADJUST[which]+
   ((_extra_bits>>VALUE_SHIFT[which])&VALUE_MASK[which]);
  /*The sign is the top-most extra bit: 0 for positive, 1 for negative.*/
  negative=_extra_bits>>SIGN_SHIFT[which];
  pos=*_zzi;
  for(;nzeros>0;nzeros--)_dct_coeffs[pos++]=0;
  _dct_coeffs[pos++]=(ogg_int16_t)(negative?-magnitude:magnitude);
  *_zzi=pos;
}

/*A jump table for expanding token values into coefficient values.
  This reduces all the conditional branches, etc., needed to parse these token
   values down to one indirect jump.
  The table is indexed by (token-OC_NDCT_EOB_TOKEN_MAX), as done in
   oc_dct_token_expand(), so it has no entries for the EOB tokens and the order
   of the entries below must follow the numbering of the remaining tokens.*/
static const oc_token_expand_func OC_TOKEN_EXPAND_TABLE[TH_NDCT_TOKENS-
 OC_NDCT_EOB_TOKEN_MAX]={
  /*Zero run tokens.*/
  oc_token_expand_zrl,
  oc_token_expand_zrl,
  /*Constant, single-value tokens.*/
  oc_token_expand_const,
  oc_token_expand_const,
  oc_token_expand_const,
  oc_token_expand_const,
  /*Category 2 single-valued tokens.*/
  oc_token_expand_cat2,
  oc_token_expand_cat2,
  oc_token_expand_cat2,
  oc_token_expand_cat2,
  /*Category 3 through 8 single-valued tokens.*/
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_cati,
  /*Category 1a zero run/value combo tokens.*/
  oc_token_expand_run_cat1a,
  oc_token_expand_run_cat1a,
  oc_token_expand_run_cat1a,
  oc_token_expand_run_cat1a,
  oc_token_expand_run_cat1a,
  /*All other zero run/value combo tokens.*/
  oc_token_expand_run,
  oc_token_expand_run,
  oc_token_expand_run,
  oc_token_expand_run
};

/*Expands a single token into the given coefficient list by dispatching to the
   handler registered for it in OC_TOKEN_EXPAND_TABLE.
  This fills in the zeros for zero runs as well as coefficient values, and
   updates the index of the current coefficient.
  It CANNOT be called for any of the EOB tokens, which have no table entry.
  _token:      The token value to expand.
  _extra_bits: The extra bits associated with the token.
  _dct_coeffs: The current list of coefficients, in zig-zag order.
  _zzi:        A pointer to the zig-zag index of the next coefficient to write
                to.
               This is updated before the function returns.*/
static void oc_dct_token_expand(int _token,int _extra_bits,
 ogg_int16_t *_dct_coeffs,int *_zzi){
  oc_token_expand_func expand;
  expand=OC_TOKEN_EXPAND_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX];
  expand(_token,_extra_bits,_dct_coeffs,_zzi);
}



/*Prepares the buffers needed for out-of-loop post-processing of the current
   frame, according to the level currently requested in _dec->pp_level.
  Buffers that are not needed at the requested level are freed, and the
   per-fragment DC quantization indices are refreshed for every coded
   fragment.
  If an allocation fails, the buffers involved are released and post-processing
   is reported as unavailable, rather than writing through a NULL pointer.
  _dec: The decoder context.
  Return: 0 if the post-processing frame buffers are set up and should be used,
           or 1 if no post-processing is to be applied to this frame.*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  /*pp_level 0: disabled; free any memory used and return*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _dec->dc_qis=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis==NULL){
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting now.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    /*Without the index array we cannot post-process; try again on the next
       key frame.*/
    if(_dec->dc_qis==NULL)return 1;
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  else{
    int           *coded_fragi;
    int           *coded_fragi_end;
    unsigned char  qi0;
    /*Update the DC quantization index of each coded block.*/
    qi0=(unsigned char)_dec->state.qis[0];
    coded_fragi_end=_dec->state.coded_fragis+_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    for(coded_fragi=_dec->state.coded_fragis;coded_fragi<coded_fragi_end;
     coded_fragi++){
      _dec->dc_qis[*coded_fragi]=qi0;
    }
  }
  /*pp_level 1: Stop after updating DC quantization indices.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  /*(Re)allocate the variance table and the post-processed frame whenever they
     are missing, or whenever the need for chroma planes has changed.*/
  if(_dec->variances==NULL||
   _dec->pp_frame_has_chroma!=(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
    unsigned char *frame_data;
    int           *variances;
    size_t         nvariances;
    size_t         y_sz;
    size_t         c_sz;
    int            has_chroma;
    int            c_w;
    int            c_h;
    has_chroma=_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC;
    /*Do the size arithmetic in size_t, so that the product of two large
       dimensions is not truncated before it is widened.*/
    y_sz=(size_t)_dec->state.info.frame_width*_dec->state.info.frame_height;
    c_w=0;
    c_h=0;
    c_sz=0;
    if(has_chroma){
      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
      c_sz=(size_t)c_w*c_h;
      nvariances=(size_t)_dec->state.nfrags;
    }
    else nvariances=(size_t)_dec->state.fplanes[0].nfrags;
    /*Keep the old pointers until we know the reallocation succeeded, so that
       nothing is leaked on failure.*/
    variances=(int *)_ogg_realloc(_dec->variances,
     nvariances*sizeof(_dec->variances[0]));
    if(variances!=NULL)_dec->variances=variances;
    frame_data=(unsigned char *)_ogg_realloc(_dec->pp_frame_data,
     (y_sz+(c_sz<<1))*sizeof(_dec->pp_frame_data[0]));
    if(frame_data!=NULL)_dec->pp_frame_data=frame_data;
    if(variances==NULL||frame_data==NULL){
      /*Out of memory: drop both buffers, so that the next frame retries the
         allocation from scratch, and skip post-processing for this one.*/
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
      return 1;
    }
    _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
    _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
    if(!has_chroma){
      /*Only luma is post-processed: describe it directly with a negative
         stride, with the data pointer at the start of the last row stored.*/
      _dec->pp_frame_buf[0].ystride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (size_t)(_dec->pp_frame_buf[0].height-1)*_dec->pp_frame_buf[0].width;
    }
    else{
      /*Lay the three planes out back to back, then flip them all.*/
      _dec->pp_frame_buf[0].ystride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      _dec->pp_frame_buf[1].width=c_w;
      _dec->pp_frame_buf[1].height=c_h;
      _dec->pp_frame_buf[1].ystride=_dec->pp_frame_buf[1].width;
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
      _dec->pp_frame_buf[2].width=c_w;
      _dec->pp_frame_buf[2].height=c_h;
      _dec->pp_frame_buf[2].ystride=_dec->pp_frame_buf[2].width;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    _dec->pp_frame_has_chroma=has_chroma;
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}



/*The state of the main decoding pipeline, carried from one MCU to the next
   while a frame is being reconstructed.
  All of the arrays with a leading dimension of 3 are indexed by color plane
   first.*/
typedef struct{
  /*The index of the next token to read, for each plane and coefficient.*/
  int  ti[3][64];
  /*The index of the next extra bits entry to read, for each plane and
     coefficient.*/
  int  ebi[3][64];
  /*The number of blocks still covered by an EOB run, for each plane and
     coefficient.*/
  int  eob_runs[3][64];
  /*The bounding value array filled in by oc_state_loop_filter_init().*/
  int  bounding_values[512];
  /*The next entry to consume in each plane's list of coded fragments.*/
  int *coded_fragis[3];
  /*The current position in each plane's list of uncoded fragments; this list
     is walked downwards.*/
  int *uncoded_fragis[3];
  /*The first fragment row of the current MCU in each plane.*/
  int  fragy0[3];
  /*One past the last fragment row of the current MCU in each plane.*/
  int  fragy_end[3];
  /*The number of coded fragments of the current MCU in each plane.*/
  int  ncoded_fragis[3];
  /*The number of uncoded fragments of the current MCU in each plane.*/
  int  nuncoded_fragis[3];
  /*The last DC value seen in each plane, indexed by the reference frame given
     by OC_FRAME_FOR_MODE[] for the fragment's coding mode.*/
  int  pred_last[3][3];
  /*The number of fragment rows the decode loop advances by for each MCU.*/
  int  mcu_nvfrags;
  /*Non-zero when oc_state_loop_filter_init() returned 0.*/
  int  loop_filter;
  /*The post-processing level in effect for this frame, saved so that a change
     made by the user during decoding does not take effect mid-frame.*/
  int  pp_level;
}oc_dec_pipeline_state;



/*Initialize the main decoding pipeline for a new frame.
  This seeds the per-plane token cursors, EOB run counts and fragment list
   pointers, clears the DC predictors, and sets up the loop filter and the
   post-processing buffers.
  _dec:  The decoder context.
  _pipe: The pipeline state to initialize.*/
static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe){
  int *next_coded;
  int *next_uncoded;
  int  pp_unavailable;
  int  plane;
  /*If chroma is sub-sampled in the vertical direction, we have to decode two
     super block rows of Y' for each super block row of Cb and Cr.*/
  _pipe->mcu_nvfrags=(_dec->state.info.pixel_fmt&2)?4:8;
  /*The first plane reads its tokens and extra bits from the start of each
     list.
    Every later plane picks up at the offsets recorded for the plane before
     it.*/
  memset(_pipe->ti[0],0,sizeof(_pipe->ti[0]));
  memset(_pipe->ebi[0],0,sizeof(_pipe->ebi[0]));
  for(plane=1;plane<3;plane++){
    memcpy(_pipe->ti[plane],_dec->ti0[plane-1],sizeof(_pipe->ti[0]));
    memcpy(_pipe->ebi[plane],_dec->ebi0[plane-1],sizeof(_pipe->ebi[0]));
  }
  /*Also copy over the initial EOB run counts.*/
  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
  /*Set up per-plane pointers to the coded and uncoded fragments lists.
    The coded list is walked upwards, and the uncoded list downwards.*/
  next_coded=_dec->state.coded_fragis;
  next_uncoded=_dec->state.uncoded_fragis;
  for(plane=0;plane<3;plane++){
    _pipe->coded_fragis[plane]=next_coded;
    _pipe->uncoded_fragis[plane]=next_uncoded;
    next_coded+=_dec->state.ncoded_fragis[plane];
    next_uncoded-=_dec->state.nuncoded_fragis[plane];
  }
  /*Set the previous DC predictor to 0 for all color planes and frame types.*/
  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
  /*Initialize the bounding value array for the loop filter.*/
  _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
   _pipe->bounding_values);
  /*Initialize any buffers needed for post-processing.*/
  pp_unavailable=oc_dec_postprocess_init(_dec);
  if(pp_unavailable){
    /*If we don't have enough information to post-process, disable it,
       regardless of the user-requested level, and output the reconstructed
       frame itself.*/
    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
    memcpy(_dec->pp_frame_buf,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
     sizeof(_dec->pp_frame_buf[0])*3);
  }
  /*Otherwise save the current post-processing level, to guard against the
     user changing it from a callback.*/
  else _pipe->pp_level=_dec->pp_level;
}

/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  As a side effect, the number of coded and uncoded fragments in this plane of
   the MCU is also computed and stored in the pipeline state.
  _dec:  The decoder context.
  _pipe: The pipeline state; fragy0[] and fragy_end[] must already be set for
          this plane.
  _pli:  The color plane to process.*/
static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  oc_fragment_plane *fplane;
  oc_fragment       *frag;
  int               *pred_last;
  int                nhfrags;
  int                ncoded;
  int                row0;
  int                row_end;
  int                row;
  int                col;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  /*The first and last fragment row of the current MCU for this plane.*/
  row0=_pipe->fragy0[_pli];
  row_end=_pipe->fragy_end[_pli];
  pred_last=_pipe->pred_last[_pli];
  frag=_dec->state.frags+fplane->froffset+row0*nhfrags;
  ncoded=0;
  for(row=row0;row<row_end;row++){
    for(col=0;col<nhfrags;col++,frag++){
      if(frag->coded){
        /*Add the prediction back in, and remember the result as the most
           recent DC value for this fragment's reference frame.*/
        frag->dc+=oc_frag_pred_dc(frag,fplane,col,row,pred_last);
        pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc;
        ncoded++;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=(row_end-row0)*nhfrags-ncoded;
}

/*Reconstructs all coded fragments in a single plane of an MCU (one or two
   super block rows), and then fills in the uncoded ones.
  Before this is called, each coded fragment must have a proper macro block
   mode and motion vector (if not in INTRA mode), and must have its DC value
   decoded, with the DC prediction process reversed.
  The number of coded and uncoded fragments in this plane of the MCU must
   already have been counted, and the token lists for each color plane and
   coefficient must be filled in, along with the initial token offsets, extra
   bits offsets, and EOB run counts.
  _dec:  The decoder context.
  _pipe: The pipeline state.
  _pli:  The color plane to process.*/
static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  int *ti;
  int *ebi;
  int *eob_runs;
  int *fragip;
  int *fragip_end;
  ti=_pipe->ti[_pli];
  ebi=_pipe->ebi[_pli];
  eob_runs=_pipe->eob_runs[_pli];
  fragip=_pipe->coded_fragis[_pli];
  fragip_end=fragip+_pipe->ncoded_fragis[_pli];
  /*Decode the AC coefficients of each coded fragment and reconstruct it.*/
  while(fragip<fragip_end){
    oc_fragment    *frag;
    oc_quant_table *iquants;
    /*This array is larger than the 64 coefficients of a block, so that a zero
       run which starts inside the block but extends past its end does not
       overflow the buffer.*/
    ogg_int16_t     dct_coeffs[128];
    int             last_zzi;
    int             ncoeffs;
    int             zzi;
    frag=_dec->state.frags+*fragip;
    fragip++;
    zzi=0;
    do{
      int token;
      int has_eb;
      int eb;
      last_zzi=zzi;
      /*An active EOB run at this coefficient ends the block.*/
      if(eob_runs[zzi]){
        eob_runs[zzi]--;
        break;
      }
      token=_dec->dct_tokens[zzi][ti[zzi]];
      ti[zzi]++;
      /*Only consume an extra bits entry if this token actually has one.*/
      has_eb=OC_DCT_TOKEN_EXTRA_BITS[token]!=0;
      eb=_dec->extra_bits[zzi][ebi[zzi]]&-has_eb;
      ebi[zzi]+=has_eb;
      if(token<OC_NDCT_EOB_TOKEN_MAX){
        /*A new EOB run: record its length, and let the next iteration
           consume the first block of it.*/
        eob_runs[zzi]=-oc_dct_token_skip(token,eb);
      }
      else oc_dct_token_expand(token,eb,dct_coeffs,&zzi);
    }
    while(zzi<64);
    /*TODO: zzi should be exactly 64 here if no EOB was hit.
      If it's larger, we should report some kind of warning.*/
    ncoeffs=OC_MINI(zzi,64);
    dct_coeffs[0]=(ogg_int16_t)frag->dc;
    iquants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][_pli];
    oc_state_frag_recon(&_dec->state,frag,_pli,dct_coeffs,last_zzi,ncoeffs,
     iquants[_dec->state.qis[0]][0],iquants[frag->qi]);
  }
  _pipe->coded_fragis[_pli]=fragip;
  /*Right now the reconstructed MCU has only the coded blocks in it.*/
  /*TODO: We make the decision here to always copy the uncoded blocks into it
     from the reference frame.
    We could also copy the coded blocks back over the reference frame, if we
     wait for an additional MCU to be decoded, which might be faster if only a
     small number of blocks are coded.
    However, this introduces more latency, creating a larger cache footprint.
    It's unknown which decision is better, but this one results in simpler
     code, and the hard case (high bitrate, high resolution) is handled
     correctly.*/
  /*Copy the uncoded blocks from the previous reference frame.*/
  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
  oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis[_pli],
   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
}

/*Filter a horizontal block edge.
  For each of 8 columns, 10 vertically adjacent pixels are read from the
   source; the edge lies between the 5th and the 6th.
  The 8 pixels in the middle are written to the destination, either smoothed
   (when both sides of the edge are flat enough and the step across the edge is
   small enough) or copied unchanged.
  _dst:         The first of the 8 destination rows.
  _dst_ystride: The offset between destination rows.
  _src:         The first of the 10 source rows.
  _src_ystride: The offset between source rows.
  _qstep:       The step across the edge must be smaller than this to filter.
  _flimit:      The activity on each side must be smaller than this to filter.
  _variance0:   Accumulates the activity (clamped to 255 per column) of the 5
                 pixels before the edge.
  _variance1:   Accumulates the activity (clamped to 255 per column) of the 5
                 pixels after the edge.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    int p[10];
    int q[8];
    int act0;
    int act1;
    int i;
    /*Gather the whole column before anything is written.*/
    for(i=0;i<10;i++)p[i]=_src[col+i*_src_ystride];
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    if(act0<_flimit&&act1<_flimit&&abs(p[5]-p[4])<_qstep){
      /*Smooth across the edge; the taps are shortened at either end.*/
      q[0]=(p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3;
      q[1]=(p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3;
      for(i=0;i<4;i++){
        q[i+2]=(p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4)>>3;
      }
      q[6]=(p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3;
      q[7]=(p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3;
    }
    else{
      /*Leave the pixels alone.*/
      for(i=0;i<8;i++)q[i]=p[i+1];
    }
    for(i=0;i<8;i++)_dst[col+i*_dst_ystride]=(unsigned char)q[i];
  }
}

/*Filter a vertical block edge, in place.
  For each of 8 rows, 10 horizontally adjacent pixels are read, starting one
   pixel to the left of _dst; the edge lies between the 5th and the 6th.
  The 8 pixels in the middle (i.e., starting at _dst) are then overwritten,
   either smoothed (when both sides of the edge are flat enough and the step
   across the edge is small enough) or with their original values.
  _dst:         The first pixel to write in the first row.
  _dst_ystride: The offset between rows.
  _qstep:       The step across the edge must be smaller than this to filter.
  _flimit:      The activity on each side must be smaller than this to filter.
  _variances:   _variances[0] accumulates the activity (clamped to 255 per
                 row) of the 5 pixels before the edge, and _variances[1] that
                 of the 5 pixels after it.*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            line;
  row=_dst;
  for(line=0;line<8;line++){
    int p[10];
    int q[8];
    int act0;
    int act1;
    int i;
    /*Gather the whole row before anything is written.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    if(act0<_flimit&&act1<_flimit&&abs(p[5]-p[4])<_qstep){
      /*Smooth across the edge; the taps are shortened at either end.*/
      q[0]=(p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3;
      q[1]=(p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3;
      for(i=0;i<4;i++){
        q[i+2]=(p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4)>>3;
      }
      q[6]=(p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3;
      q[7]=(p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3;
    }
    else{
      /*Leave the pixels alone.*/
      for(i=0;i<8;i++)q[i]=p[i+1];
    }
    for(i=0;i<8;i++)row[i]=(unsigned char)q[i];
    row+=_dst_ystride;
  }
}

/*Deblocks a range of fragment rows of one plane, reading from _src and
   writing to _dst, and accumulates the per-fragment variances that the
   deringing filter uses afterwards.
  Because each horizontal edge filter needs pixels from the fragment row below
   it, the rows actually written lag half a fragment row behind the range
   requested, except at the bottom of the frame.
  _dec:       The decoder context.
  _dst:       The image to write to; only plane _pli is touched.
  _src:       The image to read from; only plane _pli is used.
  _pli:       The color plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane   *fplane;
  th_img_plane        *dplane;
  th_img_plane        *splane;
  int                 *var;
  unsigned char       *qip;
  unsigned char       *drow;
  const unsigned char *srow;
  int                  nhfrags;
  int                  dstride;
  int                  sstride;
  int                  width;
  int                  first_frag;
  int                  has_prev;
  int                  has_next;
  int                  nclear_rows;
  int                  qstep;
  int                  flimit;
  int                  y_end;
  int                  y;
  int                  x;
  dplane=_dst+_pli;
  splane=_src+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  first_frag=fplane->froffset+_fragy0*nhfrags;
  var=_dec->variances+first_frag;
  qip=_dec->dc_qis+first_frag;
  has_prev=_fragy0>0;
  has_next=_fragy_end<fplane->nvfrags;
  /*We want to clear an extra row of variances, except at the end.
    The first row of the range was already cleared (and partly filled in) by
     the previous call, unless this is the first call.*/
  nclear_rows=_fragy_end+has_next-_fragy0-has_prev;
  memset(var+(has_prev?nhfrags:0),0,nclear_rows*nhfrags*sizeof(var[0]));
  /*Except for the first time, we want to point to the middle of the row.*/
  y=(_fragy0<<3)+(has_prev<<2);
  dstride=dplane->ystride;
  sstride=splane->ystride;
  width=dplane->width;
  drow=dplane->data+y*dstride;
  srow=splane->data+y*sstride;
  /*The top four rows of the frame have no edge above them: copy them.*/
  for(;y<4;y++){
    memcpy(drow,srow,width*sizeof(drow[0]));
    drow+=dstride;
    srow+=sstride;
  }
  /*We also want to skip the last row in the frame for this loop.*/
  y_end=(_fragy_end-!has_next)<<3;
  for(;y<y_end;y+=8){
    /*The left-most fragment only has a horizontal edge to filter.*/
    qstep=_dec->pp_dc_scale[*qip];
    flimit=(qstep*3)>>2;
    oc_filter_hedge(drow,dstride,srow-sstride,sstride,
     qstep,flimit,var,var+nhfrags);
    var++;
    qip++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*qip];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(drow+x,dstride,srow+x-sstride,sstride,
       qstep,flimit,var,var+nhfrags);
      /*The vertical edge to the left of this fragment is filtered in place,
         over the 8 rows that are now complete.*/
      oc_filter_vedge(drow+x-dstride*4-4,dstride,qstep,flimit,var-1);
      var++;
      qip++;
    }
    drow+=dstride*8;
    srow+=sstride*8;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!has_next){
    int height;
    height=dplane->height;
    for(;y<height;y++){
      memcpy(drow,srow,width*sizeof(drow[0]));
      drow+=dstride;
      srow+=sstride;
    }
    /*Filter the last row of vertical block edges.*/
    qip++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*qip];
      qip++;
      flimit=(qstep*3)>>2;
      oc_filter_vedge(drow+x-dstride*8-4,dstride,qstep,flimit,var);
      var++;
    }
  }
}

/*Derings a single 8x8 block, in place.
  Each pixel is replaced by a weighted average of itself and its four
   neighbors, where the weight of a neighbor shrinks as its difference from the
   pixel grows.
  _idata:     The top-left pixel of the block.
  _ystride:   The offset between rows.
  _b:         A set of flags marking which sides of the block lie on the image
               boundary, so that no pixels are read from beyond them:
               1 for the left, 2 for the right, 4 for the top, and 8 for the
               bottom.
  _dc_scale:  Added to the base weight; 3 times this value also caps the
               weights.
  _sharp_mod: The weight used across very sharp differences.
  _strong:    Selects the stronger (1) or the weaker (0) set of weight
               parameters.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const int     MOD_MAX[2]={24,32};
  static const int     MOD_SHIFT[2]={1,0};
  const unsigned char *psrc;
  const unsigned char *src;
  const unsigned char *nsrc;
  unsigned char       *dst;
  /*vmod[(y<<3)+x] is the weight between pixel (x,y) and the one above it; row
     8 holds the weights between the last row and the one below the block.*/
  int                  vmod[72];
  /*hmod[(x<<3)+y] is the weight between pixel (x,y) and the one to its left;
     column 8 holds the weights between the last column and the one to the
     right of the block.*/
  int                  hmod[72];
  int                  mod_hi;
  int                  by;
  int                  bx;
  mod_hi=OC_MINI(3*_dc_scale,MOD_MAX[_strong]);
  dst=_idata;
  src=dst;
  /*On a boundary, the missing neighbor is replaced by the pixel itself.*/
  psrc=src-(_ystride&-!(_b&4));
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<MOD_SHIFT[_strong]);
      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    psrc=src;
    src+=_ystride&-(!(_b&8)|by<7);
  }
  nsrc=dst;
  psrc=dst-!(_b&1);
  for(bx=0;bx<9;bx++){
    src=nsrc;
    for(by=0;by<8;by++){
      int mod;
      mod=32+_dc_scale-(abs(*src-*psrc)<<MOD_SHIFT[_strong]);
      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      psrc+=_ystride;
      src+=_ystride;
    }
    psrc=nsrc;
    nsrc+=!(_b&2)|bx<7;
  }
  src=dst;
  psrc=src-(_ystride&-!(_b&4));
  nsrc=src+_ystride;
  for(by=0;by<8;by++){
    int a;
    int b;
    int p;
    int p1;
    int a1;
    /*The left-most pixel of the row.
      The column index is spelled out as 0 here: bx still holds whatever the
       previous loop left in it (9 on the first row, 7 afterwards), which must
       not be used to index the weight tables.*/
    a=128;
    b=64;
    p=src[0];
    p1=*(src-!(_b&1));
    a1=hmod[by];
    a-=a1;
    b+=a1*p1;
    p1=psrc[0];
    a1=vmod[by<<3];
    a-=a1;
    b+=a1*p1;
    p1=nsrc[0];
    a1=vmod[(by+1)<<3];
    a-=a1;
    b+=a1*p1;
    p1=src[1];
    a1=hmod[(1<<3)+by];
    a-=a1;
    b+=a1*p1;
    dst[0]=OC_CLAMP255(a*p+b>>7);
    /*The interior pixels of the row.*/
    for(bx=1;bx<7;bx++){
      a=128;
      b=64;
      p=src[bx];
      p1=src[bx-1];
      a1=hmod[(bx<<3)+by];
      a-=a1;
      b+=a1*p1;
      p1=psrc[bx];
      a1=vmod[(by<<3)+bx];
      a-=a1;
      b+=a1*p1;
      p1=nsrc[bx];
      a1=vmod[((by+1)<<3)+bx];
      a-=a1;
      b+=a1*p1;
      p1=src[bx+1];
      a1=hmod[((bx+1)<<3)+by];
      a-=a1;
      b+=a1*p1;
      dst[bx]=OC_CLAMP255(a*p+b>>7);
    }
    /*The right-most pixel of the row.*/
    a=128;
    b=64;
    p=src[7];
    p1=src[6];
    a1=hmod[(7<<3)+by];
    a-=a1;
    b+=a1*p1;
    p1=psrc[7];
    a1=vmod[(by<<3)+7];
    a-=a1;
    b+=a1*p1;
    p1=nsrc[7];
    a1=vmod[((by+1)<<3)+7];
    a-=a1;
    b+=a1*p1;
    p1=src[7+!(_b&2)];
    a1=hmod[(8<<3)+by];
    a-=a1;
    b+=a1*p1;
    dst[7]=OC_CLAMP255(a*p+b>>7);
    dst+=_ystride;
    psrc=src;
    src=nsrc;
    nsrc+=_ystride&-(!(_b&8)|by<6);
  }
}

/*Variance thresholds that oc_dec_dering_frag_rows() compares against the
   accumulated variance of each block to decide whether, and how strongly, the
   block is deringed.
  The higher thresholds are all multiples of the first.*/
#define OC_DERING_THRESH1 (384)
#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)

/*Applies the deringing filter, in place, to a range of fragment rows in one
   plane of an image.
  How strongly each block is filtered depends on the variance accumulated for
   it while deblocking, and, at the strong filtering levels, on the variance of
   its neighbors.
  _dec:       The decoder context.
  _img:       The image to filter; only plane _pli is touched.
  _pli:       The color plane to filter.
  _fragy0:    The first fragment row to filter.
  _fragy_end: One past the last fragment row to filter.*/
static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
 int _pli,int _fragy0,int _fragy_end){
  th_img_plane      *iplane;
  oc_fragment_plane *fplane;
  oc_fragment       *frag;
  int               *variance;
  unsigned char     *idata;
  int                nhfrags;
  int                sthresh;
  int                strong;
  int                froffset;
  int                y_end;
  int                y;
  int                x;
  iplane=_img+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*nhfrags;
  variance=_dec->variances+froffset;
  frag=_dec->state.frags+froffset;
  strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
  sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
  y=_fragy0<<3;
  idata=iplane->data+y*iplane->ystride;
  y_end=_fragy_end<<3;
  for(;y<y_end;y+=8){
    for(x=0;x<iplane->width;x+=8){
      int b;
      int qi;
      int var;
      qi=frag->qi;
      var=*variance;
      /*Flag the sides of this block that lie on the image boundary:
         1 for the left, 2 for the right, 4 for the top, 8 for the bottom.*/
      b=(x<=0)|(x+8>=iplane->width)<<1|(y<=0)<<2|(y+8>=iplane->height)<<3;
      if(strong&&var>sthresh){
        oc_dering_block(idata+x,iplane->ystride,b,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
        /*Filter two more times in the chroma planes, or when a neighboring
           block is very noisy as well.
          A neighbor may only be examined on a side which is NOT on the image
           boundary; otherwise it does not exist, and the variance read would
           belong to an unrelated block or lie outside the table entirely.
          Vertical neighbors are one fragment row, i.e., nhfrags entries,
           away.*/
        if(_pli||
         (!(b&1)&&*(variance-1)>OC_DERING_THRESH4)||
         (!(b&2)&&variance[1]>OC_DERING_THRESH4)||
         (!(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4)||
         (!(b&8)&&variance[nhfrags]>OC_DERING_THRESH4)){
          oc_dering_block(idata+x,iplane->ystride,b,
           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
          oc_dering_block(idata+x,iplane->ystride,b,
           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
        }
      }
      else if(var>OC_DERING_THRESH2){
        oc_dering_block(idata+x,iplane->ystride,b,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
      }
      else if(var>OC_DERING_THRESH1){
        oc_dering_block(idata+x,iplane->ystride,b,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
      }
      frag++;
      variance++;
    }
    idata+=iplane->ystride<<3;
  }
}



01870 th_dec_ctx *th_decode_alloc(const th_info *_info,
 const th_setup_info *_setup){
  oc_dec_ctx *dec;
  if(_info==NULL||_setup==NULL)return NULL;
  dec=_ogg_malloc(sizeof(*dec));
  if(oc_dec_init(dec,_info,_setup)<0){
    _ogg_free(dec);
    return NULL;
  }
  dec->state.curframe_num=0;
  return dec;
}

01883 void th_decode_free(th_dec_ctx *_dec){
  if(_dec!=NULL){
    oc_dec_clear(_dec);
    _ogg_free(_dec);
  }
}

01890 int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
    case TH_DECCTL_GET_PPLEVEL_MAX:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      (*(int *)_buf)=OC_PP_LEVEL_MAX;
      return 0;
    }break;
    case TH_DECCTL_SET_PPLEVEL:{
      int pp_level;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      pp_level=*(int *)_buf;
      if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
      _dec->pp_level=pp_level;
      return 0;
    }break;
    case TH_DECCTL_SET_GRANPOS:{
      ogg_int64_t granpos;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
      granpos=*(ogg_int64_t *)_buf;
      if(granpos<0)return TH_EINVAL;
      _dec->state.granpos=granpos;
      _dec->state.keyframe_num=
       granpos>>_dec->state.info.keyframe_granule_shift;
      _dec->state.curframe_num=_dec->state.keyframe_num+
       (granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
      return 0;
    }break;
    case TH_DECCTL_SET_STRIPE_CB:{
      th_stripe_callback *cb;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
      cb=(th_stripe_callback *)_buf;
      _dec->stripe_cb.ctx=cb->ctx;
      _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
      return 0;
    }break;
    default:return TH_EIMPL;
  }
}

01934 int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  int ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.
    Only proceed if we have a non-empty packet.*/
  if(_op->bytes!=0){
    oc_dec_pipeline_state pipe;
    th_ycbcr_buffer       stripe_buf;
    int                   stripe_fragy;
    int                   refi;
    int                   pli;
    int                   notstart;
    int                   notdone;
    oggpackB_readinit(&_dec->opb,_op->packet,_op->bytes);
    ret=oc_dec_frame_header_unpack(_dec);
    if(ret<0)return ret;
    /*Select a free buffer to use for the reconstructed version of this
       frame.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME&&
     (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
      th_info *info;
      size_t       yplane_sz;
      size_t       cplane_sz;
      int          yhstride;
      int          yvstride;
      int          chstride;
      int          cvstride;
      /*We're decoding an INTER frame, but have no initialized reference
         buffers (i.e., decoding did not start on a key frame).
        We initialize them to a solid gray here.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi=1;
      info=&_dec->state.info;
      yhstride=info->frame_width+2*OC_UMV_PADDING;
      yvstride=info->frame_height+2*OC_UMV_PADDING;
      chstride=yhstride>>!(info->pixel_fmt&1);
      cvstride=yvstride>>!(info->pixel_fmt&2);
      yplane_sz=(size_t)yhstride*yvstride;
      cplane_sz=(size_t)chstride*cvstride;
      memset(_dec->state.ref_frame_data,0x80,yplane_sz+2*cplane_sz);
    }
    else{
      for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
       refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
    }
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      oc_dec_mark_all_intra(_dec);
      _dec->state.keyframe_num=_dec->state.curframe_num;
    }
    else{
      oc_dec_coded_flags_unpack(_dec);
      oc_dec_mb_modes_unpack(_dec);
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
    }
    oc_dec_block_qis_unpack(_dec);
    oc_dec_residual_tokens_unpack(_dec);
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.*/
    _dec->state.granpos=
     (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
     (_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    /*All of the rest of the operations -- DC prediction reversal,
       reconstructing coded fragments, copying uncoded fragments, loop
       filtering, extending borders, and out-of-loop post-processing -- should
       be pipelined.
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
       copying are done for one or two super block rows, then loop filtering is
       run as far as it can, then bordering copying, then post-processing.
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
       block rows, and one chroma.
      Otherwise, an MCU consists of one super block row from each plane.
      Inside each MCU, we perform all of the steps on one color plane before
       moving on to the next.
      After reconstruction, the additional filtering stages introduce a delay
       since they need some pixels from the next fragment row.
      Thus the actual number of decoded rows available is slightly smaller for
       the first MCU, and slightly larger for the last.

      This entire process allows us to operate on the data while it is still in
       cache, resulting in big performance improvements.
      An application callback allows further application processing (blitting
       to video memory, color conversion, etc.) to also use the data while it's
       in cache.*/
    oc_dec_pipeline_init(_dec,&pipe);
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
    notstart=0;
    notdone=1;
    for(stripe_fragy=notstart=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
      int avail_fragy0;
      int avail_fragy_end;
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
      notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
      for(pli=0;pli<3;pli++){
        oc_fragment_plane *fplane;
        int                frag_shift;
        int                pp_offset;
        int                sdelay;
        int                edelay;
        fplane=_dec->state.fplanes+pli;
        /*Compute the first and last fragment row of the current MCU for this
           plane.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
        pipe.fragy0[pli]=stripe_fragy>>frag_shift;
        pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
         pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
        oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
        oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
        sdelay=edelay=0;
        if(pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
          oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
           refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
        }
        /*To fill the borders, we have an additional two pixel delay, since a
           fragment in the next row could filter its top edge, using two pixels
           from a fragment in this row.
          But there's no reason to delay a full fragment between the two.*/
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
         (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
         (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
        /*Out-of-loop post-processing.*/
        pp_offset=3*(pli!=0);
        if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
          /*Perform de-blocking in one plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
           _dec->state.ref_frame_bufs[refi],pli,
           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
          if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
            /*Perform de-ringing in one plane.*/
            sdelay+=notstart;
            edelay+=notdone;
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
             pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
          }
        }
        /*If no post-processing is done, we still need to delay a row for the
           loop filter, thanks to the strange filtering order VP3 chose.*/
        else if(pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
        }
        /*Compute the intersection of the available rows in all planes.
          If chroma is sub-sampled, the effect of each of its delays is
           doubled, but luma might have more post-processing filters enabled
           than chroma, so we don't know up front which one is the limiting
           factor.*/
        avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
        avail_fragy_end=OC_MINI(avail_fragy_end,
         pipe.fragy_end[pli]-edelay<<frag_shift);
      }
      if(_dec->stripe_cb.stripe_decoded!=NULL){
        /*Make the callback, ensuring we flip the sense of the "start" and
           "end" of the available region upside down.*/
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
      }
      notstart=1;
    }
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*The new frame becomes both the previous and gold reference frames.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
    }
    else{
      /*Otherwise, just replace the previous reference frame.*/
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
    }
#if defined(OC_DUMP_IMAGES)
    /*Don't dump images for dropped frames.*/
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
    return 0;
  }
  else{
    /*Just update the granule position and return.*/
    _dec->state.granpos=
     (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
     (_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    return TH_DUPFRAME;
  }
}

02135 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
  return 0;
}

/*Generated by Doxygen 1.6.0.*/