Logo Search packages:      
Sourcecode: libtheora version File versions  Download package

mmxidct.c

/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
    last mod: $Id:

 ********************************************************************/

/* -------------------------------------------------------------------
  MMX based IDCT for the theora codec.

  Originally written by Rudolf Marek, based on code from On2's VP3.
  Converted to Visual Studio inline assembly by Nils Pipenbrinck.

  ---------------------------------------------------------------------*/
#if defined(USE_ASM)

#include <ogg/ogg.h>
#include "../dct.h"
#include "../idct.h"
#include "x86int.h"

/*A table of constants used by the MMX routines.*/
static const __declspec(align(16)) ogg_uint16_t
 OC_IDCT_CONSTS[(7+1)*4]={
  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
      8,    8,    8,    8
};


void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){
  _asm {
    mov     edx, [_y]
    mov     eax, offset OC_IDCT_CONSTS
    movq    mm2, [edx + 30H]
    movq    mm6, [eax + 10H]
    movq    mm4, mm2
    movq    mm7, [edx + 18H]
    pmulhw  mm4, mm6
    movq    mm1, [eax + 20H]
    pmulhw  mm6, mm7
    movq    mm5, mm1
    pmulhw  mm1, mm2
    movq    mm3, [edx + 10H]
    pmulhw  mm5, mm7
    movq    mm0, [eax]
    paddw   mm4, mm2
    paddw   mm6, mm7
    paddw   mm2, mm1
    movq    mm1, [edx + 38H]
    paddw   mm7, mm5
    movq    mm5, mm0
    pmulhw  mm0, mm3
    paddw   mm4, mm7
    pmulhw  mm5, mm1
    movq    mm7, [eax + 30H]
    psubw   mm6, mm2
    paddw   mm0, mm3
    pmulhw  mm3, mm7
    movq    mm2, [edx + 20H]
    pmulhw  mm7, mm1
    paddw   mm5, mm1
    movq    mm1, mm2
    pmulhw  mm2, [eax + 08H]
    psubw   mm3, mm5
    movq    mm5, [edx + 28H]
    paddw   mm0, mm7
    movq    mm7, mm5
    psubw   mm0, mm4
    pmulhw  mm5, [eax + 08H]
    paddw   mm2, mm1
    pmulhw  mm1, [eax + 28H]
    paddw   mm4, mm4
    paddw   mm4, mm0
    psubw   mm3, mm6
    paddw   mm5, mm7
    paddw   mm6, mm6
    pmulhw  mm7, [eax + 28H]
    paddw   mm6, mm3
    movq    [edx + 10H], mm4
    psubw   mm1, mm5
    movq    mm4, [eax + 18H]
    movq    mm5, mm3
    pmulhw  mm3, mm4
    paddw   mm7, mm2
    movq    [edx + 20H], mm6
    movq    mm2, mm0
    movq    mm6, [edx]
    pmulhw  mm0, mm4
    paddw   mm5, mm3
    movq    mm3, [edx + 08H]
    psubw   mm5, mm1
    paddw   mm2, mm0
    psubw   mm6, mm3
    movq    mm0, mm6
    pmulhw  mm6, mm4
    paddw   mm3, mm3
    paddw   mm1, mm1
    paddw   mm3, mm0
    paddw   mm1, mm5
    pmulhw  mm4, mm3
    paddw   mm6, mm0
    psubw   mm6, mm2
    paddw   mm2, mm2
    movq    mm0, [edx + 10H]
    paddw   mm2, mm6
    paddw   mm4, mm3
    psubw   mm2, mm1
    movq    mm3, [edx + 20H]
    psubw   mm4, mm7
    paddw   mm1, mm1
    paddw   mm7, mm7
    paddw   mm1, mm2
    paddw   mm7, mm4
    psubw   mm4, mm3
    paddw   mm3, mm3
    psubw   mm6, mm5
    paddw   mm5, mm5
    paddw   mm3, mm4
    paddw   mm5, mm6
    psubw   mm7, mm0
    paddw   mm0, mm0
    movq    [edx + 10H], mm1
    paddw   mm0, mm7
    movq    mm1, mm4
    punpcklwd mm4, mm5
    movq    [edx], mm0
    punpckhwd mm1, mm5
    movq    mm0, mm6
    punpcklwd mm6, mm7
    movq    mm5, mm4
    punpckldq mm4, mm6
    punpckhdq mm5, mm6
    movq    mm6, mm1
    movq    [edx + 08H], mm4
    punpckhwd mm0, mm7
    movq    [edx + 18H], mm5
    punpckhdq mm6, mm0
    movq    mm4, [edx]
    punpckldq mm1, mm0
    movq    mm5, [edx + 10H]
    movq    mm0, mm4
    movq    [edx + 38H], mm6
    punpcklwd mm0, mm5
    movq    [edx + 28H], mm1
    punpckhwd mm4, mm5
    movq    mm5, mm2
    punpcklwd mm2, mm3
    movq    mm1, mm0
    punpckldq mm0, mm2
    punpckhdq mm1, mm2
    movq    mm2, mm4
    movq    [edx], mm0
    punpckhwd mm5, mm3
    movq    [edx + 10H], mm1
    punpckhdq mm4, mm5
    punpckldq mm2, mm5
    movq    [edx + 30H], mm4
    movq    [edx + 20H], mm2
    movq    mm2, [edx + 70H]
    movq    mm6, [eax + 10H]
    movq    mm4, mm2
    movq    mm7, [edx + 58H]
    pmulhw  mm4, mm6
    movq    mm1, [eax + 20H]
    pmulhw  mm6, mm7
    movq    mm5, mm1
    pmulhw  mm1, mm2
    movq    mm3, [edx + 50H]
    pmulhw  mm5, mm7
    movq    mm0, [eax]
    paddw   mm4, mm2
    paddw   mm6, mm7
    paddw   mm2, mm1
    movq    mm1, [edx + 78H]
    paddw   mm7, mm5
    movq    mm5, mm0
    pmulhw  mm0, mm3
    paddw   mm4, mm7
    pmulhw  mm5, mm1
    movq    mm7, [eax + 30H]
    psubw   mm6, mm2
    paddw   mm0, mm3
    pmulhw  mm3, mm7
    movq    mm2, [edx + 60H]
    pmulhw  mm7, mm1
    paddw   mm5, mm1
    movq    mm1, mm2
    pmulhw  mm2, [eax + 08H]
    psubw   mm3, mm5
    movq    mm5, [edx + 68H]
    paddw   mm0, mm7
    movq    mm7, mm5
    psubw   mm0, mm4
    pmulhw  mm5, [eax + 08H]
    paddw   mm2, mm1
    pmulhw  mm1, [eax + 28H]
    paddw   mm4, mm4
    paddw   mm4, mm0
    psubw   mm3, mm6
    paddw   mm5, mm7
    paddw   mm6, mm6
    pmulhw  mm7, [eax + 28H]
    paddw   mm6, mm3
    movq    [edx + 50H], mm4
    psubw   mm1, mm5
    movq    mm4, [eax + 18H]
    movq    mm5, mm3
    pmulhw  mm3, mm4
    paddw   mm7, mm2
    movq    [edx + 60H], mm6
    movq    mm2, mm0
    movq    mm6, [edx + 40H]
    pmulhw  mm0, mm4
    paddw   mm5, mm3
    movq    mm3, [edx + 48H]
    psubw   mm5, mm1
    paddw   mm2, mm0
    psubw   mm6, mm3
    movq    mm0, mm6
    pmulhw  mm6, mm4
    paddw   mm3, mm3
    paddw   mm1, mm1
    paddw   mm3, mm0
    paddw   mm1, mm5
    pmulhw  mm4, mm3
    paddw   mm6, mm0
    psubw   mm6, mm2
    paddw   mm2, mm2
    movq    mm0, [edx + 50H]
    paddw   mm2, mm6
    paddw   mm4, mm3
    psubw   mm2, mm1
    movq    mm3, [edx + 60H]
    psubw   mm4, mm7
    paddw   mm1, mm1
    paddw   mm7, mm7
    paddw   mm1, mm2
    paddw   mm7, mm4
    psubw   mm4, mm3
    paddw   mm3, mm3
    psubw   mm6, mm5
    paddw   mm5, mm5
    paddw   mm3, mm4
    paddw   mm5, mm6
    psubw   mm7, mm0
    paddw   mm0, mm0
    movq    [edx + 50H], mm1
    paddw   mm0, mm7
    movq    mm1, mm4
    punpcklwd mm4, mm5
    movq    [edx + 40H], mm0
    punpckhwd mm1, mm5
    movq    mm0, mm6
    punpcklwd mm6, mm7
    movq    mm5, mm4
    punpckldq mm4, mm6
    punpckhdq mm5, mm6
    movq    mm6, mm1
    movq    [edx + 48H], mm4
    punpckhwd mm0, mm7
    movq    [edx + 58H], mm5
    punpckhdq mm6, mm0
    movq    mm4, [edx + 40H]
    punpckldq mm1, mm0
    movq    mm5, [edx + 50H]
    movq    mm0, mm4
    movq    [edx + 78H], mm6
    punpcklwd mm0, mm5
    movq    [edx + 68H], mm1
    punpckhwd mm4, mm5
    movq    mm5, mm2
    punpcklwd mm2, mm3
    movq    mm1, mm0
    punpckldq mm0, mm2
    punpckhdq mm1, mm2
    movq    mm2, mm4
    movq    [edx + 40H], mm0
    punpckhwd mm5, mm3
    movq    [edx + 50H], mm1
    punpckhdq mm4, mm5
    punpckldq mm2, mm5
    movq    [edx + 70H], mm4
    movq    [edx + 60H], mm2
    movq    mm2, [edx + 30H]
    movq    mm6, [eax + 10H]
    movq    mm4, mm2
    movq    mm7, [edx + 50H]
    pmulhw  mm4, mm6
    movq    mm1, [eax + 20H]
    pmulhw  mm6, mm7
    movq    mm5, mm1
    pmulhw  mm1, mm2
    movq    mm3, [edx + 10H]
    pmulhw  mm5, mm7
    movq    mm0, [eax]
    paddw   mm4, mm2
    paddw   mm6, mm7
    paddw   mm2, mm1
    movq    mm1, [edx + 70H]
    paddw   mm7, mm5
    movq    mm5, mm0
    pmulhw  mm0, mm3
    paddw   mm4, mm7
    pmulhw  mm5, mm1
    movq    mm7, [eax + 30H]
    psubw   mm6, mm2
    paddw   mm0, mm3
    pmulhw  mm3, mm7
    movq    mm2, [edx + 20H]
    pmulhw  mm7, mm1
    paddw   mm5, mm1
    movq    mm1, mm2
    pmulhw  mm2, [eax + 08H]
    psubw   mm3, mm5
    movq    mm5, [edx + 60H]
    paddw   mm0, mm7
    movq    mm7, mm5
    psubw   mm0, mm4
    pmulhw  mm5, [eax + 08H]
    paddw   mm2, mm1
    pmulhw  mm1, [eax + 28H]
    paddw   mm4, mm4
    paddw   mm4, mm0
    psubw   mm3, mm6
    paddw   mm5, mm7
    paddw   mm6, mm6
    pmulhw  mm7, [eax + 28H]
    paddw   mm6, mm3
    movq    [edx + 10H], mm4
    psubw   mm1, mm5
    movq    mm4, [eax + 18H]
    movq    mm5, mm3
    pmulhw  mm3, mm4
    paddw   mm7, mm2
    movq    [edx + 20H], mm6
    movq    mm2, mm0
    movq    mm6, [edx]
    pmulhw  mm0, mm4
    paddw   mm5, mm3
    movq    mm3, [edx + 40H]
    psubw   mm5, mm1
    paddw   mm2, mm0
    psubw   mm6, mm3
    movq    mm0, mm6
    pmulhw  mm6, mm4
    paddw   mm3, mm3
    paddw   mm1, mm1
    paddw   mm3, mm0
    paddw   mm1, mm5
    pmulhw  mm4, mm3
    paddw   mm6, mm0
    psubw   mm6, mm2
    paddw   mm2, mm2
    movq    mm0, [edx + 10H]
    paddw   mm2, mm6
    paddw   mm4, mm3
    psubw   mm2, mm1
    paddw   mm2, [eax + 38H]
    paddw   mm1, mm1
    paddw   mm1, mm2
    psraw   mm2, 4
    psubw   mm4, mm7
    psraw   mm1, 4
    movq    mm3, [edx + 20H]
    paddw   mm7, mm7
    movq    [edx + 20H], mm2
    paddw   mm7, mm4
    movq    [edx + 10H], mm1
    psubw   mm4, mm3
    paddw   mm4, [eax + 38H]
    paddw   mm3, mm3
    paddw   mm3, mm4
    psraw   mm4, 4
    psubw   mm6, mm5
    psraw   mm3, 4
    paddw   mm6, [eax + 38H]
    paddw   mm5, mm5
    paddw   mm5, mm6
    psraw   mm6, 4
    movq    [edx + 40H], mm4
    psraw   mm5, 4
    movq    [edx + 30H], mm3
    psubw   mm7, mm0
    paddw   mm7, [eax + 38H]
    paddw   mm0, mm0
    paddw   mm0, mm7
    psraw   mm7, 4
    movq    [edx + 60H], mm6
    psraw   mm0, 4
    movq    [edx + 50H], mm5
    movq    [edx + 70H], mm7
    movq    [edx], mm0
    movq    mm2, [edx + 38H]
    movq    mm6, [eax + 10H]
    movq    mm4, mm2
    movq    mm7, [edx + 58H]
    pmulhw  mm4, mm6
    movq    mm1, [eax + 20H]
    pmulhw  mm6, mm7
    movq    mm5, mm1
    pmulhw  mm1, mm2
    movq    mm3, [edx + 18H]
    pmulhw  mm5, mm7
    movq    mm0, [eax]
    paddw   mm4, mm2
    paddw   mm6, mm7
    paddw   mm2, mm1
    movq    mm1, [edx + 78H]
    paddw   mm7, mm5
    movq    mm5, mm0
    pmulhw  mm0, mm3
    paddw   mm4, mm7
    pmulhw  mm5, mm1
    movq    mm7, [eax + 30H]
    psubw   mm6, mm2
    paddw   mm0, mm3
    pmulhw  mm3, mm7
    movq    mm2, [edx + 28H]
    pmulhw  mm7, mm1
    paddw   mm5, mm1
    movq    mm1, mm2
    pmulhw  mm2, [eax + 08H]
    psubw   mm3, mm5
    movq    mm5, [edx + 68H]
    paddw   mm0, mm7
    movq    mm7, mm5
    psubw   mm0, mm4
    pmulhw  mm5, [eax + 08H]
    paddw   mm2, mm1
    pmulhw  mm1, [eax + 28H]
    paddw   mm4, mm4
    paddw   mm4, mm0
    psubw   mm3, mm6
    paddw   mm5, mm7
    paddw   mm6, mm6
    pmulhw  mm7, [eax + 28H]
    paddw   mm6, mm3
    movq    [edx + 18H], mm4
    psubw   mm1, mm5
    movq    mm4, [eax + 18H]
    movq    mm5, mm3
    pmulhw  mm3, mm4
    paddw   mm7, mm2
    movq    [edx + 28H], mm6
    movq    mm2, mm0
    movq    mm6, [edx + 08H]
    pmulhw  mm0, mm4
    paddw   mm5, mm3
    movq    mm3, [edx + 48H]
    psubw   mm5, mm1
    paddw   mm2, mm0
    psubw   mm6, mm3
    movq    mm0, mm6
    pmulhw  mm6, mm4
    paddw   mm3, mm3
    paddw   mm1, mm1
    paddw   mm3, mm0
    paddw   mm1, mm5
    pmulhw  mm4, mm3
    paddw   mm6, mm0
    psubw   mm6, mm2
    paddw   mm2, mm2
    movq    mm0, [edx + 18H]
    paddw   mm2, mm6
    paddw   mm4, mm3
    psubw   mm2, mm1
    paddw   mm2, [eax + 38H]
    paddw   mm1, mm1
    paddw   mm1, mm2
    psraw   mm2, 4
    psubw   mm4, mm7
    psraw   mm1, 4
    movq    mm3, [edx + 28H]
    paddw   mm7, mm7
    movq    [edx + 28H], mm2
    paddw   mm7, mm4
    movq    [edx + 18H], mm1
    psubw   mm4, mm3
    paddw   mm4, [eax + 38H]
    paddw   mm3, mm3
    paddw   mm3, mm4
    psraw   mm4, 4
    psubw   mm6, mm5
    psraw   mm3, 4
    paddw   mm6, [eax + 38H]
    paddw   mm5, mm5
    paddw   mm5, mm6
    psraw   mm6, 4
    movq    [edx + 48H], mm4
    psraw   mm5, 4
    movq    [edx + 38H], mm3
    psubw   mm7, mm0
    paddw   mm7, [eax + 38H]
    paddw   mm0, mm0
    paddw   mm0, mm7
    psraw   mm7, 4
    movq    [edx + 68H], mm6
    psraw   mm0, 4
    movq    [edx + 58H], mm5
    movq    [edx + 78H], mm7
    movq    [edx + 08H], mm0
    /* emms  */
  }
}


void oc_idct8x8_mmx(ogg_int16_t _y[64]){
  _asm {
    mov     edx, [_y]
    mov     eax, offset OC_IDCT_CONSTS
    movq    mm2, [edx + 30H]
    movq    mm6, [eax + 10H]
    movq    mm4, mm2
    movq    mm7, [edx + 18H]
    pmulhw  mm4, mm6
    movq    mm1, [eax + 20H]
    pmulhw  mm6, mm7
    movq    mm5, mm1
    pmulhw  mm1, mm2
    movq    mm3, [edx + 10H]
    pmulhw  mm5, mm7
    movq    mm0, [eax]
    paddw   mm4, mm2
    paddw   mm6, mm7
    paddw   mm2, mm1
    movq    mm1, [edx + 38H]
    paddw   mm7, mm5
    movq    mm5, mm0
    pmulhw  mm0, mm3
    paddw   mm4, mm7
    pmulhw  mm5, mm1
    movq    mm7, [eax + 30H]
    psubw   mm6, mm2
    paddw   mm0, mm3
    pmulhw  mm3, mm7
    movq    mm2, [edx + 20H]
    pmulhw  mm7, mm1
    paddw   mm5, mm1
    movq    mm1, mm2
    pmulhw  mm2, [eax + 08H]
    psubw   mm3, mm5
    movq    mm5, [edx + 28H]
    paddw   mm0, mm7
    movq    mm7, mm5
    psubw   mm0, mm4
    pmulhw  mm5, [eax + 08H]
    paddw   mm2, mm1
    pmulhw  mm1, [eax + 28H]
    paddw   mm4, mm4
    paddw   mm4, mm0
    psubw   mm3, mm6
    paddw   mm5, mm7
    paddw   mm6, mm6
    pmulhw  mm7, [eax + 28H]
    paddw   mm6, mm3
    movq    [edx + 10H], mm4
    psubw   mm1, mm5
    movq    mm4, [eax + 18H]
    movq    mm5, mm3
    pmulhw  mm3, mm4
    paddw   mm7, mm2
    movq    [edx + 20H], mm6
    movq    mm2, mm0
    movq    mm6, [edx]
    pmulhw  mm0, mm4
    paddw   mm5, mm3
    movq    mm3, [edx + 08H]
    psubw   mm5, mm1
    paddw   mm2, mm0
    psubw   mm6, mm3
    movq    mm0, mm6
    pmulhw  mm6, mm4
    paddw   mm3, mm3
    paddw   mm1, mm1
    paddw   mm3, mm0
    paddw   mm1, mm5
    pmulhw  mm4, mm3
    paddw   mm6, mm0
    psubw   mm6, mm2
    paddw   mm2, mm2
    movq    mm0, [edx + 10H]
    paddw   mm2, mm6
    paddw   mm4, mm3
    psubw   mm2, mm1
    movq    mm3, [edx + 20H]
    psubw   mm4, mm7
    paddw   mm1, mm1
    paddw   mm7, mm7
    paddw   mm1, mm2
    paddw   mm7, mm4
    psubw   mm4, mm3
    paddw   mm3, mm3
    psubw   mm6, mm5
    paddw   mm5, mm5
    paddw   mm3, mm4
    paddw   mm5, mm6
    psubw   mm7, mm0
    paddw   mm0, mm0
    movq    [edx + 10H], mm1
    paddw   mm0, mm7
    movq    mm1, mm4
    punpcklwd mm4, mm5
    movq    [edx], mm0
    punpckhwd mm1, mm5
    movq    mm0, mm6
    punpcklwd mm6, mm7
    movq    mm5, mm4
    punpckldq mm4, mm6
    punpckhdq mm5, mm6
    movq    mm6, mm1
    movq    [edx + 08H], mm4
    punpckhwd mm0, mm7
    movq    [edx + 18H], mm5
    punpckhdq mm6, mm0
    movq    mm4, [edx]
    punpckldq mm1, mm0
    movq    mm5, [edx + 10H]
    movq    mm0, mm4
    movq    [edx + 38H], mm6
    punpcklwd mm0, mm5
    movq    [edx + 28H], mm1
    punpckhwd mm4, mm5
    movq    mm5, mm2
    punpcklwd mm2, mm3
    movq    mm1, mm0
    punpckldq mm0, mm2
    punpckhdq mm1, mm2
    movq    mm2, mm4
    movq    [edx], mm0
    punpckhwd mm5, mm3
    movq    [edx + 10H], mm1
    punpckhdq mm4, mm5
    punpckldq mm2, mm5
    movq    [edx + 30H], mm4
    movq    [edx + 20H], mm2
    movq    mm2, [edx + 70H]
    movq    mm6, [eax + 10H]
    movq    mm4, mm2
    movq    mm7, [edx + 58H]
    pmulhw  mm4, mm6
    movq    mm1, [eax + 20H]
    pmulhw  mm6, mm7
    movq    mm5, mm1
    pmulhw  mm1, mm2
    movq    mm3, [edx + 50H]
    pmulhw  mm5, mm7
    movq    mm0, [eax]
    paddw   mm4, mm2
    paddw   mm6, mm7
    paddw   mm2, mm1
    movq    mm1, [edx + 78H]
    paddw   mm7, mm5
    movq    mm5, mm0
    pmulhw  mm0, mm3
    paddw   mm4, mm7
    pmulhw  mm5, mm1
    movq    mm7, [eax + 30H]
    psubw   mm6, mm2
    paddw   mm0, mm3
    pmulhw  mm3, mm7
    movq    mm2, [edx + 60H]
    pmulhw  mm7, mm1
    paddw   mm5, mm1
    movq    mm1, mm2
    pmulhw  mm2, [eax + 08H]
    psubw   mm3, mm5
    movq    mm5, [edx + 68H]
    paddw   mm0, mm7
    movq    mm7, mm5
    psubw   mm0, mm4
    pmulhw  mm5, [eax + 08H]
    paddw   mm2, mm1
    pmulhw  mm1, [eax + 28H]
    paddw   mm4, mm4
    paddw   mm4, mm0
    psubw   mm3, mm6
    paddw   mm5, mm7
    paddw   mm6, mm6
    pmulhw  mm7, [eax + 28H]
    paddw   mm6, mm3
    movq    [edx + 50H], mm4
    psubw   mm1, mm5
    movq    mm4, [eax + 18H]
    movq    mm5, mm3
    pmulhw  mm3, mm4
    paddw   mm7, mm2
    movq    [edx + 60H], mm6
    movq    mm2, mm0
    movq    mm6, [edx + 40H]
    pmulhw  mm0, mm4
    paddw   mm5, mm3
    movq    mm3, [edx + 48H]
    psubw   mm5, mm1
    paddw   mm2, mm0
    psubw   mm6, mm3
    movq    mm0, mm6
    pmulhw  mm6, mm4
    paddw   mm3, mm3
    paddw   mm1, mm1
    paddw   mm3, mm0
    paddw   mm1, mm5
    pmulhw  mm4, mm3
    paddw   mm6, mm0
    psubw   mm6, mm2
    paddw   mm2, mm2
    movq    mm0, [edx + 50H]
    paddw   mm2, mm6
    paddw   mm4, mm3
    psubw   mm2, mm1
    movq    mm3, [edx + 60H]
    psubw   mm4, mm7
    paddw   mm1, mm1
    paddw   mm7, mm7
    paddw   mm1, mm2
    paddw   mm7, mm4
    psubw   mm4, mm3
    paddw   mm3, mm3
    psubw   mm6, mm5
    paddw   mm5, mm5
    paddw   mm3, mm4
    paddw   mm5, mm6
    psubw   mm7, mm0
    paddw   mm0, mm0
    movq    [edx + 50H], mm1
    paddw   mm0, mm7
    movq    mm1, mm4
    punpcklwd mm4, mm5
    movq    [edx + 40H], mm0
    punpckhwd mm1, mm5
    movq    mm0, mm6
    punpcklwd mm6, mm7
    movq    mm5, mm4
    punpckldq mm4, mm6
    punpckhdq mm5, mm6
    movq    mm6, mm1
    movq    [edx + 48H], mm4
    punpckhwd mm0, mm7
    movq    [edx + 58H], mm5
    punpckhdq mm6, mm0
    movq    mm4, [edx + 40H]
    punpckldq mm1, mm0
    movq    mm5, [edx + 50H]
    movq    mm0, mm4
    movq    [edx + 78H], mm6
    punpcklwd mm0, mm5
    movq    [edx + 68H], mm1
    punpckhwd mm4, mm5
    movq    mm5, mm2
    punpcklwd mm2, mm3
    movq    mm1, mm0
    punpckldq mm0, mm2
    punpckhdq mm1, mm2
    movq    mm2, mm4
    movq    [edx + 40H], mm0
    punpckhwd mm5, mm3
    movq    [edx + 50H], mm1
    punpckhdq mm4, mm5
    punpckldq mm2, mm5
    movq    [edx + 70H], mm4
    movq    [edx + 60H], mm2
    movq    mm2, [edx + 30H]
    movq    mm6, [eax + 10H]
    movq    mm4, mm2
    movq    mm7, [edx + 50H]
    pmulhw  mm4, mm6
    movq    mm1, [eax + 20H]
    pmulhw  mm6, mm7
    movq    mm5, mm1
    pmulhw  mm1, mm2
    movq    mm3, [edx + 10H]
    pmulhw  mm5, mm7
    movq    mm0, [eax]
    paddw   mm4, mm2
    paddw   mm6, mm7
    paddw   mm2, mm1
    movq    mm1, [edx + 70H]
    paddw   mm7, mm5
    movq    mm5, mm0
    pmulhw  mm0, mm3
    paddw   mm4, mm7
    pmulhw  mm5, mm1
    movq    mm7, [eax + 30H]
    psubw   mm6, mm2
    paddw   mm0, mm3
    pmulhw  mm3, mm7
    movq    mm2, [edx + 20H]
    pmulhw  mm7, mm1
    paddw   mm5, mm1
    movq    mm1, mm2
    pmulhw  mm2, [eax + 08H]
    psubw   mm3, mm5
    movq    mm5, [edx + 60H]
    paddw   mm0, mm7
    movq    mm7, mm5
    psubw   mm0, mm4
    pmulhw  mm5, [eax + 08H]
    paddw   mm2, mm1
    pmulhw  mm1, [eax + 28H]
    paddw   mm4, mm4
    paddw   mm4, mm0
    psubw   mm3, mm6
    paddw   mm5, mm7
    paddw   mm6, mm6
    pmulhw  mm7, [eax + 28H]
    paddw   mm6, mm3
    movq    [edx + 10H], mm4
    psubw   mm1, mm5
    movq    mm4, [eax + 18H]
    movq    mm5, mm3
    pmulhw  mm3, mm4
    paddw   mm7, mm2
    movq    [edx + 20H], mm6
    movq    mm2, mm0
    movq    mm6, [edx]
    pmulhw  mm0, mm4
    paddw   mm5, mm3
    movq    mm3, [edx + 40H]
    psubw   mm5, mm1
    paddw   mm2, mm0
    psubw   mm6, mm3
    movq    mm0, mm6
    pmulhw  mm6, mm4
    paddw   mm3, mm3
    paddw   mm1, mm1
    paddw   mm3, mm0
    paddw   mm1, mm5
    pmulhw  mm4, mm3
    paddw   mm6, mm0
    psubw   mm6, mm2
    paddw   mm2, mm2
    movq    mm0, [edx + 10H]
    paddw   mm2, mm6
    paddw   mm4, mm3
    psubw   mm2, mm1
    paddw   mm2, [eax + 38H]
    paddw   mm1, mm1
    paddw   mm1, mm2
    psraw   mm2, 4
    psubw   mm4, mm7
    psraw   mm1, 4
    movq    mm3, [edx + 20H]
    paddw   mm7, mm7
    movq    [edx + 20H], mm2
    paddw   mm7, mm4
    movq    [edx + 10H], mm1
    psubw   mm4, mm3
    paddw   mm4, [eax + 38H]
    paddw   mm3, mm3
    paddw   mm3, mm4
    psraw   mm4, 4
    psubw   mm6, mm5
    psraw   mm3, 4
    paddw   mm6, [eax + 38H]
    paddw   mm5, mm5
    paddw   mm5, mm6
    psraw   mm6, 4
    movq    [edx + 40H], mm4
    psraw   mm5, 4
    movq    [edx + 30H], mm3
    psubw   mm7, mm0
    paddw   mm7, [eax + 38H]
    paddw   mm0, mm0
    paddw   mm0, mm7
    psraw   mm7, 4
    movq    [edx + 60H], mm6
    psraw   mm0, 4
    movq    [edx + 50H], mm5
    movq    [edx + 70H], mm7
    movq    [edx], mm0
    movq    mm2, [edx + 38H]
    movq    mm6, [eax + 10H]
    movq    mm4, mm2
    movq    mm7, [edx + 58H]
    pmulhw  mm4, mm6
    movq    mm1, [eax + 20H]
    pmulhw  mm6, mm7
    movq    mm5, mm1
    pmulhw  mm1, mm2
    movq    mm3, [edx + 18H]
    pmulhw  mm5, mm7
    movq    mm0, [eax]
    paddw   mm4, mm2
    paddw   mm6, mm7
    paddw   mm2, mm1
    movq    mm1, [edx + 78H]
    paddw   mm7, mm5
    movq    mm5, mm0
    pmulhw  mm0, mm3
    paddw   mm4, mm7
    pmulhw  mm5, mm1
    movq    mm7, [eax + 30H]
    psubw   mm6, mm2
    paddw   mm0, mm3
    pmulhw  mm3, mm7
    movq    mm2, [edx + 28H]
    pmulhw  mm7, mm1
    paddw   mm5, mm1
    movq    mm1, mm2
    pmulhw  mm2, [eax + 08H]
    psubw   mm3, mm5
    movq    mm5, [edx + 68H]
    paddw   mm0, mm7
    movq    mm7, mm5
    psubw   mm0, mm4
    pmulhw  mm5, [eax + 08H]
    paddw   mm2, mm1
    pmulhw  mm1, [eax + 28H]
    paddw   mm4, mm4
    paddw   mm4, mm0
    psubw   mm3, mm6
    paddw   mm5, mm7
    paddw   mm6, mm6
    pmulhw  mm7, [eax + 28H]
    paddw   mm6, mm3
    movq    [edx + 18H], mm4
    psubw   mm1, mm5
    movq    mm4, [eax + 18H]
    movq    mm5, mm3
    pmulhw  mm3, mm4
    paddw   mm7, mm2
    movq    [edx + 28H], mm6
    movq    mm2, mm0
    movq    mm6, [edx + 08H]
    pmulhw  mm0, mm4
    paddw   mm5, mm3
    movq    mm3, [edx + 48H]
    psubw   mm5, mm1
    paddw   mm2, mm0
    psubw   mm6, mm3
    movq    mm0, mm6
    pmulhw  mm6, mm4
    paddw   mm3, mm3
    paddw   mm1, mm1
    paddw   mm3, mm0
    paddw   mm1, mm5
    pmulhw  mm4, mm3
    paddw   mm6, mm0
    psubw   mm6, mm2
    paddw   mm2, mm2
    movq    mm0, [edx + 18H]
    paddw   mm2, mm6
    paddw   mm4, mm3
    psubw   mm2, mm1
    paddw   mm2, [eax + 38H]
    paddw   mm1, mm1
    paddw   mm1, mm2
    psraw   mm2, 4
    psubw   mm4, mm7
    psraw   mm1, 4
    movq    mm3, [edx + 28H]
    paddw   mm7, mm7
    movq    [edx + 28H], mm2
    paddw   mm7, mm4
    movq    [edx + 18H], mm1
    psubw   mm4, mm3
    paddw   mm4, [eax + 38H]
    paddw   mm3, mm3
    paddw   mm3, mm4
    psraw   mm4, 4
    psubw   mm6, mm5
    psraw   mm3, 4
    paddw   mm6, [eax + 38H]
    paddw   mm5, mm5
    paddw   mm5, mm6
    psraw   mm6, 4
    movq    [edx + 48H], mm4
    psraw   mm5, 4
    movq    [edx + 38H], mm3
    psubw   mm7, mm0
    paddw   mm7, [eax + 38H]
    paddw   mm0, mm0
    paddw   mm0, mm7
    psraw   mm7, 4
    movq    [edx + 68H], mm6
    psraw   mm0, 4
    movq    [edx + 58H], mm5
    movq    [edx + 78H], mm7
    movq    [edx + 08H], mm0
    /* emms  */
  }
}

#endif

Generated by  Doxygen 1.6.0   Back to index