Doxygen Source Code Documentation
        
Main Page   Alphabetical List   Data Structures   File List   Data Fields   Globals   Search   
idct.c
Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 #include "config.h"
00025 
00026 #include <stdlib.h>
00027 #include <inttypes.h>
00028 
00029 #include "mpeg2.h"
00030 #include "mpeg2_internal.h"
00031 #include "attributes.h"
00032 
/*
 * Fixed-point cosine weights for the IDCT butterflies.  Each Wk equals
 * 2048 * sqrt (2) * cos (k * pi / 16) rounded to the nearest integer.
 * There is no W4: that weight would be exactly 2048 (1 << 11), which is
 * presumably why the transform scales those terms by 2048 directly.
 */
00033 #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
00034 #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
00035 #define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
00036 #define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
00037 #define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
00038 #define W7 565  /* 2048 * sqrt (2) * cos (7 * pi / 16) */
00039 
00040 
/*
 * IDCT entry points, defined here with external linkage.  They start out
 * null and are assigned by mpeg2_idct_init (), which picks either an
 * accelerated implementation or the C functions in this file.
 *
 * mpeg2_idct_copy (block, dest, stride): the C version transforms the 8x8
 *     block and stores the clipped result to dest, one row every stride
 *     bytes.
 * mpeg2_idct_add (last, block, dest, stride): the C version transforms the
 *     8x8 block and adds it to the samples already in dest; last selects
 *     a DC-only shortcut there.
 */
00041 void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
00042 void (* mpeg2_idct_add) (int last, int16_t * block,
00043                          uint8_t * dest, int stride);
00044 
/*
 * Saturation table that mpeg2_idct_init () fills on the C path.  It is
 * kept so that initialisation code keeps compiling, but CLIP () no longer
 * reads it.
 */
static uint8_t clip_lut[1024];

/*
 * Saturate an integer sample to the 0..255 range of a uint8_t.
 *
 * This used to be a lookup at clip_lut + 384, which is only valid for
 * arguments in -384..639.  Nothing in this file bounds the IDCT output to
 * that range, so an out-of-range value read outside the table.  The
 * arithmetic clamp gives the same result for every in-range argument and
 * is well defined for all others.
 *
 * The argument is evaluated more than once; pass expressions without side
 * effects.
 */
#define CLIP(i) ((uint8_t) ((i) < 0 ? 0 : ((i) > 255 ? 255 : (i))))
00047 
/*
 * BUTTERFLY (t0, t1, W0, W1, d0, d1) stores
 *
 *     t0 = W0 * d0 + W1 * d1
 *     t1 = W0 * d1 - W1 * d0
 *
 * The disabled variant is that definition written out directly (four
 * multiplications).  The enabled variant shares the product
 * W0 * (d0 + d1) and needs only three.
 *
 * Every argument is parenthesized so compound expressions expand
 * correctly, and the scratch variable has a macro-specific name so it
 * cannot capture a caller's identifier.  The parameters W0 and W1 are
 * local to the macro; they are unrelated to the W1 constant of this file.
 * d0 and d1 are evaluated more than once.
 */
#if 0
#define BUTTERFLY(t0,t1,W0,W1,d0,d1)                    \
do {                                                    \
    (t0) = (W0) * (d0) + (W1) * (d1);                   \
    (t1) = (W0) * (d1) - (W1) * (d0);                   \
} while (0)
#else
#define BUTTERFLY(t0,t1,W0,W1,d0,d1)                    \
do {                                                    \
    int butterfly_tmp = (W0) * ((d0) + (d1));           \
    (t0) = butterfly_tmp + ((W1) - (W0)) * (d1);        \
    (t1) = butterfly_tmp - ((W1) + (W0)) * (d0);        \
} while (0)
#endif
00062 
00063 static void inline idct_row (int16_t * const block)
00064 {
00065     int d0, d1, d2, d3;
00066     int a0, a1, a2, a3, b0, b1, b2, b3;
00067     int t0, t1, t2, t3;
00068 
00069     
00070     if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] |
00071                   ((int32_t *)block)[3]))) {
00072         uint32_t tmp = (uint16_t) (block[0] << 3);
00073         tmp |= tmp << 16;
00074         ((int32_t *)block)[0] = tmp;
00075         ((int32_t *)block)[1] = tmp;
00076         ((int32_t *)block)[2] = tmp;
00077         ((int32_t *)block)[3] = tmp;
00078         return;
00079     }
00080 
00081     d0 = (block[0] << 11) + 128;
00082     d1 = block[1];
00083     d2 = block[2] << 11;
00084     d3 = block[3];
00085     t0 = d0 + d2;
00086     t1 = d0 - d2;
00087     BUTTERFLY (t2, t3, W6, W2, d3, d1);
00088     a0 = t0 + t2;
00089     a1 = t1 + t3;
00090     a2 = t1 - t3;
00091     a3 = t0 - t2;
00092 
00093     d0 = block[4];
00094     d1 = block[5];
00095     d2 = block[6];
00096     d3 = block[7];
00097     BUTTERFLY (t0, t1, W7, W1, d3, d0);
00098     BUTTERFLY (t2, t3, W3, W5, d1, d2);
00099     b0 = t0 + t2;
00100     b3 = t1 + t3;
00101     t0 -= t2;
00102     t1 -= t3;
00103     b1 = ((t0 + t1) * 181) >> 8;
00104     b2 = ((t0 - t1) * 181) >> 8;
00105 
00106     block[0] = (a0 + b0) >> 8;
00107     block[1] = (a1 + b1) >> 8;
00108     block[2] = (a2 + b2) >> 8;
00109     block[3] = (a3 + b3) >> 8;
00110     block[4] = (a3 - b3) >> 8;
00111     block[5] = (a2 - b2) >> 8;
00112     block[6] = (a1 - b1) >> 8;
00113     block[7] = (a0 - b0) >> 8;
00114 }
00115 
00116 static void inline idct_col (int16_t * const block)
00117 {
00118     int d0, d1, d2, d3;
00119     int a0, a1, a2, a3, b0, b1, b2, b3;
00120     int t0, t1, t2, t3;
00121 
00122     d0 = (block[8*0] << 11) + 65536;
00123     d1 = block[8*1];
00124     d2 = block[8*2] << 11;
00125     d3 = block[8*3];
00126     t0 = d0 + d2;
00127     t1 = d0 - d2;
00128     BUTTERFLY (t2, t3, W6, W2, d3, d1);
00129     a0 = t0 + t2;
00130     a1 = t1 + t3;
00131     a2 = t1 - t3;
00132     a3 = t0 - t2;
00133 
00134     d0 = block[8*4];
00135     d1 = block[8*5];
00136     d2 = block[8*6];
00137     d3 = block[8*7];
00138     BUTTERFLY (t0, t1, W7, W1, d3, d0);
00139     BUTTERFLY (t2, t3, W3, W5, d1, d2);
00140     b0 = t0 + t2;
00141     b3 = t1 + t3;
00142     t0 = (t0 - t2) >> 8;
00143     t1 = (t1 - t3) >> 8;
00144     b1 = (t0 + t1) * 181;
00145     b2 = (t0 - t1) * 181;
00146 
00147     block[8*0] = (a0 + b0) >> 17;
00148     block[8*1] = (a1 + b1) >> 17;
00149     block[8*2] = (a2 + b2) >> 17;
00150     block[8*3] = (a3 + b3) >> 17;
00151     block[8*4] = (a3 - b3) >> 17;
00152     block[8*5] = (a2 - b2) >> 17;
00153     block[8*6] = (a1 - b1) >> 17;
00154     block[8*7] = (a0 - b0) >> 17;
00155 }
00156 
/*
 * C implementation of mpeg2_idct_copy.
 *
 * Runs the row transform over the eight rows of block, then the column
 * transform over its eight columns.  Each resulting row is passed through
 * CLIP () into dest, and the row of block is cleared afterwards, so the
 * block is all zero on return.
 *
 * block:  64 coefficients, row after row; transformed and then zeroed.
 * dest:   receives 8 rows of 8 samples.
 * stride: distance in bytes between consecutive rows of dest.
 */
static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
                               const int stride)
{
    int row, col;

    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        /* write the whole output row before clearing the coefficients */
        for (col = 0; col < 8; col++)
            dest[col] = CLIP (block[col]);
        for (col = 0; col < 8; col++)
            block[col] = 0;

        dest += stride;
        block += 8;
    }
}
00183 
/*
 * C implementation of mpeg2_idct_add.
 *
 * Adds the inverse transform of block to the 8x8 samples already in dest
 * and passes every sum through CLIP ().
 *
 * last:   when it equals 129 and (block[0] & 7) != 4, a shortcut adds the
 *         single value (block[0] + 4) >> 3 to every sample instead of
 *         running the transform.  NOTE(review): 129 is presumably the
 *         caller's marker for a block with only a DC coefficient;
 *         confirm against the caller.
 * block:  64 coefficients, row after row.  The full path zeroes all of
 *         them; the shortcut zeroes only block[0] and block[63].
 * dest:   8 rows of 8 samples, updated in place.
 * stride: distance in bytes between consecutive rows of dest.
 */
static void mpeg2_idct_add_c (const int last, int16_t * block,
                              uint8_t * dest, const int stride)
{
    int row, col;

    if (last == 129 && (block[0] & 7) != 4) {
        /* shortcut: one rounded value is added to the whole block */
        const int DC = (block[0] + 4) >> 3;

        block[0] = block[63] = 0;
        for (row = 0; row < 8; row++) {
            for (col = 0; col < 8; col++)
                dest[col] = CLIP (DC + dest[col]);
            dest += stride;
        }
        return;
    }

    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        /* update the whole output row before clearing the coefficients */
        for (col = 0; col < 8; col++)
            dest[col] = CLIP (block[col] + dest[col]);
        for (col = 0; col < 8; col++)
            block[col] = 0;

        dest += stride;
        block += 8;
    }
}
00229 
/*
 * Choose the IDCT implementation and store it in mpeg2_idct_copy and
 * mpeg2_idct_add.
 *
 * accel is a bit mask of MPEG2_ACCEL_* flags.  Each architecture section
 * below is compiled in only when its macro is defined, and each one ends
 * with a dangling "else".  The sections therefore chain into a single
 * if / else-if ladder, and the plain C block at the bottom is the fallback
 * when no compiled-in accelerated variant matches accel.
 */
00230 void mpeg2_idct_init (uint32_t accel)
00231 {
00232 #ifdef ARCH_X86
      /* MMXEXT is tested first, so it wins when both x86 flags are set */
00233     if (accel & MPEG2_ACCEL_X86_MMXEXT) {
00234         mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
00235         mpeg2_idct_add = mpeg2_idct_add_mmxext;
00236         mpeg2_idct_mmx_init ();
00237     } else if (accel & MPEG2_ACCEL_X86_MMX) {
00238         mpeg2_idct_copy = mpeg2_idct_copy_mmx;
00239         mpeg2_idct_add = mpeg2_idct_add_mmx;
00240         mpeg2_idct_mmx_init ();
00241     } else
00242 #endif
00243 #ifdef ARCH_PPC
00244     if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
00245         mpeg2_idct_copy = mpeg2_idct_copy_altivec;
00246         mpeg2_idct_add = mpeg2_idct_add_altivec;
00247         mpeg2_idct_altivec_init ();
00248     } else
00249 #endif
00250 #ifdef ARCH_ALPHA
      /* both Alpha variants share one init routine; only its argument differs */
00251     if (accel & MPEG2_ACCEL_ALPHA_MVI) {
00252         mpeg2_idct_copy = mpeg2_idct_copy_mvi;
00253         mpeg2_idct_add = mpeg2_idct_add_mvi;
00254         mpeg2_idct_alpha_init (0);
00255     } else if (accel & MPEG2_ACCEL_ALPHA) {
00256         mpeg2_idct_copy = mpeg2_idct_copy_alpha;
00257         mpeg2_idct_add = mpeg2_idct_add_alpha;
00258         mpeg2_idct_alpha_init (1);
00259     } else
00260 #endif
00261 #ifdef LIBMPEG2_MLIB
      /*
       * The add routine depends on whether the MLIB_NON_IEEE environment
       * variable is set; the copy routine is always the non-IEEE one.
       * NOTE(review): confirm that this asymmetry is intended.
       */
00262     if (accel & MPEG2_ACCEL_MLIB) {
00263         mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee;
00264         mpeg2_idct_add = (getenv ("MLIB_NON_IEEE") ?
00265                           mpeg2_idct_add_mlib_non_ieee : mpeg2_idct_add_mlib);
00266     } else
00267 #endif
00268     {
          /* C fallback */
00269         extern uint8_t mpeg2_scan_norm[64];
00270         extern uint8_t mpeg2_scan_alt[64];
00271         int i, j;
00272 
00273         mpeg2_idct_copy = mpeg2_idct_copy_c;
00274         mpeg2_idct_add = mpeg2_idct_add_c;
          /* clip_lut[i + 384] = i saturated to 0..255, for i in -384..639 */
00275         for (i = -384; i < 640; i++)
00276             clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
          /*
           * Rewrite both scan tables in place.  Each 6-bit entry has bits
           * 1, 2, 4 and 5 moved down by one position and bits 0 and 3
           * moved up by two.  This happens every time this branch runs, so
           * a second call on the C path would permute the tables again.
           * NOTE(review): confirm that callers run this only once.
           */
00277         for (i = 0; i < 64; i++) {
00278             j = mpeg2_scan_norm[i];
00279             mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
00280             j = mpeg2_scan_alt[i];
00281             mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
00282         }
00283     }
00284 }