00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 #include "config.h"
00027 
00028 #ifdef ARCH_X86
00029 
00030 #include <stdio.h>
00031 #include <stdlib.h>
00032 #include <inttypes.h>
00033 
00034 #include "convert.h"
00035 #include "convert_internal.h"
00036 #include "attributes.h"
00037 #include "mmx.h"
00038 
/*
 * Instruction-set selector handed down to the conversion helpers as their
 * `cpu' argument.  The movntq() macro in this file compares it against
 * CPU_MMXEXT to choose which store macro to use.
 */
enum {
    CPU_MMXEXT = 0,
    CPU_MMX = 1
};
00041 
00042 
00043 
/*
 * Store the MMX register `src' to the memory operand `dest'.
 *
 * NOTE: this macro reads a variable named `cpu' from the scope in which it
 * is expanded; it is not a macro parameter.  When cpu == CPU_MMXEXT the
 * store goes through movntq_r2m(), otherwise through movq_r2m().  Both are
 * provided by mmx.h (not visible here); presumably they wrap the MOVNTQ
 * (non-temporal) and MOVQ instructions respectively -- confirm in mmx.h.
 */
#define movntq(src,dest)                \
do {                                    \
    if (cpu == CPU_MMXEXT) {            \
        movntq_r2m (src, dest);         \
    } else {                            \
        movq_r2m (src, dest);           \
    }                                   \
} while (0)
00051 
00052 static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)
00053 {
00054     static mmx_t mmx_80w = {0x0080008000800080LL};
00055     static mmx_t mmx_U_green = {0xf37df37df37df37dLL};
00056     static mmx_t mmx_U_blue = {0x4093409340934093LL};
00057     static mmx_t mmx_V_red = {0x3312331233123312LL};
00058     static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL};
00059     static mmx_t mmx_10w = {0x1010101010101010LL};
00060     static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL};
00061     static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL};
00062 
00063     movd_m2r (*pu, mm0);                
00064     movd_m2r (*pv, mm1);                
00065     movq_m2r (*py, mm6);                
00066     pxor_r2r (mm4, mm4);                
00067     
00068 
00069     
00070 
00071 
00072 
00073 
00074 
00075 
00076 
00077     punpcklbw_r2r (mm4, mm0);           
00078     punpcklbw_r2r (mm4, mm1);           
00079     psubsw_m2r (mmx_80w, mm0);          
00080     psubsw_m2r (mmx_80w, mm1);          
00081     psllw_i2r (3, mm0);                 
00082     psllw_i2r (3, mm1);                 
00083     movq_r2r (mm0, mm2);                
00084     movq_r2r (mm1, mm3);                
00085     pmulhw_m2r (mmx_U_green, mm2);      
00086     pmulhw_m2r (mmx_V_green, mm3);      
00087     pmulhw_m2r (mmx_U_blue, mm0);       
00088     pmulhw_m2r (mmx_V_red, mm1);        
00089     paddsw_r2r (mm3, mm2);              
00090 
00091     psubusb_m2r (mmx_10w, mm6);         
00092     movq_r2r (mm6, mm7);                
00093     pand_m2r (mmx_00ffw, mm6);          
00094     psrlw_i2r (8, mm7);                 
00095     psllw_i2r (3, mm6);                 
00096     psllw_i2r (3, mm7);                 
00097     pmulhw_m2r (mmx_Y_coeff, mm6);      
00098     pmulhw_m2r (mmx_Y_coeff, mm7);      
00099 
00100     
00101 
00102 
00103 
00104 
00105 
00106 
00107 
00108     movq_r2r (mm0, mm3);                
00109     movq_r2r (mm1, mm4);                
00110     movq_r2r (mm2, mm5);                
00111     paddsw_r2r (mm6, mm0);              
00112     paddsw_r2r (mm7, mm3);              
00113     paddsw_r2r (mm6, mm1);              
00114     paddsw_r2r (mm7, mm4);              
00115     paddsw_r2r (mm6, mm2);              
00116     paddsw_r2r (mm7, mm5);              
00117     packuswb_r2r (mm0, mm0);            
00118     packuswb_r2r (mm1, mm1);            
00119     packuswb_r2r (mm2, mm2);            
00120     packuswb_r2r (mm3, mm3);            
00121     packuswb_r2r (mm4, mm4);            
00122     packuswb_r2r (mm5, mm5);            
00123     punpcklbw_r2r (mm3, mm0);           
00124     punpcklbw_r2r (mm4, mm1);           
00125     punpcklbw_r2r (mm5, mm2);           
00126 }
00127 
00128 static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu)
00129 {
00130     static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL};
00131     static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL};
00132     static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL};
00133 
00134     
00135 
00136 
00137 
00138 
00139 
00140 
00141     pand_m2r (mmx_bluemask, mm0);       
00142     pand_m2r (mmx_greenmask, mm2);      
00143     pand_m2r (mmx_redmask, mm1);        
00144     psrlq_i2r (3, mm0);                 
00145     pxor_r2r (mm4, mm4);                
00146     movq_r2r (mm0, mm5);                
00147     movq_r2r (mm2, mm7);                
00148 
00149     punpcklbw_r2r (mm4, mm2);
00150     punpcklbw_r2r (mm1, mm0);
00151     psllq_i2r (3, mm2);
00152     por_r2r (mm2, mm0);
00153     movntq (mm0, *image);
00154 
00155     punpckhbw_r2r (mm4, mm7);
00156     punpckhbw_r2r (mm1, mm5);
00157     psllq_i2r (3, mm7);
00158     por_r2r (mm7, mm5);
00159     movntq (mm5, *(image+8));
00160 }
00161 
00162 static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu)
00163 {
00164     
00165 
00166 
00167 
00168 
00169 
00170 
00171     pxor_r2r (mm3, mm3);
00172     movq_r2r (mm0, mm6);
00173     movq_r2r (mm1, mm7);
00174     movq_r2r (mm0, mm4);
00175     movq_r2r (mm1, mm5);
00176     punpcklbw_r2r (mm2, mm6);
00177     punpcklbw_r2r (mm3, mm7);
00178     punpcklwd_r2r (mm7, mm6);
00179     movntq (mm6, *image);
00180     movq_r2r (mm0, mm6);
00181     punpcklbw_r2r (mm2, mm6);
00182     punpckhwd_r2r (mm7, mm6);
00183     movntq (mm6, *(image+8));
00184     punpckhbw_r2r (mm2, mm4);
00185     punpckhbw_r2r (mm3, mm5);
00186     punpcklwd_r2r (mm5, mm4);
00187     movntq (mm4, *(image+16));
00188     movq_r2r (mm0, mm4);
00189     punpckhbw_r2r (mm2, mm4);
00190     punpckhwd_r2r (mm5, mm4);
00191     movntq (mm4, *(image+24));
00192 }
00193 
/*
 * Convert a band of planar YUV 4:2:0 to 16-bit RGB, 8 pixels at a time.
 *
 *   image       destination; 16 bytes are written per group of 8 pixels
 *   py, pu, pv  source planes; one chroma line is shared by two luma lines
 *   width       pixels per row; only whole groups of 8 are converted, a
 *               trailing (width % 8) remainder is left untouched
 *   height      number of luma rows to convert
 *   rgb_stride  bytes from one destination row to the next
 *   y_stride    bytes from one luma row to the next
 *   uv_stride   bytes from one chroma row to the next
 *   cpu         CPU_MMX or CPU_MMXEXT, forwarded to the store helper
 *
 * Nothing is done when width < 8 or height <= 0.
 */
static inline void yuv420_rgb16 (uint8_t * image,
                                 uint8_t * py, uint8_t * pu, uint8_t * pv,
                                 int width, int height,
                                 int rgb_stride, int y_stride, int uv_stride,
                                 const int cpu)
{
    int blocks;         /* groups of 8 pixels per row */
    int pixels;         /* pixels really converted per row */
    int row;
    int i;

    blocks = width >> 3;
    /* The do/while loop below runs at least once, so an empty job must be
       rejected up front instead of letting the counter wrap around. */
    if (blocks <= 0 || height <= 0)
        return;
    pixels = blocks << 3;

    /* Turn the strides into "end of converted data -> start of next row"
       skips.  They must be based on the number of pixels the inner loop
       really advances by, not on the raw width, or every row after the
       first would start at the wrong offset when width is not a multiple
       of 8. */
    rgb_stride -= 2 * pixels;
    y_stride -= pixels;
    uv_stride -= pixels >> 1;

    for (row = 0; row < height; row++) {
        i = blocks;
        do {
            mmx_yuv2rgb (py, pu, pv);
            mmx_unpack_16rgb (image, cpu);
            py += 8;
            pu += 4;
            pv += 4;
            image += 16;
        } while (--i);

        py += y_stride;
        image += rgb_stride;
        if (row & 1) {
            /* second row of a pair done: move on to the next chroma line */
            pu += uv_stride;
            pv += uv_stride;
        } else {
            /* first row of a pair done: rewind, the next luma row uses the
               same chroma line again */
            pu -= 4 * blocks;
            pv -= 4 * blocks;
        }
    }
}
00229 
/*
 * Convert a band of planar YUV 4:2:0 to 32-bit RGB, 8 pixels at a time.
 *
 *   image       destination; 32 bytes are written per group of 8 pixels
 *   py, pu, pv  source planes; one chroma line is shared by two luma lines
 *   width       pixels per row; only whole groups of 8 are converted, a
 *               trailing (width % 8) remainder is left untouched
 *   height      number of luma rows to convert
 *   rgb_stride  bytes from one destination row to the next
 *   y_stride    bytes from one luma row to the next
 *   uv_stride   bytes from one chroma row to the next
 *   cpu         CPU_MMX or CPU_MMXEXT, forwarded to the store helper
 *
 * Nothing is done when width < 8 or height <= 0.
 */
static inline void yuv420_argb32 (uint8_t * image, uint8_t * py,
                                  uint8_t * pu, uint8_t * pv,
                                  int width, int height,
                                  int rgb_stride, int y_stride, int uv_stride,
                                  const int cpu)
{
    int blocks;         /* groups of 8 pixels per row */
    int pixels;         /* pixels really converted per row */
    int row;
    int i;

    blocks = width >> 3;
    /* The do/while loop below runs at least once, so an empty job must be
       rejected up front instead of letting the counter wrap around. */
    if (blocks <= 0 || height <= 0)
        return;
    pixels = blocks << 3;

    /* Turn the strides into "end of converted data -> start of next row"
       skips.  They must be based on the number of pixels the inner loop
       really advances by, not on the raw width, or every row after the
       first would start at the wrong offset when width is not a multiple
       of 8. */
    rgb_stride -= 4 * pixels;
    y_stride -= pixels;
    uv_stride -= pixels >> 1;

    for (row = 0; row < height; row++) {
        i = blocks;
        do {
            mmx_yuv2rgb (py, pu, pv);
            mmx_unpack_32rgb (image, cpu);
            py += 8;
            pu += 4;
            pv += 4;
            image += 32;
        } while (--i);

        py += y_stride;
        image += rgb_stride;
        if (row & 1) {
            /* second row of a pair done: move on to the next chroma line */
            pu += uv_stride;
            pv += uv_stride;
        } else {
            /* first row of a pair done: rewind, the next luma row uses the
               same chroma line again */
            pu -= 4 * blocks;
            pv -= 4 * blocks;
        }
    }
}
00265 
00266 static void mmxext_rgb16 (void * _id, uint8_t * const * src,
00267                           unsigned int v_offset)
00268 {
00269     convert_rgb_t * id = (convert_rgb_t *) _id;
00270 
00271     yuv420_rgb16 (id->rgb_ptr + id->rgb_stride * v_offset,
00272                   src[0], src[1], src[2], id->width, 16,
00273                   id->rgb_stride, id->uv_stride << 1, id->uv_stride,
00274                   CPU_MMXEXT);
00275 }
00276 
00277 static void mmxext_argb32 (void * _id, uint8_t * const * src,
00278                            unsigned int v_offset)
00279 {
00280     convert_rgb_t * id = (convert_rgb_t *) _id;
00281 
00282     yuv420_argb32 (id->rgb_ptr + id->rgb_stride * v_offset,
00283                    src[0], src[1], src[2], id->width, 16,
00284                   id->rgb_stride, id->uv_stride << 1, id->uv_stride,
00285                   CPU_MMXEXT);
00286 }
00287 
00288 static void mmx_rgb16 (void * _id, uint8_t * const * src,
00289                        unsigned int v_offset)
00290 {
00291     convert_rgb_t * id = (convert_rgb_t *) _id;
00292 
00293     yuv420_rgb16 (id->rgb_ptr + id->rgb_stride * v_offset,
00294                   src[0], src[1], src[2], id->width, 16,
00295                   id->rgb_stride, id->uv_stride << 1, id->uv_stride, CPU_MMX);
00296 }
00297 
00298 static void mmx_argb32 (void * _id, uint8_t * const * src,
00299                         unsigned int v_offset)
00300 {
00301     convert_rgb_t * id = (convert_rgb_t *) _id;
00302 
00303     yuv420_argb32 (id->rgb_ptr + id->rgb_stride * v_offset,
00304                    src[0], src[1], src[2], id->width, 16,
00305                   id->rgb_stride, id->uv_stride << 1, id->uv_stride, CPU_MMX);
00306 }
00307 
00308 yuv2rgb_copy * yuv2rgb_init_mmxext (int order, int bpp)
00309 {
00310     if ((order == CONVERT_RGB) && (bpp == 16))
00311         return mmxext_rgb16;
00312     else if ((order == CONVERT_RGB) && (bpp == 32))
00313         return mmxext_argb32;
00314     return NULL;        
00315 }
00316 
00317 yuv2rgb_copy * yuv2rgb_init_mmx (int order, int bpp)
00318 {
00319     if ((order == CONVERT_RGB) && (bpp == 16))
00320         return mmx_rgb16;
00321     else if ((order == CONVERT_RGB) && (bpp == 32))
00322         return mmx_argb32;
00323     return NULL;        
00324 }
00325 #endif