xref: /aosp_15_r20/external/libjpeg-turbo/simd/i386/jsimd.c (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
1 /*
2  * jsimd_i386.c
3  *
4  * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
5  * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander.
6  * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
7  *
8  * Based on the x86 SIMD extension for IJG JPEG library,
9  * Copyright (C) 1999-2006, MIYASAKA Masaru.
10  * For conditions of distribution and use, see copyright notice in jsimdext.inc
11  *
12  * This file contains the interface between the "normal" portions
13  * of the library and the SIMD implementations when running on a
14  * 32-bit x86 architecture.
15  */
16 
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
23 #include "../jsimd.h"
24 
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the low `order` bits of the
 * address `ptr` are all zero, i.e. when ptr is a multiple of 2^order bytes.
 * Both arguments are fully parenthesized so that arbitrary expressions
 * (conditionals, pointer arithmetic, ...) can be passed safely, and the
 * address is converted to size_t so that no address bits are discarded
 * by the cast.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
33 
/*
 * Bit mask of usable SIMD instruction sets (JSIMD_* flags).  The all-ones
 * initial value is a sentinel that init_simd() tests to decide whether
 * detection still has to run.
 */
static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
/*
 * Defaults to 1; init_simd() clears it when the JSIMD_NOHUFFENC environment
 * variable is "1".  NOTE(review): the consumer of this flag is not visible
 * in this part of the file -- presumably the Huffman encoder capability
 * check; confirm.
 */
static THREAD_LOCAL unsigned int simd_huffman = 1;
36 
37 /*
38  * Check what SIMD accelerations are supported.
39  */
40 LOCAL(void)
init_simd(void)41 init_simd(void)
42 {
43 #ifndef NO_GETENV
44   char env[2] = { 0 };
45 #endif
46 
47   if (simd_support != ~0U)
48     return;
49 
50   simd_support = jpeg_simd_cpu_support();
51 
52 #ifndef NO_GETENV
53   /* Force different settings through environment variables */
54   if (!GETENV_S(env, 2, "JSIMD_FORCEMMX") && !strcmp(env, "1"))
55     simd_support &= JSIMD_MMX;
56   if (!GETENV_S(env, 2, "JSIMD_FORCE3DNOW") && !strcmp(env, "1"))
57     simd_support &= JSIMD_3DNOW | JSIMD_MMX;
58   if (!GETENV_S(env, 2, "JSIMD_FORCESSE") && !strcmp(env, "1"))
59     simd_support &= JSIMD_SSE | JSIMD_MMX;
60   if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
61     simd_support &= JSIMD_SSE2;
62   if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
63     simd_support &= JSIMD_AVX2;
64   if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
65     simd_support = 0;
66   if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
67     simd_huffman = 0;
68 #endif
69 }
70 
71 GLOBAL(int)
jsimd_can_rgb_ycc(void)72 jsimd_can_rgb_ycc(void)
73 {
74   init_simd();
75 
76   /* The code is optimised for these values only */
77   if (BITS_IN_JSAMPLE != 8)
78     return 0;
79   if (sizeof(JDIMENSION) != 4)
80     return 0;
81   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
82     return 0;
83 
84   if ((simd_support & JSIMD_AVX2) &&
85       IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
86     return 1;
87   if ((simd_support & JSIMD_SSE2) &&
88       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
89     return 1;
90   if (simd_support & JSIMD_MMX)
91     return 1;
92 
93   return 0;
94 }
95 
96 GLOBAL(int)
jsimd_can_rgb_gray(void)97 jsimd_can_rgb_gray(void)
98 {
99   init_simd();
100 
101   /* The code is optimised for these values only */
102   if (BITS_IN_JSAMPLE != 8)
103     return 0;
104   if (sizeof(JDIMENSION) != 4)
105     return 0;
106   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
107     return 0;
108 
109   if ((simd_support & JSIMD_AVX2) &&
110       IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
111     return 1;
112   if ((simd_support & JSIMD_SSE2) &&
113       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
114     return 1;
115   if (simd_support & JSIMD_MMX)
116     return 1;
117 
118   return 0;
119 }
120 
121 GLOBAL(int)
jsimd_can_ycc_rgb(void)122 jsimd_can_ycc_rgb(void)
123 {
124   init_simd();
125 
126   /* The code is optimised for these values only */
127   if (BITS_IN_JSAMPLE != 8)
128     return 0;
129   if (sizeof(JDIMENSION) != 4)
130     return 0;
131   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
132     return 0;
133 
134   if ((simd_support & JSIMD_AVX2) &&
135       IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
136     return 1;
137   if ((simd_support & JSIMD_SSE2) &&
138       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
139     return 1;
140   if (simd_support & JSIMD_MMX)
141     return 1;
142 
143   return 0;
144 }
145 
146 GLOBAL(int)
jsimd_can_ycc_rgb565(void)147 jsimd_can_ycc_rgb565(void)
148 {
149   return 0;
150 }
151 
152 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)153 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
154                       JSAMPIMAGE output_buf, JDIMENSION output_row,
155                       int num_rows)
156 {
157   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
158   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
159   void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
160 
161   if (simd_support == ~0U)
162     init_simd();
163 
164   switch (cinfo->in_color_space) {
165   case JCS_EXT_RGB:
166     avx2fct = jsimd_extrgb_ycc_convert_avx2;
167     sse2fct = jsimd_extrgb_ycc_convert_sse2;
168     mmxfct = jsimd_extrgb_ycc_convert_mmx;
169     break;
170   case JCS_EXT_RGBX:
171   case JCS_EXT_RGBA:
172     avx2fct = jsimd_extrgbx_ycc_convert_avx2;
173     sse2fct = jsimd_extrgbx_ycc_convert_sse2;
174     mmxfct = jsimd_extrgbx_ycc_convert_mmx;
175     break;
176   case JCS_EXT_BGR:
177     avx2fct = jsimd_extbgr_ycc_convert_avx2;
178     sse2fct = jsimd_extbgr_ycc_convert_sse2;
179     mmxfct = jsimd_extbgr_ycc_convert_mmx;
180     break;
181   case JCS_EXT_BGRX:
182   case JCS_EXT_BGRA:
183     avx2fct = jsimd_extbgrx_ycc_convert_avx2;
184     sse2fct = jsimd_extbgrx_ycc_convert_sse2;
185     mmxfct = jsimd_extbgrx_ycc_convert_mmx;
186     break;
187   case JCS_EXT_XBGR:
188   case JCS_EXT_ABGR:
189     avx2fct = jsimd_extxbgr_ycc_convert_avx2;
190     sse2fct = jsimd_extxbgr_ycc_convert_sse2;
191     mmxfct = jsimd_extxbgr_ycc_convert_mmx;
192     break;
193   case JCS_EXT_XRGB:
194   case JCS_EXT_ARGB:
195     avx2fct = jsimd_extxrgb_ycc_convert_avx2;
196     sse2fct = jsimd_extxrgb_ycc_convert_sse2;
197     mmxfct = jsimd_extxrgb_ycc_convert_mmx;
198     break;
199   default:
200     avx2fct = jsimd_rgb_ycc_convert_avx2;
201     sse2fct = jsimd_rgb_ycc_convert_sse2;
202     mmxfct = jsimd_rgb_ycc_convert_mmx;
203     break;
204   }
205 
206   if (simd_support & JSIMD_AVX2)
207     avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
208   else if (simd_support & JSIMD_SSE2)
209     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
210   else
211     mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
212 }
213 
214 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)215 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
216                        JSAMPIMAGE output_buf, JDIMENSION output_row,
217                        int num_rows)
218 {
219   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
220   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
221   void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
222 
223   if (simd_support == ~0U)
224     init_simd();
225 
226   switch (cinfo->in_color_space) {
227   case JCS_EXT_RGB:
228     avx2fct = jsimd_extrgb_gray_convert_avx2;
229     sse2fct = jsimd_extrgb_gray_convert_sse2;
230     mmxfct = jsimd_extrgb_gray_convert_mmx;
231     break;
232   case JCS_EXT_RGBX:
233   case JCS_EXT_RGBA:
234     avx2fct = jsimd_extrgbx_gray_convert_avx2;
235     sse2fct = jsimd_extrgbx_gray_convert_sse2;
236     mmxfct = jsimd_extrgbx_gray_convert_mmx;
237     break;
238   case JCS_EXT_BGR:
239     avx2fct = jsimd_extbgr_gray_convert_avx2;
240     sse2fct = jsimd_extbgr_gray_convert_sse2;
241     mmxfct = jsimd_extbgr_gray_convert_mmx;
242     break;
243   case JCS_EXT_BGRX:
244   case JCS_EXT_BGRA:
245     avx2fct = jsimd_extbgrx_gray_convert_avx2;
246     sse2fct = jsimd_extbgrx_gray_convert_sse2;
247     mmxfct = jsimd_extbgrx_gray_convert_mmx;
248     break;
249   case JCS_EXT_XBGR:
250   case JCS_EXT_ABGR:
251     avx2fct = jsimd_extxbgr_gray_convert_avx2;
252     sse2fct = jsimd_extxbgr_gray_convert_sse2;
253     mmxfct = jsimd_extxbgr_gray_convert_mmx;
254     break;
255   case JCS_EXT_XRGB:
256   case JCS_EXT_ARGB:
257     avx2fct = jsimd_extxrgb_gray_convert_avx2;
258     sse2fct = jsimd_extxrgb_gray_convert_sse2;
259     mmxfct = jsimd_extxrgb_gray_convert_mmx;
260     break;
261   default:
262     avx2fct = jsimd_rgb_gray_convert_avx2;
263     sse2fct = jsimd_rgb_gray_convert_sse2;
264     mmxfct = jsimd_rgb_gray_convert_mmx;
265     break;
266   }
267 
268   if (simd_support & JSIMD_AVX2)
269     avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
270   else if (simd_support & JSIMD_SSE2)
271     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
272   else
273     mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
274 }
275 
276 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)277 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
278                       JDIMENSION input_row, JSAMPARRAY output_buf,
279                       int num_rows)
280 {
281   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
282   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
283   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
284 
285   if (simd_support == ~0U)
286     init_simd();
287 
288   switch (cinfo->out_color_space) {
289   case JCS_EXT_RGB:
290     avx2fct = jsimd_ycc_extrgb_convert_avx2;
291     sse2fct = jsimd_ycc_extrgb_convert_sse2;
292     mmxfct = jsimd_ycc_extrgb_convert_mmx;
293     break;
294   case JCS_EXT_RGBX:
295   case JCS_EXT_RGBA:
296     avx2fct = jsimd_ycc_extrgbx_convert_avx2;
297     sse2fct = jsimd_ycc_extrgbx_convert_sse2;
298     mmxfct = jsimd_ycc_extrgbx_convert_mmx;
299     break;
300   case JCS_EXT_BGR:
301     avx2fct = jsimd_ycc_extbgr_convert_avx2;
302     sse2fct = jsimd_ycc_extbgr_convert_sse2;
303     mmxfct = jsimd_ycc_extbgr_convert_mmx;
304     break;
305   case JCS_EXT_BGRX:
306   case JCS_EXT_BGRA:
307     avx2fct = jsimd_ycc_extbgrx_convert_avx2;
308     sse2fct = jsimd_ycc_extbgrx_convert_sse2;
309     mmxfct = jsimd_ycc_extbgrx_convert_mmx;
310     break;
311   case JCS_EXT_XBGR:
312   case JCS_EXT_ABGR:
313     avx2fct = jsimd_ycc_extxbgr_convert_avx2;
314     sse2fct = jsimd_ycc_extxbgr_convert_sse2;
315     mmxfct = jsimd_ycc_extxbgr_convert_mmx;
316     break;
317   case JCS_EXT_XRGB:
318   case JCS_EXT_ARGB:
319     avx2fct = jsimd_ycc_extxrgb_convert_avx2;
320     sse2fct = jsimd_ycc_extxrgb_convert_sse2;
321     mmxfct = jsimd_ycc_extxrgb_convert_mmx;
322     break;
323   default:
324     avx2fct = jsimd_ycc_rgb_convert_avx2;
325     sse2fct = jsimd_ycc_rgb_convert_sse2;
326     mmxfct = jsimd_ycc_rgb_convert_mmx;
327     break;
328   }
329 
330   if (simd_support & JSIMD_AVX2)
331     avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
332   else if (simd_support & JSIMD_SSE2)
333     sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
334   else
335     mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
336 }
337 
338 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)339 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
340                          JDIMENSION input_row, JSAMPARRAY output_buf,
341                          int num_rows)
342 {
343 }
344 
345 GLOBAL(int)
jsimd_can_h2v2_downsample(void)346 jsimd_can_h2v2_downsample(void)
347 {
348   init_simd();
349 
350   /* The code is optimised for these values only */
351   if (BITS_IN_JSAMPLE != 8)
352     return 0;
353   if (sizeof(JDIMENSION) != 4)
354     return 0;
355 
356   if (simd_support & JSIMD_AVX2)
357     return 1;
358   if (simd_support & JSIMD_SSE2)
359     return 1;
360   if (simd_support & JSIMD_MMX)
361     return 1;
362 
363   return 0;
364 }
365 
366 GLOBAL(int)
jsimd_can_h2v1_downsample(void)367 jsimd_can_h2v1_downsample(void)
368 {
369   init_simd();
370 
371   /* The code is optimised for these values only */
372   if (BITS_IN_JSAMPLE != 8)
373     return 0;
374   if (sizeof(JDIMENSION) != 4)
375     return 0;
376 
377   if (simd_support & JSIMD_AVX2)
378     return 1;
379   if (simd_support & JSIMD_SSE2)
380     return 1;
381   if (simd_support & JSIMD_MMX)
382     return 1;
383 
384   return 0;
385 }
386 
387 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)388 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
389                       JSAMPARRAY input_data, JSAMPARRAY output_data)
390 {
391   if (simd_support == ~0U)
392     init_simd();
393 
394   if (simd_support & JSIMD_AVX2)
395     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
396                                compptr->v_samp_factor,
397                                compptr->width_in_blocks, input_data,
398                                output_data);
399   else if (simd_support & JSIMD_SSE2)
400     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
401                                compptr->v_samp_factor,
402                                compptr->width_in_blocks, input_data,
403                                output_data);
404   else
405     jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
406                               compptr->v_samp_factor, compptr->width_in_blocks,
407                               input_data, output_data);
408 }
409 
410 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)411 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
412                       JSAMPARRAY input_data, JSAMPARRAY output_data)
413 {
414   if (simd_support == ~0U)
415     init_simd();
416 
417   if (simd_support & JSIMD_AVX2)
418     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
419                                compptr->v_samp_factor,
420                                compptr->width_in_blocks, input_data,
421                                output_data);
422   else if (simd_support & JSIMD_SSE2)
423     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
424                                compptr->v_samp_factor,
425                                compptr->width_in_blocks, input_data,
426                                output_data);
427   else
428     jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
429                               compptr->v_samp_factor, compptr->width_in_blocks,
430                               input_data, output_data);
431 }
432 
433 GLOBAL(int)
jsimd_can_h2v2_upsample(void)434 jsimd_can_h2v2_upsample(void)
435 {
436   init_simd();
437 
438   /* The code is optimised for these values only */
439   if (BITS_IN_JSAMPLE != 8)
440     return 0;
441   if (sizeof(JDIMENSION) != 4)
442     return 0;
443 
444   if (simd_support & JSIMD_AVX2)
445     return 1;
446   if (simd_support & JSIMD_SSE2)
447     return 1;
448   if (simd_support & JSIMD_MMX)
449     return 1;
450 
451   return 0;
452 }
453 
454 GLOBAL(int)
jsimd_can_h2v1_upsample(void)455 jsimd_can_h2v1_upsample(void)
456 {
457   init_simd();
458 
459   /* The code is optimised for these values only */
460   if (BITS_IN_JSAMPLE != 8)
461     return 0;
462   if (sizeof(JDIMENSION) != 4)
463     return 0;
464 
465   if (simd_support & JSIMD_AVX2)
466     return 1;
467   if (simd_support & JSIMD_SSE2)
468     return 1;
469   if (simd_support & JSIMD_MMX)
470     return 1;
471 
472   return 0;
473 }
474 
475 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)476 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
477                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
478 {
479   if (simd_support == ~0U)
480     init_simd();
481 
482   if (simd_support & JSIMD_AVX2)
483     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
484                              input_data, output_data_ptr);
485   else if (simd_support & JSIMD_SSE2)
486     jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
487                              input_data, output_data_ptr);
488   else
489     jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
490                             input_data, output_data_ptr);
491 }
492 
493 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)494 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
495                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
496 {
497   if (simd_support == ~0U)
498     init_simd();
499 
500   if (simd_support & JSIMD_AVX2)
501     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
502                              input_data, output_data_ptr);
503   else if (simd_support & JSIMD_SSE2)
504     jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
505                              input_data, output_data_ptr);
506   else
507     jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
508                             input_data, output_data_ptr);
509 }
510 
511 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)512 jsimd_can_h2v2_fancy_upsample(void)
513 {
514   init_simd();
515 
516   /* The code is optimised for these values only */
517   if (BITS_IN_JSAMPLE != 8)
518     return 0;
519   if (sizeof(JDIMENSION) != 4)
520     return 0;
521 
522   if ((simd_support & JSIMD_AVX2) &&
523       IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
524     return 1;
525   if ((simd_support & JSIMD_SSE2) &&
526       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
527     return 1;
528   if (simd_support & JSIMD_MMX)
529     return 1;
530 
531   return 0;
532 }
533 
534 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)535 jsimd_can_h2v1_fancy_upsample(void)
536 {
537   init_simd();
538 
539   /* The code is optimised for these values only */
540   if (BITS_IN_JSAMPLE != 8)
541     return 0;
542   if (sizeof(JDIMENSION) != 4)
543     return 0;
544 
545   if ((simd_support & JSIMD_AVX2) &&
546       IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
547     return 1;
548   if ((simd_support & JSIMD_SSE2) &&
549       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
550     return 1;
551   if (simd_support & JSIMD_MMX)
552     return 1;
553 
554   return 0;
555 }
556 
557 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)558 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
559                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
560 {
561   if (simd_support == ~0U)
562     init_simd();
563 
564   if (simd_support & JSIMD_AVX2)
565     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
566                                    compptr->downsampled_width, input_data,
567                                    output_data_ptr);
568   else if (simd_support & JSIMD_SSE2)
569     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
570                                    compptr->downsampled_width, input_data,
571                                    output_data_ptr);
572   else
573     jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
574                                   compptr->downsampled_width, input_data,
575                                   output_data_ptr);
576 }
577 
578 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)579 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
580                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
581 {
582   if (simd_support == ~0U)
583     init_simd();
584 
585   if (simd_support & JSIMD_AVX2)
586     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
587                                    compptr->downsampled_width, input_data,
588                                    output_data_ptr);
589   else if (simd_support & JSIMD_SSE2)
590     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
591                                    compptr->downsampled_width, input_data,
592                                    output_data_ptr);
593   else
594     jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
595                                   compptr->downsampled_width, input_data,
596                                   output_data_ptr);
597 }
598 
599 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)600 jsimd_can_h2v2_merged_upsample(void)
601 {
602   init_simd();
603 
604   /* The code is optimised for these values only */
605   if (BITS_IN_JSAMPLE != 8)
606     return 0;
607   if (sizeof(JDIMENSION) != 4)
608     return 0;
609 
610   if ((simd_support & JSIMD_AVX2) &&
611       IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
612     return 1;
613   if ((simd_support & JSIMD_SSE2) &&
614       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
615     return 1;
616   if (simd_support & JSIMD_MMX)
617     return 1;
618 
619   return 0;
620 }
621 
622 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)623 jsimd_can_h2v1_merged_upsample(void)
624 {
625   init_simd();
626 
627   /* The code is optimised for these values only */
628   if (BITS_IN_JSAMPLE != 8)
629     return 0;
630   if (sizeof(JDIMENSION) != 4)
631     return 0;
632 
633   if ((simd_support & JSIMD_AVX2) &&
634       IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
635     return 1;
636   if ((simd_support & JSIMD_SSE2) &&
637       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
638     return 1;
639   if (simd_support & JSIMD_MMX)
640     return 1;
641 
642   return 0;
643 }
644 
645 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)646 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
647                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
648 {
649   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
650   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
651   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
652 
653   if (simd_support == ~0U)
654     init_simd();
655 
656   switch (cinfo->out_color_space) {
657   case JCS_EXT_RGB:
658     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
659     sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
660     mmxfct = jsimd_h2v2_extrgb_merged_upsample_mmx;
661     break;
662   case JCS_EXT_RGBX:
663   case JCS_EXT_RGBA:
664     avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
665     sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
666     mmxfct = jsimd_h2v2_extrgbx_merged_upsample_mmx;
667     break;
668   case JCS_EXT_BGR:
669     avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
670     sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
671     mmxfct = jsimd_h2v2_extbgr_merged_upsample_mmx;
672     break;
673   case JCS_EXT_BGRX:
674   case JCS_EXT_BGRA:
675     avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
676     sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
677     mmxfct = jsimd_h2v2_extbgrx_merged_upsample_mmx;
678     break;
679   case JCS_EXT_XBGR:
680   case JCS_EXT_ABGR:
681     avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
682     sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
683     mmxfct = jsimd_h2v2_extxbgr_merged_upsample_mmx;
684     break;
685   case JCS_EXT_XRGB:
686   case JCS_EXT_ARGB:
687     avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
688     sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
689     mmxfct = jsimd_h2v2_extxrgb_merged_upsample_mmx;
690     break;
691   default:
692     avx2fct = jsimd_h2v2_merged_upsample_avx2;
693     sse2fct = jsimd_h2v2_merged_upsample_sse2;
694     mmxfct = jsimd_h2v2_merged_upsample_mmx;
695     break;
696   }
697 
698   if (simd_support & JSIMD_AVX2)
699     avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
700   else if (simd_support & JSIMD_SSE2)
701     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
702   else
703     mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
704 }
705 
706 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)707 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
708                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
709 {
710   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
711   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
712   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
713 
714   if (simd_support == ~0U)
715     init_simd();
716 
717   switch (cinfo->out_color_space) {
718   case JCS_EXT_RGB:
719     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
720     sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
721     mmxfct = jsimd_h2v1_extrgb_merged_upsample_mmx;
722     break;
723   case JCS_EXT_RGBX:
724   case JCS_EXT_RGBA:
725     avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
726     sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
727     mmxfct = jsimd_h2v1_extrgbx_merged_upsample_mmx;
728     break;
729   case JCS_EXT_BGR:
730     avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
731     sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
732     mmxfct = jsimd_h2v1_extbgr_merged_upsample_mmx;
733     break;
734   case JCS_EXT_BGRX:
735   case JCS_EXT_BGRA:
736     avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
737     sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
738     mmxfct = jsimd_h2v1_extbgrx_merged_upsample_mmx;
739     break;
740   case JCS_EXT_XBGR:
741   case JCS_EXT_ABGR:
742     avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
743     sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
744     mmxfct = jsimd_h2v1_extxbgr_merged_upsample_mmx;
745     break;
746   case JCS_EXT_XRGB:
747   case JCS_EXT_ARGB:
748     avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
749     sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
750     mmxfct = jsimd_h2v1_extxrgb_merged_upsample_mmx;
751     break;
752   default:
753     avx2fct = jsimd_h2v1_merged_upsample_avx2;
754     sse2fct = jsimd_h2v1_merged_upsample_sse2;
755     mmxfct = jsimd_h2v1_merged_upsample_mmx;
756     break;
757   }
758 
759   if (simd_support & JSIMD_AVX2)
760     avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
761   else if (simd_support & JSIMD_SSE2)
762     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
763   else
764     mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
765 }
766 
767 GLOBAL(int)
jsimd_can_convsamp(void)768 jsimd_can_convsamp(void)
769 {
770   init_simd();
771 
772   /* The code is optimised for these values only */
773   if (DCTSIZE != 8)
774     return 0;
775   if (BITS_IN_JSAMPLE != 8)
776     return 0;
777   if (sizeof(JDIMENSION) != 4)
778     return 0;
779   if (sizeof(DCTELEM) != 2)
780     return 0;
781 
782   if (simd_support & JSIMD_AVX2)
783     return 1;
784   if (simd_support & JSIMD_SSE2)
785     return 1;
786   if (simd_support & JSIMD_MMX)
787     return 1;
788 
789   return 0;
790 }
791 
792 GLOBAL(int)
jsimd_can_convsamp_float(void)793 jsimd_can_convsamp_float(void)
794 {
795   init_simd();
796 
797   /* The code is optimised for these values only */
798   if (DCTSIZE != 8)
799     return 0;
800   if (BITS_IN_JSAMPLE != 8)
801     return 0;
802   if (sizeof(JDIMENSION) != 4)
803     return 0;
804   if (sizeof(FAST_FLOAT) != 4)
805     return 0;
806 
807   if (simd_support & JSIMD_SSE2)
808     return 1;
809   if (simd_support & JSIMD_SSE)
810     return 1;
811   if (simd_support & JSIMD_3DNOW)
812     return 1;
813 
814   return 0;
815 }
816 
817 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)818 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
819                DCTELEM *workspace)
820 {
821   if (simd_support == ~0U)
822     init_simd();
823 
824   if (simd_support & JSIMD_AVX2)
825     jsimd_convsamp_avx2(sample_data, start_col, workspace);
826   else if (simd_support & JSIMD_SSE2)
827     jsimd_convsamp_sse2(sample_data, start_col, workspace);
828   else
829     jsimd_convsamp_mmx(sample_data, start_col, workspace);
830 }
831 
832 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)833 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
834                      FAST_FLOAT *workspace)
835 {
836   if (simd_support == ~0U)
837     init_simd();
838 
839   if (simd_support & JSIMD_SSE2)
840     jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
841   else if (simd_support & JSIMD_SSE)
842     jsimd_convsamp_float_sse(sample_data, start_col, workspace);
843   else
844     jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
845 }
846 
847 GLOBAL(int)
jsimd_can_fdct_islow(void)848 jsimd_can_fdct_islow(void)
849 {
850   init_simd();
851 
852   /* The code is optimised for these values only */
853   if (DCTSIZE != 8)
854     return 0;
855   if (sizeof(DCTELEM) != 2)
856     return 0;
857 
858   if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
859     return 1;
860   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
861     return 1;
862   if (simd_support & JSIMD_MMX)
863     return 1;
864 
865   return 0;
866 }
867 
868 GLOBAL(int)
jsimd_can_fdct_ifast(void)869 jsimd_can_fdct_ifast(void)
870 {
871   init_simd();
872 
873   /* The code is optimised for these values only */
874   if (DCTSIZE != 8)
875     return 0;
876   if (sizeof(DCTELEM) != 2)
877     return 0;
878 
879   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
880     return 1;
881   if (simd_support & JSIMD_MMX)
882     return 1;
883 
884   return 0;
885 }
886 
887 GLOBAL(int)
jsimd_can_fdct_float(void)888 jsimd_can_fdct_float(void)
889 {
890   init_simd();
891 
892   /* The code is optimised for these values only */
893   if (DCTSIZE != 8)
894     return 0;
895   if (sizeof(FAST_FLOAT) != 4)
896     return 0;
897 
898   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
899     return 1;
900   if (simd_support & JSIMD_3DNOW)
901     return 1;
902 
903   return 0;
904 }
905 
906 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)907 jsimd_fdct_islow(DCTELEM *data)
908 {
909   if (simd_support == ~0U)
910     init_simd();
911 
912   if (simd_support & JSIMD_AVX2)
913     jsimd_fdct_islow_avx2(data);
914   else if (simd_support & JSIMD_SSE2)
915     jsimd_fdct_islow_sse2(data);
916   else
917     jsimd_fdct_islow_mmx(data);
918 }
919 
920 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)921 jsimd_fdct_ifast(DCTELEM *data)
922 {
923   if (simd_support == ~0U)
924     init_simd();
925 
926   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
927     jsimd_fdct_ifast_sse2(data);
928   else
929     jsimd_fdct_ifast_mmx(data);
930 }
931 
932 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)933 jsimd_fdct_float(FAST_FLOAT *data)
934 {
935   if (simd_support == ~0U)
936     init_simd();
937 
938   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
939     jsimd_fdct_float_sse(data);
940   else if (simd_support & JSIMD_3DNOW)
941     jsimd_fdct_float_3dnow(data);
942 }
943 
944 GLOBAL(int)
jsimd_can_quantize(void)945 jsimd_can_quantize(void)
946 {
947   init_simd();
948 
949   /* The code is optimised for these values only */
950   if (DCTSIZE != 8)
951     return 0;
952   if (sizeof(JCOEF) != 2)
953     return 0;
954   if (sizeof(DCTELEM) != 2)
955     return 0;
956 
957   if (simd_support & JSIMD_AVX2)
958     return 1;
959   if (simd_support & JSIMD_SSE2)
960     return 1;
961   if (simd_support & JSIMD_MMX)
962     return 1;
963 
964   return 0;
965 }
966 
967 GLOBAL(int)
jsimd_can_quantize_float(void)968 jsimd_can_quantize_float(void)
969 {
970   init_simd();
971 
972   /* The code is optimised for these values only */
973   if (DCTSIZE != 8)
974     return 0;
975   if (sizeof(JCOEF) != 2)
976     return 0;
977   if (sizeof(FAST_FLOAT) != 4)
978     return 0;
979 
980   if (simd_support & JSIMD_SSE2)
981     return 1;
982   if (simd_support & JSIMD_SSE)
983     return 1;
984   if (simd_support & JSIMD_3DNOW)
985     return 1;
986 
987   return 0;
988 }
989 
990 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)991 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
992 {
993   if (simd_support == ~0U)
994     init_simd();
995 
996   if (simd_support & JSIMD_AVX2)
997     jsimd_quantize_avx2(coef_block, divisors, workspace);
998   else if (simd_support & JSIMD_SSE2)
999     jsimd_quantize_sse2(coef_block, divisors, workspace);
1000   else
1001     jsimd_quantize_mmx(coef_block, divisors, workspace);
1002 }
1003 
1004 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)1005 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
1006                      FAST_FLOAT *workspace)
1007 {
1008   if (simd_support == ~0U)
1009     init_simd();
1010 
1011   if (simd_support & JSIMD_SSE2)
1012     jsimd_quantize_float_sse2(coef_block, divisors, workspace);
1013   else if (simd_support & JSIMD_SSE)
1014     jsimd_quantize_float_sse(coef_block, divisors, workspace);
1015   else
1016     jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
1017 }
1018 
1019 GLOBAL(int)
jsimd_can_idct_2x2(void)1020 jsimd_can_idct_2x2(void)
1021 {
1022   init_simd();
1023 
1024   /* The code is optimised for these values only */
1025   if (DCTSIZE != 8)
1026     return 0;
1027   if (sizeof(JCOEF) != 2)
1028     return 0;
1029   if (BITS_IN_JSAMPLE != 8)
1030     return 0;
1031   if (sizeof(JDIMENSION) != 4)
1032     return 0;
1033   if (sizeof(ISLOW_MULT_TYPE) != 2)
1034     return 0;
1035 
1036   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1037     return 1;
1038   if (simd_support & JSIMD_MMX)
1039     return 1;
1040 
1041   return 0;
1042 }
1043 
1044 GLOBAL(int)
jsimd_can_idct_4x4(void)1045 jsimd_can_idct_4x4(void)
1046 {
1047   init_simd();
1048 
1049   /* The code is optimised for these values only */
1050   if (DCTSIZE != 8)
1051     return 0;
1052   if (sizeof(JCOEF) != 2)
1053     return 0;
1054   if (BITS_IN_JSAMPLE != 8)
1055     return 0;
1056   if (sizeof(JDIMENSION) != 4)
1057     return 0;
1058   if (sizeof(ISLOW_MULT_TYPE) != 2)
1059     return 0;
1060 
1061   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1062     return 1;
1063   if (simd_support & JSIMD_MMX)
1064     return 1;
1065 
1066   return 0;
1067 }
1068 
1069 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1070 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1071                JCOEFPTR coef_block, JSAMPARRAY output_buf,
1072                JDIMENSION output_col)
1073 {
1074   if (simd_support == ~0U)
1075     init_simd();
1076 
1077   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1078     jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
1079                         output_col);
1080   else
1081     jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1082 }
1083 
1084 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1085 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1086                JCOEFPTR coef_block, JSAMPARRAY output_buf,
1087                JDIMENSION output_col)
1088 {
1089   if (simd_support == ~0U)
1090     init_simd();
1091 
1092   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1093     jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
1094                         output_col);
1095   else
1096     jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1097 }
1098 
1099 GLOBAL(int)
jsimd_can_idct_islow(void)1100 jsimd_can_idct_islow(void)
1101 {
1102   init_simd();
1103 
1104   /* The code is optimised for these values only */
1105   if (DCTSIZE != 8)
1106     return 0;
1107   if (sizeof(JCOEF) != 2)
1108     return 0;
1109   if (BITS_IN_JSAMPLE != 8)
1110     return 0;
1111   if (sizeof(JDIMENSION) != 4)
1112     return 0;
1113   if (sizeof(ISLOW_MULT_TYPE) != 2)
1114     return 0;
1115 
1116   if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
1117     return 1;
1118   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1119     return 1;
1120   if (simd_support & JSIMD_MMX)
1121     return 1;
1122 
1123   return 0;
1124 }
1125 
1126 GLOBAL(int)
jsimd_can_idct_ifast(void)1127 jsimd_can_idct_ifast(void)
1128 {
1129   init_simd();
1130 
1131   /* The code is optimised for these values only */
1132   if (DCTSIZE != 8)
1133     return 0;
1134   if (sizeof(JCOEF) != 2)
1135     return 0;
1136   if (BITS_IN_JSAMPLE != 8)
1137     return 0;
1138   if (sizeof(JDIMENSION) != 4)
1139     return 0;
1140   if (sizeof(IFAST_MULT_TYPE) != 2)
1141     return 0;
1142   if (IFAST_SCALE_BITS != 2)
1143     return 0;
1144 
1145   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1146     return 1;
1147   if (simd_support & JSIMD_MMX)
1148     return 1;
1149 
1150   return 0;
1151 }
1152 
1153 GLOBAL(int)
jsimd_can_idct_float(void)1154 jsimd_can_idct_float(void)
1155 {
1156   init_simd();
1157 
1158   if (DCTSIZE != 8)
1159     return 0;
1160   if (sizeof(JCOEF) != 2)
1161     return 0;
1162   if (BITS_IN_JSAMPLE != 8)
1163     return 0;
1164   if (sizeof(JDIMENSION) != 4)
1165     return 0;
1166   if (sizeof(FAST_FLOAT) != 4)
1167     return 0;
1168   if (sizeof(FLOAT_MULT_TYPE) != 4)
1169     return 0;
1170 
1171   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1172     return 1;
1173   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1174     return 1;
1175   if (simd_support & JSIMD_3DNOW)
1176     return 1;
1177 
1178   return 0;
1179 }
1180 
1181 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1182 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1183                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1184                  JDIMENSION output_col)
1185 {
1186   if (simd_support == ~0U)
1187     init_simd();
1188 
1189   if (simd_support & JSIMD_AVX2)
1190     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1191                           output_col);
1192   else if (simd_support & JSIMD_SSE2)
1193     jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1194                           output_col);
1195   else
1196     jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
1197                          output_col);
1198 }
1199 
1200 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1201 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1202                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1203                  JDIMENSION output_col)
1204 {
1205   if (simd_support == ~0U)
1206     init_simd();
1207 
1208   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1209     jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1210                           output_col);
1211   else
1212     jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
1213                          output_col);
1214 }
1215 
1216 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1217 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1218                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1219                  JDIMENSION output_col)
1220 {
1221   if (simd_support == ~0U)
1222     init_simd();
1223 
1224   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1225     jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1226                           output_col);
1227   else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1228     jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1229                          output_col);
1230   else
1231     jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
1232                            output_col);
1233 }
1234 
1235 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1236 jsimd_can_huff_encode_one_block(void)
1237 {
1238   init_simd();
1239 
1240   if (DCTSIZE != 8)
1241     return 0;
1242   if (sizeof(JCOEF) != 2)
1243     return 0;
1244 
1245   if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1246       IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1247     return 1;
1248 
1249   return 0;
1250 }
1251 
1252 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1253 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1254                             int last_dc_val, c_derived_tbl *dctbl,
1255                             c_derived_tbl *actbl)
1256 {
1257   return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1258                                           dctbl, actbl);
1259 }
1260 
1261 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1262 jsimd_can_encode_mcu_AC_first_prepare(void)
1263 {
1264   init_simd();
1265 
1266   if (DCTSIZE != 8)
1267     return 0;
1268   if (sizeof(JCOEF) != 2)
1269     return 0;
1270   if (SIZEOF_SIZE_T != 4)
1271     return 0;
1272   if (simd_support & JSIMD_SSE2)
1273     return 1;
1274 
1275   return 0;
1276 }
1277 
1278 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * values,size_t * zerobits)1279 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1280                                   const int *jpeg_natural_order_start, int Sl,
1281                                   int Al, UJCOEF *values, size_t *zerobits)
1282 {
1283   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1284                                          Sl, Al, values, zerobits);
1285 }
1286 
1287 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1288 jsimd_can_encode_mcu_AC_refine_prepare(void)
1289 {
1290   init_simd();
1291 
1292   if (DCTSIZE != 8)
1293     return 0;
1294   if (sizeof(JCOEF) != 2)
1295     return 0;
1296   if (SIZEOF_SIZE_T != 4)
1297     return 0;
1298   if (simd_support & JSIMD_SSE2)
1299     return 1;
1300 
1301   return 0;
1302 }
1303 
1304 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * absvalues,size_t * bits)1305 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1306                                    const int *jpeg_natural_order_start, int Sl,
1307                                    int Al, UJCOEF *absvalues, size_t *bits)
1308 {
1309   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1310                                                  jpeg_natural_order_start,
1311                                                  Sl, Al, absvalues, bits);
1312 }
1313