1 /*
2 * jsimd_i386.c
3 *
4 * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
5 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
7 *
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11 *
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 32-bit x86 architecture.
15 */
16
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
23 #include "../jsimd.h"
24
25 /*
26 * In the PIC cases, we have no guarantee that constants will keep
27 * their alignment. This macro allows us to verify it at runtime.
28 */
/*
 * Nonzero when the address `ptr` is a multiple of 2^order bytes.  Both
 * arguments are parenthesised so that compound expressions (conditionals,
 * sums, ...) expand with the intended precedence.
 */
#define IS_ALIGNED(ptr, order) \
  ((((unsigned)(ptr)) & ((1U << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
33
34 static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
35 static THREAD_LOCAL unsigned int simd_huffman = 1;
36
37 /*
38 * Check what SIMD accelerations are supported.
39 */
40 LOCAL(void)
init_simd(void)41 init_simd(void)
42 {
43 #ifndef NO_GETENV
44 char env[2] = { 0 };
45 #endif
46
47 if (simd_support != ~0U)
48 return;
49
50 simd_support = jpeg_simd_cpu_support();
51
52 #ifndef NO_GETENV
53 /* Force different settings through environment variables */
54 if (!GETENV_S(env, 2, "JSIMD_FORCEMMX") && !strcmp(env, "1"))
55 simd_support &= JSIMD_MMX;
56 if (!GETENV_S(env, 2, "JSIMD_FORCE3DNOW") && !strcmp(env, "1"))
57 simd_support &= JSIMD_3DNOW | JSIMD_MMX;
58 if (!GETENV_S(env, 2, "JSIMD_FORCESSE") && !strcmp(env, "1"))
59 simd_support &= JSIMD_SSE | JSIMD_MMX;
60 if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
61 simd_support &= JSIMD_SSE2;
62 if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
63 simd_support &= JSIMD_AVX2;
64 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
65 simd_support = 0;
66 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
67 simd_huffman = 0;
68 #endif
69 }
70
71 GLOBAL(int)
jsimd_can_rgb_ycc(void)72 jsimd_can_rgb_ycc(void)
73 {
74 init_simd();
75
76 /* The code is optimised for these values only */
77 if (BITS_IN_JSAMPLE != 8)
78 return 0;
79 if (sizeof(JDIMENSION) != 4)
80 return 0;
81 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
82 return 0;
83
84 if ((simd_support & JSIMD_AVX2) &&
85 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
86 return 1;
87 if ((simd_support & JSIMD_SSE2) &&
88 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
89 return 1;
90 if (simd_support & JSIMD_MMX)
91 return 1;
92
93 return 0;
94 }
95
96 GLOBAL(int)
jsimd_can_rgb_gray(void)97 jsimd_can_rgb_gray(void)
98 {
99 init_simd();
100
101 /* The code is optimised for these values only */
102 if (BITS_IN_JSAMPLE != 8)
103 return 0;
104 if (sizeof(JDIMENSION) != 4)
105 return 0;
106 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
107 return 0;
108
109 if ((simd_support & JSIMD_AVX2) &&
110 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
111 return 1;
112 if ((simd_support & JSIMD_SSE2) &&
113 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
114 return 1;
115 if (simd_support & JSIMD_MMX)
116 return 1;
117
118 return 0;
119 }
120
121 GLOBAL(int)
jsimd_can_ycc_rgb(void)122 jsimd_can_ycc_rgb(void)
123 {
124 init_simd();
125
126 /* The code is optimised for these values only */
127 if (BITS_IN_JSAMPLE != 8)
128 return 0;
129 if (sizeof(JDIMENSION) != 4)
130 return 0;
131 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
132 return 0;
133
134 if ((simd_support & JSIMD_AVX2) &&
135 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
136 return 1;
137 if ((simd_support & JSIMD_SSE2) &&
138 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
139 return 1;
140 if (simd_support & JSIMD_MMX)
141 return 1;
142
143 return 0;
144 }
145
146 GLOBAL(int)
jsimd_can_ycc_rgb565(void)147 jsimd_can_ycc_rgb565(void)
148 {
149 return 0;
150 }
151
152 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)153 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
154 JSAMPIMAGE output_buf, JDIMENSION output_row,
155 int num_rows)
156 {
157 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
158 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
159 void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
160
161 if (simd_support == ~0U)
162 init_simd();
163
164 switch (cinfo->in_color_space) {
165 case JCS_EXT_RGB:
166 avx2fct = jsimd_extrgb_ycc_convert_avx2;
167 sse2fct = jsimd_extrgb_ycc_convert_sse2;
168 mmxfct = jsimd_extrgb_ycc_convert_mmx;
169 break;
170 case JCS_EXT_RGBX:
171 case JCS_EXT_RGBA:
172 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
173 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
174 mmxfct = jsimd_extrgbx_ycc_convert_mmx;
175 break;
176 case JCS_EXT_BGR:
177 avx2fct = jsimd_extbgr_ycc_convert_avx2;
178 sse2fct = jsimd_extbgr_ycc_convert_sse2;
179 mmxfct = jsimd_extbgr_ycc_convert_mmx;
180 break;
181 case JCS_EXT_BGRX:
182 case JCS_EXT_BGRA:
183 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
184 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
185 mmxfct = jsimd_extbgrx_ycc_convert_mmx;
186 break;
187 case JCS_EXT_XBGR:
188 case JCS_EXT_ABGR:
189 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
190 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
191 mmxfct = jsimd_extxbgr_ycc_convert_mmx;
192 break;
193 case JCS_EXT_XRGB:
194 case JCS_EXT_ARGB:
195 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
196 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
197 mmxfct = jsimd_extxrgb_ycc_convert_mmx;
198 break;
199 default:
200 avx2fct = jsimd_rgb_ycc_convert_avx2;
201 sse2fct = jsimd_rgb_ycc_convert_sse2;
202 mmxfct = jsimd_rgb_ycc_convert_mmx;
203 break;
204 }
205
206 if (simd_support & JSIMD_AVX2)
207 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
208 else if (simd_support & JSIMD_SSE2)
209 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
210 else
211 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
212 }
213
214 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)215 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
216 JSAMPIMAGE output_buf, JDIMENSION output_row,
217 int num_rows)
218 {
219 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
220 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
221 void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
222
223 if (simd_support == ~0U)
224 init_simd();
225
226 switch (cinfo->in_color_space) {
227 case JCS_EXT_RGB:
228 avx2fct = jsimd_extrgb_gray_convert_avx2;
229 sse2fct = jsimd_extrgb_gray_convert_sse2;
230 mmxfct = jsimd_extrgb_gray_convert_mmx;
231 break;
232 case JCS_EXT_RGBX:
233 case JCS_EXT_RGBA:
234 avx2fct = jsimd_extrgbx_gray_convert_avx2;
235 sse2fct = jsimd_extrgbx_gray_convert_sse2;
236 mmxfct = jsimd_extrgbx_gray_convert_mmx;
237 break;
238 case JCS_EXT_BGR:
239 avx2fct = jsimd_extbgr_gray_convert_avx2;
240 sse2fct = jsimd_extbgr_gray_convert_sse2;
241 mmxfct = jsimd_extbgr_gray_convert_mmx;
242 break;
243 case JCS_EXT_BGRX:
244 case JCS_EXT_BGRA:
245 avx2fct = jsimd_extbgrx_gray_convert_avx2;
246 sse2fct = jsimd_extbgrx_gray_convert_sse2;
247 mmxfct = jsimd_extbgrx_gray_convert_mmx;
248 break;
249 case JCS_EXT_XBGR:
250 case JCS_EXT_ABGR:
251 avx2fct = jsimd_extxbgr_gray_convert_avx2;
252 sse2fct = jsimd_extxbgr_gray_convert_sse2;
253 mmxfct = jsimd_extxbgr_gray_convert_mmx;
254 break;
255 case JCS_EXT_XRGB:
256 case JCS_EXT_ARGB:
257 avx2fct = jsimd_extxrgb_gray_convert_avx2;
258 sse2fct = jsimd_extxrgb_gray_convert_sse2;
259 mmxfct = jsimd_extxrgb_gray_convert_mmx;
260 break;
261 default:
262 avx2fct = jsimd_rgb_gray_convert_avx2;
263 sse2fct = jsimd_rgb_gray_convert_sse2;
264 mmxfct = jsimd_rgb_gray_convert_mmx;
265 break;
266 }
267
268 if (simd_support & JSIMD_AVX2)
269 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
270 else if (simd_support & JSIMD_SSE2)
271 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
272 else
273 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
274 }
275
276 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)277 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
278 JDIMENSION input_row, JSAMPARRAY output_buf,
279 int num_rows)
280 {
281 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
282 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
283 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
284
285 if (simd_support == ~0U)
286 init_simd();
287
288 switch (cinfo->out_color_space) {
289 case JCS_EXT_RGB:
290 avx2fct = jsimd_ycc_extrgb_convert_avx2;
291 sse2fct = jsimd_ycc_extrgb_convert_sse2;
292 mmxfct = jsimd_ycc_extrgb_convert_mmx;
293 break;
294 case JCS_EXT_RGBX:
295 case JCS_EXT_RGBA:
296 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
297 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
298 mmxfct = jsimd_ycc_extrgbx_convert_mmx;
299 break;
300 case JCS_EXT_BGR:
301 avx2fct = jsimd_ycc_extbgr_convert_avx2;
302 sse2fct = jsimd_ycc_extbgr_convert_sse2;
303 mmxfct = jsimd_ycc_extbgr_convert_mmx;
304 break;
305 case JCS_EXT_BGRX:
306 case JCS_EXT_BGRA:
307 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
308 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
309 mmxfct = jsimd_ycc_extbgrx_convert_mmx;
310 break;
311 case JCS_EXT_XBGR:
312 case JCS_EXT_ABGR:
313 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
314 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
315 mmxfct = jsimd_ycc_extxbgr_convert_mmx;
316 break;
317 case JCS_EXT_XRGB:
318 case JCS_EXT_ARGB:
319 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
320 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
321 mmxfct = jsimd_ycc_extxrgb_convert_mmx;
322 break;
323 default:
324 avx2fct = jsimd_ycc_rgb_convert_avx2;
325 sse2fct = jsimd_ycc_rgb_convert_sse2;
326 mmxfct = jsimd_ycc_rgb_convert_mmx;
327 break;
328 }
329
330 if (simd_support & JSIMD_AVX2)
331 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
332 else if (simd_support & JSIMD_SSE2)
333 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
334 else
335 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
336 }
337
338 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)339 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
340 JDIMENSION input_row, JSAMPARRAY output_buf,
341 int num_rows)
342 {
343 }
344
345 GLOBAL(int)
jsimd_can_h2v2_downsample(void)346 jsimd_can_h2v2_downsample(void)
347 {
348 init_simd();
349
350 /* The code is optimised for these values only */
351 if (BITS_IN_JSAMPLE != 8)
352 return 0;
353 if (sizeof(JDIMENSION) != 4)
354 return 0;
355
356 if (simd_support & JSIMD_AVX2)
357 return 1;
358 if (simd_support & JSIMD_SSE2)
359 return 1;
360 if (simd_support & JSIMD_MMX)
361 return 1;
362
363 return 0;
364 }
365
366 GLOBAL(int)
jsimd_can_h2v1_downsample(void)367 jsimd_can_h2v1_downsample(void)
368 {
369 init_simd();
370
371 /* The code is optimised for these values only */
372 if (BITS_IN_JSAMPLE != 8)
373 return 0;
374 if (sizeof(JDIMENSION) != 4)
375 return 0;
376
377 if (simd_support & JSIMD_AVX2)
378 return 1;
379 if (simd_support & JSIMD_SSE2)
380 return 1;
381 if (simd_support & JSIMD_MMX)
382 return 1;
383
384 return 0;
385 }
386
387 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)388 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
389 JSAMPARRAY input_data, JSAMPARRAY output_data)
390 {
391 if (simd_support == ~0U)
392 init_simd();
393
394 if (simd_support & JSIMD_AVX2)
395 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
396 compptr->v_samp_factor,
397 compptr->width_in_blocks, input_data,
398 output_data);
399 else if (simd_support & JSIMD_SSE2)
400 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
401 compptr->v_samp_factor,
402 compptr->width_in_blocks, input_data,
403 output_data);
404 else
405 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
406 compptr->v_samp_factor, compptr->width_in_blocks,
407 input_data, output_data);
408 }
409
410 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)411 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
412 JSAMPARRAY input_data, JSAMPARRAY output_data)
413 {
414 if (simd_support == ~0U)
415 init_simd();
416
417 if (simd_support & JSIMD_AVX2)
418 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
419 compptr->v_samp_factor,
420 compptr->width_in_blocks, input_data,
421 output_data);
422 else if (simd_support & JSIMD_SSE2)
423 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
424 compptr->v_samp_factor,
425 compptr->width_in_blocks, input_data,
426 output_data);
427 else
428 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
429 compptr->v_samp_factor, compptr->width_in_blocks,
430 input_data, output_data);
431 }
432
433 GLOBAL(int)
jsimd_can_h2v2_upsample(void)434 jsimd_can_h2v2_upsample(void)
435 {
436 init_simd();
437
438 /* The code is optimised for these values only */
439 if (BITS_IN_JSAMPLE != 8)
440 return 0;
441 if (sizeof(JDIMENSION) != 4)
442 return 0;
443
444 if (simd_support & JSIMD_AVX2)
445 return 1;
446 if (simd_support & JSIMD_SSE2)
447 return 1;
448 if (simd_support & JSIMD_MMX)
449 return 1;
450
451 return 0;
452 }
453
454 GLOBAL(int)
jsimd_can_h2v1_upsample(void)455 jsimd_can_h2v1_upsample(void)
456 {
457 init_simd();
458
459 /* The code is optimised for these values only */
460 if (BITS_IN_JSAMPLE != 8)
461 return 0;
462 if (sizeof(JDIMENSION) != 4)
463 return 0;
464
465 if (simd_support & JSIMD_AVX2)
466 return 1;
467 if (simd_support & JSIMD_SSE2)
468 return 1;
469 if (simd_support & JSIMD_MMX)
470 return 1;
471
472 return 0;
473 }
474
475 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)476 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
477 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
478 {
479 if (simd_support == ~0U)
480 init_simd();
481
482 if (simd_support & JSIMD_AVX2)
483 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
484 input_data, output_data_ptr);
485 else if (simd_support & JSIMD_SSE2)
486 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
487 input_data, output_data_ptr);
488 else
489 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
490 input_data, output_data_ptr);
491 }
492
493 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)494 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
495 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
496 {
497 if (simd_support == ~0U)
498 init_simd();
499
500 if (simd_support & JSIMD_AVX2)
501 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
502 input_data, output_data_ptr);
503 else if (simd_support & JSIMD_SSE2)
504 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
505 input_data, output_data_ptr);
506 else
507 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
508 input_data, output_data_ptr);
509 }
510
511 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)512 jsimd_can_h2v2_fancy_upsample(void)
513 {
514 init_simd();
515
516 /* The code is optimised for these values only */
517 if (BITS_IN_JSAMPLE != 8)
518 return 0;
519 if (sizeof(JDIMENSION) != 4)
520 return 0;
521
522 if ((simd_support & JSIMD_AVX2) &&
523 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
524 return 1;
525 if ((simd_support & JSIMD_SSE2) &&
526 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
527 return 1;
528 if (simd_support & JSIMD_MMX)
529 return 1;
530
531 return 0;
532 }
533
534 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)535 jsimd_can_h2v1_fancy_upsample(void)
536 {
537 init_simd();
538
539 /* The code is optimised for these values only */
540 if (BITS_IN_JSAMPLE != 8)
541 return 0;
542 if (sizeof(JDIMENSION) != 4)
543 return 0;
544
545 if ((simd_support & JSIMD_AVX2) &&
546 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
547 return 1;
548 if ((simd_support & JSIMD_SSE2) &&
549 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
550 return 1;
551 if (simd_support & JSIMD_MMX)
552 return 1;
553
554 return 0;
555 }
556
557 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)558 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
559 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
560 {
561 if (simd_support == ~0U)
562 init_simd();
563
564 if (simd_support & JSIMD_AVX2)
565 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
566 compptr->downsampled_width, input_data,
567 output_data_ptr);
568 else if (simd_support & JSIMD_SSE2)
569 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
570 compptr->downsampled_width, input_data,
571 output_data_ptr);
572 else
573 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
574 compptr->downsampled_width, input_data,
575 output_data_ptr);
576 }
577
578 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)579 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
580 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
581 {
582 if (simd_support == ~0U)
583 init_simd();
584
585 if (simd_support & JSIMD_AVX2)
586 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
587 compptr->downsampled_width, input_data,
588 output_data_ptr);
589 else if (simd_support & JSIMD_SSE2)
590 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
591 compptr->downsampled_width, input_data,
592 output_data_ptr);
593 else
594 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
595 compptr->downsampled_width, input_data,
596 output_data_ptr);
597 }
598
599 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)600 jsimd_can_h2v2_merged_upsample(void)
601 {
602 init_simd();
603
604 /* The code is optimised for these values only */
605 if (BITS_IN_JSAMPLE != 8)
606 return 0;
607 if (sizeof(JDIMENSION) != 4)
608 return 0;
609
610 if ((simd_support & JSIMD_AVX2) &&
611 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
612 return 1;
613 if ((simd_support & JSIMD_SSE2) &&
614 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
615 return 1;
616 if (simd_support & JSIMD_MMX)
617 return 1;
618
619 return 0;
620 }
621
622 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)623 jsimd_can_h2v1_merged_upsample(void)
624 {
625 init_simd();
626
627 /* The code is optimised for these values only */
628 if (BITS_IN_JSAMPLE != 8)
629 return 0;
630 if (sizeof(JDIMENSION) != 4)
631 return 0;
632
633 if ((simd_support & JSIMD_AVX2) &&
634 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
635 return 1;
636 if ((simd_support & JSIMD_SSE2) &&
637 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
638 return 1;
639 if (simd_support & JSIMD_MMX)
640 return 1;
641
642 return 0;
643 }
644
645 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)646 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
647 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
648 {
649 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
650 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
651 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
652
653 if (simd_support == ~0U)
654 init_simd();
655
656 switch (cinfo->out_color_space) {
657 case JCS_EXT_RGB:
658 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
659 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
660 mmxfct = jsimd_h2v2_extrgb_merged_upsample_mmx;
661 break;
662 case JCS_EXT_RGBX:
663 case JCS_EXT_RGBA:
664 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
665 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
666 mmxfct = jsimd_h2v2_extrgbx_merged_upsample_mmx;
667 break;
668 case JCS_EXT_BGR:
669 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
670 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
671 mmxfct = jsimd_h2v2_extbgr_merged_upsample_mmx;
672 break;
673 case JCS_EXT_BGRX:
674 case JCS_EXT_BGRA:
675 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
676 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
677 mmxfct = jsimd_h2v2_extbgrx_merged_upsample_mmx;
678 break;
679 case JCS_EXT_XBGR:
680 case JCS_EXT_ABGR:
681 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
682 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
683 mmxfct = jsimd_h2v2_extxbgr_merged_upsample_mmx;
684 break;
685 case JCS_EXT_XRGB:
686 case JCS_EXT_ARGB:
687 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
688 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
689 mmxfct = jsimd_h2v2_extxrgb_merged_upsample_mmx;
690 break;
691 default:
692 avx2fct = jsimd_h2v2_merged_upsample_avx2;
693 sse2fct = jsimd_h2v2_merged_upsample_sse2;
694 mmxfct = jsimd_h2v2_merged_upsample_mmx;
695 break;
696 }
697
698 if (simd_support & JSIMD_AVX2)
699 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
700 else if (simd_support & JSIMD_SSE2)
701 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
702 else
703 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
704 }
705
706 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)707 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
708 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
709 {
710 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
711 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
712 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
713
714 if (simd_support == ~0U)
715 init_simd();
716
717 switch (cinfo->out_color_space) {
718 case JCS_EXT_RGB:
719 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
720 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
721 mmxfct = jsimd_h2v1_extrgb_merged_upsample_mmx;
722 break;
723 case JCS_EXT_RGBX:
724 case JCS_EXT_RGBA:
725 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
726 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
727 mmxfct = jsimd_h2v1_extrgbx_merged_upsample_mmx;
728 break;
729 case JCS_EXT_BGR:
730 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
731 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
732 mmxfct = jsimd_h2v1_extbgr_merged_upsample_mmx;
733 break;
734 case JCS_EXT_BGRX:
735 case JCS_EXT_BGRA:
736 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
737 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
738 mmxfct = jsimd_h2v1_extbgrx_merged_upsample_mmx;
739 break;
740 case JCS_EXT_XBGR:
741 case JCS_EXT_ABGR:
742 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
743 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
744 mmxfct = jsimd_h2v1_extxbgr_merged_upsample_mmx;
745 break;
746 case JCS_EXT_XRGB:
747 case JCS_EXT_ARGB:
748 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
749 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
750 mmxfct = jsimd_h2v1_extxrgb_merged_upsample_mmx;
751 break;
752 default:
753 avx2fct = jsimd_h2v1_merged_upsample_avx2;
754 sse2fct = jsimd_h2v1_merged_upsample_sse2;
755 mmxfct = jsimd_h2v1_merged_upsample_mmx;
756 break;
757 }
758
759 if (simd_support & JSIMD_AVX2)
760 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
761 else if (simd_support & JSIMD_SSE2)
762 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
763 else
764 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
765 }
766
767 GLOBAL(int)
jsimd_can_convsamp(void)768 jsimd_can_convsamp(void)
769 {
770 init_simd();
771
772 /* The code is optimised for these values only */
773 if (DCTSIZE != 8)
774 return 0;
775 if (BITS_IN_JSAMPLE != 8)
776 return 0;
777 if (sizeof(JDIMENSION) != 4)
778 return 0;
779 if (sizeof(DCTELEM) != 2)
780 return 0;
781
782 if (simd_support & JSIMD_AVX2)
783 return 1;
784 if (simd_support & JSIMD_SSE2)
785 return 1;
786 if (simd_support & JSIMD_MMX)
787 return 1;
788
789 return 0;
790 }
791
792 GLOBAL(int)
jsimd_can_convsamp_float(void)793 jsimd_can_convsamp_float(void)
794 {
795 init_simd();
796
797 /* The code is optimised for these values only */
798 if (DCTSIZE != 8)
799 return 0;
800 if (BITS_IN_JSAMPLE != 8)
801 return 0;
802 if (sizeof(JDIMENSION) != 4)
803 return 0;
804 if (sizeof(FAST_FLOAT) != 4)
805 return 0;
806
807 if (simd_support & JSIMD_SSE2)
808 return 1;
809 if (simd_support & JSIMD_SSE)
810 return 1;
811 if (simd_support & JSIMD_3DNOW)
812 return 1;
813
814 return 0;
815 }
816
817 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)818 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
819 DCTELEM *workspace)
820 {
821 if (simd_support == ~0U)
822 init_simd();
823
824 if (simd_support & JSIMD_AVX2)
825 jsimd_convsamp_avx2(sample_data, start_col, workspace);
826 else if (simd_support & JSIMD_SSE2)
827 jsimd_convsamp_sse2(sample_data, start_col, workspace);
828 else
829 jsimd_convsamp_mmx(sample_data, start_col, workspace);
830 }
831
832 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)833 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
834 FAST_FLOAT *workspace)
835 {
836 if (simd_support == ~0U)
837 init_simd();
838
839 if (simd_support & JSIMD_SSE2)
840 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
841 else if (simd_support & JSIMD_SSE)
842 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
843 else
844 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
845 }
846
847 GLOBAL(int)
jsimd_can_fdct_islow(void)848 jsimd_can_fdct_islow(void)
849 {
850 init_simd();
851
852 /* The code is optimised for these values only */
853 if (DCTSIZE != 8)
854 return 0;
855 if (sizeof(DCTELEM) != 2)
856 return 0;
857
858 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
859 return 1;
860 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
861 return 1;
862 if (simd_support & JSIMD_MMX)
863 return 1;
864
865 return 0;
866 }
867
868 GLOBAL(int)
jsimd_can_fdct_ifast(void)869 jsimd_can_fdct_ifast(void)
870 {
871 init_simd();
872
873 /* The code is optimised for these values only */
874 if (DCTSIZE != 8)
875 return 0;
876 if (sizeof(DCTELEM) != 2)
877 return 0;
878
879 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
880 return 1;
881 if (simd_support & JSIMD_MMX)
882 return 1;
883
884 return 0;
885 }
886
887 GLOBAL(int)
jsimd_can_fdct_float(void)888 jsimd_can_fdct_float(void)
889 {
890 init_simd();
891
892 /* The code is optimised for these values only */
893 if (DCTSIZE != 8)
894 return 0;
895 if (sizeof(FAST_FLOAT) != 4)
896 return 0;
897
898 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
899 return 1;
900 if (simd_support & JSIMD_3DNOW)
901 return 1;
902
903 return 0;
904 }
905
906 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)907 jsimd_fdct_islow(DCTELEM *data)
908 {
909 if (simd_support == ~0U)
910 init_simd();
911
912 if (simd_support & JSIMD_AVX2)
913 jsimd_fdct_islow_avx2(data);
914 else if (simd_support & JSIMD_SSE2)
915 jsimd_fdct_islow_sse2(data);
916 else
917 jsimd_fdct_islow_mmx(data);
918 }
919
920 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)921 jsimd_fdct_ifast(DCTELEM *data)
922 {
923 if (simd_support == ~0U)
924 init_simd();
925
926 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
927 jsimd_fdct_ifast_sse2(data);
928 else
929 jsimd_fdct_ifast_mmx(data);
930 }
931
932 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)933 jsimd_fdct_float(FAST_FLOAT *data)
934 {
935 if (simd_support == ~0U)
936 init_simd();
937
938 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
939 jsimd_fdct_float_sse(data);
940 else if (simd_support & JSIMD_3DNOW)
941 jsimd_fdct_float_3dnow(data);
942 }
943
944 GLOBAL(int)
jsimd_can_quantize(void)945 jsimd_can_quantize(void)
946 {
947 init_simd();
948
949 /* The code is optimised for these values only */
950 if (DCTSIZE != 8)
951 return 0;
952 if (sizeof(JCOEF) != 2)
953 return 0;
954 if (sizeof(DCTELEM) != 2)
955 return 0;
956
957 if (simd_support & JSIMD_AVX2)
958 return 1;
959 if (simd_support & JSIMD_SSE2)
960 return 1;
961 if (simd_support & JSIMD_MMX)
962 return 1;
963
964 return 0;
965 }
966
967 GLOBAL(int)
jsimd_can_quantize_float(void)968 jsimd_can_quantize_float(void)
969 {
970 init_simd();
971
972 /* The code is optimised for these values only */
973 if (DCTSIZE != 8)
974 return 0;
975 if (sizeof(JCOEF) != 2)
976 return 0;
977 if (sizeof(FAST_FLOAT) != 4)
978 return 0;
979
980 if (simd_support & JSIMD_SSE2)
981 return 1;
982 if (simd_support & JSIMD_SSE)
983 return 1;
984 if (simd_support & JSIMD_3DNOW)
985 return 1;
986
987 return 0;
988 }
989
990 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)991 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
992 {
993 if (simd_support == ~0U)
994 init_simd();
995
996 if (simd_support & JSIMD_AVX2)
997 jsimd_quantize_avx2(coef_block, divisors, workspace);
998 else if (simd_support & JSIMD_SSE2)
999 jsimd_quantize_sse2(coef_block, divisors, workspace);
1000 else
1001 jsimd_quantize_mmx(coef_block, divisors, workspace);
1002 }
1003
1004 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)1005 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
1006 FAST_FLOAT *workspace)
1007 {
1008 if (simd_support == ~0U)
1009 init_simd();
1010
1011 if (simd_support & JSIMD_SSE2)
1012 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
1013 else if (simd_support & JSIMD_SSE)
1014 jsimd_quantize_float_sse(coef_block, divisors, workspace);
1015 else
1016 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
1017 }
1018
1019 GLOBAL(int)
jsimd_can_idct_2x2(void)1020 jsimd_can_idct_2x2(void)
1021 {
1022 init_simd();
1023
1024 /* The code is optimised for these values only */
1025 if (DCTSIZE != 8)
1026 return 0;
1027 if (sizeof(JCOEF) != 2)
1028 return 0;
1029 if (BITS_IN_JSAMPLE != 8)
1030 return 0;
1031 if (sizeof(JDIMENSION) != 4)
1032 return 0;
1033 if (sizeof(ISLOW_MULT_TYPE) != 2)
1034 return 0;
1035
1036 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1037 return 1;
1038 if (simd_support & JSIMD_MMX)
1039 return 1;
1040
1041 return 0;
1042 }
1043
1044 GLOBAL(int)
jsimd_can_idct_4x4(void)1045 jsimd_can_idct_4x4(void)
1046 {
1047 init_simd();
1048
1049 /* The code is optimised for these values only */
1050 if (DCTSIZE != 8)
1051 return 0;
1052 if (sizeof(JCOEF) != 2)
1053 return 0;
1054 if (BITS_IN_JSAMPLE != 8)
1055 return 0;
1056 if (sizeof(JDIMENSION) != 4)
1057 return 0;
1058 if (sizeof(ISLOW_MULT_TYPE) != 2)
1059 return 0;
1060
1061 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1062 return 1;
1063 if (simd_support & JSIMD_MMX)
1064 return 1;
1065
1066 return 0;
1067 }
1068
1069 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1070 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1071 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1072 JDIMENSION output_col)
1073 {
1074 if (simd_support == ~0U)
1075 init_simd();
1076
1077 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1078 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
1079 output_col);
1080 else
1081 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1082 }
1083
1084 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1085 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1086 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1087 JDIMENSION output_col)
1088 {
1089 if (simd_support == ~0U)
1090 init_simd();
1091
1092 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1093 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
1094 output_col);
1095 else
1096 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1097 }
1098
1099 GLOBAL(int)
jsimd_can_idct_islow(void)1100 jsimd_can_idct_islow(void)
1101 {
1102 init_simd();
1103
1104 /* The code is optimised for these values only */
1105 if (DCTSIZE != 8)
1106 return 0;
1107 if (sizeof(JCOEF) != 2)
1108 return 0;
1109 if (BITS_IN_JSAMPLE != 8)
1110 return 0;
1111 if (sizeof(JDIMENSION) != 4)
1112 return 0;
1113 if (sizeof(ISLOW_MULT_TYPE) != 2)
1114 return 0;
1115
1116 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
1117 return 1;
1118 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1119 return 1;
1120 if (simd_support & JSIMD_MMX)
1121 return 1;
1122
1123 return 0;
1124 }
1125
1126 GLOBAL(int)
jsimd_can_idct_ifast(void)1127 jsimd_can_idct_ifast(void)
1128 {
1129 init_simd();
1130
1131 /* The code is optimised for these values only */
1132 if (DCTSIZE != 8)
1133 return 0;
1134 if (sizeof(JCOEF) != 2)
1135 return 0;
1136 if (BITS_IN_JSAMPLE != 8)
1137 return 0;
1138 if (sizeof(JDIMENSION) != 4)
1139 return 0;
1140 if (sizeof(IFAST_MULT_TYPE) != 2)
1141 return 0;
1142 if (IFAST_SCALE_BITS != 2)
1143 return 0;
1144
1145 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1146 return 1;
1147 if (simd_support & JSIMD_MMX)
1148 return 1;
1149
1150 return 0;
1151 }
1152
1153 GLOBAL(int)
jsimd_can_idct_float(void)1154 jsimd_can_idct_float(void)
1155 {
1156 init_simd();
1157
1158 if (DCTSIZE != 8)
1159 return 0;
1160 if (sizeof(JCOEF) != 2)
1161 return 0;
1162 if (BITS_IN_JSAMPLE != 8)
1163 return 0;
1164 if (sizeof(JDIMENSION) != 4)
1165 return 0;
1166 if (sizeof(FAST_FLOAT) != 4)
1167 return 0;
1168 if (sizeof(FLOAT_MULT_TYPE) != 4)
1169 return 0;
1170
1171 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1172 return 1;
1173 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1174 return 1;
1175 if (simd_support & JSIMD_3DNOW)
1176 return 1;
1177
1178 return 0;
1179 }
1180
1181 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1182 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1183 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1184 JDIMENSION output_col)
1185 {
1186 if (simd_support == ~0U)
1187 init_simd();
1188
1189 if (simd_support & JSIMD_AVX2)
1190 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1191 output_col);
1192 else if (simd_support & JSIMD_SSE2)
1193 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1194 output_col);
1195 else
1196 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
1197 output_col);
1198 }
1199
1200 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1201 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1202 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1203 JDIMENSION output_col)
1204 {
1205 if (simd_support == ~0U)
1206 init_simd();
1207
1208 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1209 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1210 output_col);
1211 else
1212 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
1213 output_col);
1214 }
1215
1216 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1217 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1218 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1219 JDIMENSION output_col)
1220 {
1221 if (simd_support == ~0U)
1222 init_simd();
1223
1224 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1225 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1226 output_col);
1227 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1228 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1229 output_col);
1230 else
1231 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
1232 output_col);
1233 }
1234
1235 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1236 jsimd_can_huff_encode_one_block(void)
1237 {
1238 init_simd();
1239
1240 if (DCTSIZE != 8)
1241 return 0;
1242 if (sizeof(JCOEF) != 2)
1243 return 0;
1244
1245 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1246 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1247 return 1;
1248
1249 return 0;
1250 }
1251
1252 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1253 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1254 int last_dc_val, c_derived_tbl *dctbl,
1255 c_derived_tbl *actbl)
1256 {
1257 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1258 dctbl, actbl);
1259 }
1260
1261 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1262 jsimd_can_encode_mcu_AC_first_prepare(void)
1263 {
1264 init_simd();
1265
1266 if (DCTSIZE != 8)
1267 return 0;
1268 if (sizeof(JCOEF) != 2)
1269 return 0;
1270 if (SIZEOF_SIZE_T != 4)
1271 return 0;
1272 if (simd_support & JSIMD_SSE2)
1273 return 1;
1274
1275 return 0;
1276 }
1277
1278 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * values,size_t * zerobits)1279 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1280 const int *jpeg_natural_order_start, int Sl,
1281 int Al, UJCOEF *values, size_t *zerobits)
1282 {
1283 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1284 Sl, Al, values, zerobits);
1285 }
1286
1287 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1288 jsimd_can_encode_mcu_AC_refine_prepare(void)
1289 {
1290 init_simd();
1291
1292 if (DCTSIZE != 8)
1293 return 0;
1294 if (sizeof(JCOEF) != 2)
1295 return 0;
1296 if (SIZEOF_SIZE_T != 4)
1297 return 0;
1298 if (simd_support & JSIMD_SSE2)
1299 return 1;
1300
1301 return 0;
1302 }
1303
1304 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * absvalues,size_t * bits)1305 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1306 const int *jpeg_natural_order_start, int Sl,
1307 int Al, UJCOEF *absvalues, size_t *bits)
1308 {
1309 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1310 jpeg_natural_order_start,
1311 Sl, Al, absvalues, bits);
1312 }
1313