1*a67afe4dSAndroid Build Coastguard Worker 2*a67afe4dSAndroid Build Coastguard Worker /* intel_init.c - SSE2 optimized filter functions 3*a67afe4dSAndroid Build Coastguard Worker * 4*a67afe4dSAndroid Build Coastguard Worker * Copyright (c) 2018 Cosmin Truta 5*a67afe4dSAndroid Build Coastguard Worker * Copyright (c) 2016-2017 Glenn Randers-Pehrson 6*a67afe4dSAndroid Build Coastguard Worker * Written by Mike Klein and Matt Sarett, Google, Inc. 7*a67afe4dSAndroid Build Coastguard Worker * Derived from arm/arm_init.c 8*a67afe4dSAndroid Build Coastguard Worker * 9*a67afe4dSAndroid Build Coastguard Worker * This code is released under the libpng license. 10*a67afe4dSAndroid Build Coastguard Worker * For conditions of distribution and use, see the disclaimer 11*a67afe4dSAndroid Build Coastguard Worker * and license in png.h 12*a67afe4dSAndroid Build Coastguard Worker */ 13*a67afe4dSAndroid Build Coastguard Worker 14*a67afe4dSAndroid Build Coastguard Worker #include "../pngpriv.h" 15*a67afe4dSAndroid Build Coastguard Worker 16*a67afe4dSAndroid Build Coastguard Worker #ifdef PNG_READ_SUPPORTED 17*a67afe4dSAndroid Build Coastguard Worker #if PNG_INTEL_SSE_IMPLEMENTATION > 0 18*a67afe4dSAndroid Build Coastguard Worker 19*a67afe4dSAndroid Build Coastguard Worker void png_init_filter_functions_sse2(png_structp pp,unsigned int bpp)20*a67afe4dSAndroid Build Coastguard Workerpng_init_filter_functions_sse2(png_structp pp, unsigned int bpp) 21*a67afe4dSAndroid Build Coastguard Worker { 22*a67afe4dSAndroid Build Coastguard Worker /* The techniques used to implement each of these filters in SSE operate on 23*a67afe4dSAndroid Build Coastguard Worker * one pixel at a time. 24*a67afe4dSAndroid Build Coastguard Worker * So they generally speed up 3bpp images about 3x, 4bpp images about 4x. 25*a67afe4dSAndroid Build Coastguard Worker * They can scale up to 6 and 8 bpp images and down to 2 bpp images, 26*a67afe4dSAndroid Build Coastguard Worker * but they'd not likely have any benefit for 1bpp images. 27*a67afe4dSAndroid Build Coastguard Worker * Most of these can be implemented using only MMX and 64-bit registers, 28*a67afe4dSAndroid Build Coastguard Worker * but they end up a bit slower than using the equally-ubiquitous SSE2. 29*a67afe4dSAndroid Build Coastguard Worker */ 30*a67afe4dSAndroid Build Coastguard Worker png_debug(1, "in png_init_filter_functions_sse2"); 31*a67afe4dSAndroid Build Coastguard Worker if (bpp == 3) 32*a67afe4dSAndroid Build Coastguard Worker { 33*a67afe4dSAndroid Build Coastguard Worker pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2; 34*a67afe4dSAndroid Build Coastguard Worker pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2; 35*a67afe4dSAndroid Build Coastguard Worker pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = 36*a67afe4dSAndroid Build Coastguard Worker png_read_filter_row_paeth3_sse2; 37*a67afe4dSAndroid Build Coastguard Worker } 38*a67afe4dSAndroid Build Coastguard Worker else if (bpp == 4) 39*a67afe4dSAndroid Build Coastguard Worker { 40*a67afe4dSAndroid Build Coastguard Worker pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2; 41*a67afe4dSAndroid Build Coastguard Worker pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2; 42*a67afe4dSAndroid Build Coastguard Worker pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = 43*a67afe4dSAndroid Build Coastguard Worker png_read_filter_row_paeth4_sse2; 44*a67afe4dSAndroid Build Coastguard Worker } 45*a67afe4dSAndroid Build Coastguard Worker 46*a67afe4dSAndroid Build Coastguard Worker /* No need optimize PNG_FILTER_VALUE_UP. The compiler should 47*a67afe4dSAndroid Build Coastguard Worker * autovectorize. 48*a67afe4dSAndroid Build Coastguard Worker */ 49*a67afe4dSAndroid Build Coastguard Worker } 50*a67afe4dSAndroid Build Coastguard Worker 51*a67afe4dSAndroid Build Coastguard Worker #endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ 52*a67afe4dSAndroid Build Coastguard Worker #endif /* PNG_READ_SUPPORTED */ 53