1*fb1b10abSAndroid Build Coastguard Worker /* 2*fb1b10abSAndroid Build Coastguard Worker * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3*fb1b10abSAndroid Build Coastguard Worker * 4*fb1b10abSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license 5*fb1b10abSAndroid Build Coastguard Worker * that can be found in the LICENSE file in the root of the source 6*fb1b10abSAndroid Build Coastguard Worker * tree. An additional intellectual property rights grant can be found 7*fb1b10abSAndroid Build Coastguard Worker * in the file PATENTS. All contributing project authors may 8*fb1b10abSAndroid Build Coastguard Worker * be found in the AUTHORS file in the root of the source tree. 9*fb1b10abSAndroid Build Coastguard Worker */ 10*fb1b10abSAndroid Build Coastguard Worker #ifndef VPX_VPX_DSP_X86_CONVOLVE_H_ 11*fb1b10abSAndroid Build Coastguard Worker #define VPX_VPX_DSP_X86_CONVOLVE_H_ 12*fb1b10abSAndroid Build Coastguard Worker 13*fb1b10abSAndroid Build Coastguard Worker #include <assert.h> 14*fb1b10abSAndroid Build Coastguard Worker 15*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_config.h" 16*fb1b10abSAndroid Build Coastguard Worker #include "vpx/vpx_integer.h" 17*fb1b10abSAndroid Build Coastguard Worker #include "vpx_ports/compiler_attributes.h" 18*fb1b10abSAndroid Build Coastguard Worker 19*fb1b10abSAndroid Build Coastguard Worker // TODO([email protected]): Refactor the code here. Currently this is pretty 20*fb1b10abSAndroid Build Coastguard Worker // hacky and awful to read. Note that there is a filter_x[3] == 128 check in 21*fb1b10abSAndroid Build Coastguard Worker // HIGHBD_FUN_CONV_2D to avoid seg fault due to the fact that the c function 22*fb1b10abSAndroid Build Coastguard Worker // assumes the filter is always 8 tap. 23*fb1b10abSAndroid Build Coastguard Worker typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, 24*fb1b10abSAndroid Build Coastguard Worker uint8_t *output_ptr, ptrdiff_t out_pitch, 25*fb1b10abSAndroid Build Coastguard Worker uint32_t output_height, const int16_t *filter); 26*fb1b10abSAndroid Build Coastguard Worker 27*fb1b10abSAndroid Build Coastguard Worker // TODO([email protected]): Remove the is_avg argument to the MACROS once we 28*fb1b10abSAndroid Build Coastguard Worker // have 4-tap vert avg filter. 29*fb1b10abSAndroid Build Coastguard Worker #define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt, is_avg) \ 30*fb1b10abSAndroid Build Coastguard Worker void vpx_convolve8_##name##_##opt( \ 31*fb1b10abSAndroid Build Coastguard Worker const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 32*fb1b10abSAndroid Build Coastguard Worker ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ 33*fb1b10abSAndroid Build Coastguard Worker int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ 34*fb1b10abSAndroid Build Coastguard Worker const int16_t *filter_row = filter[offset]; \ 35*fb1b10abSAndroid Build Coastguard Worker (void)x0_q4; \ 36*fb1b10abSAndroid Build Coastguard Worker (void)x_step_q4; \ 37*fb1b10abSAndroid Build Coastguard Worker (void)y0_q4; \ 38*fb1b10abSAndroid Build Coastguard Worker (void)y_step_q4; \ 39*fb1b10abSAndroid Build Coastguard Worker assert(filter_row[3] != 128); \ 40*fb1b10abSAndroid Build Coastguard Worker assert(step_q4 == 16); \ 41*fb1b10abSAndroid Build Coastguard Worker if (filter_row[0] | filter_row[1] | filter_row[6] | filter_row[7]) { \ 42*fb1b10abSAndroid Build Coastguard Worker const int num_taps = 8; \ 43*fb1b10abSAndroid Build Coastguard Worker while (w >= 16) { \ 44*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \ 45*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 46*fb1b10abSAndroid Build Coastguard Worker src += 16; \ 47*fb1b10abSAndroid Build Coastguard Worker dst += 16; \ 48*fb1b10abSAndroid Build Coastguard Worker w -= 16; \ 49*fb1b10abSAndroid Build Coastguard Worker } \ 50*fb1b10abSAndroid Build Coastguard Worker if (w == 8) { \ 51*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \ 52*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 53*fb1b10abSAndroid Build Coastguard Worker } else if (w == 4) { \ 54*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \ 55*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 56*fb1b10abSAndroid Build Coastguard Worker } \ 57*fb1b10abSAndroid Build Coastguard Worker (void)num_taps; \ 58*fb1b10abSAndroid Build Coastguard Worker } else if (filter_row[2] | filter_row[5]) { \ 59*fb1b10abSAndroid Build Coastguard Worker const int num_taps = is_avg ? 8 : 4; \ 60*fb1b10abSAndroid Build Coastguard Worker while (w >= 16) { \ 61*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d16_##dir##4_##avg##opt(src_start, src_stride, dst, \ 62*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 63*fb1b10abSAndroid Build Coastguard Worker src += 16; \ 64*fb1b10abSAndroid Build Coastguard Worker dst += 16; \ 65*fb1b10abSAndroid Build Coastguard Worker w -= 16; \ 66*fb1b10abSAndroid Build Coastguard Worker } \ 67*fb1b10abSAndroid Build Coastguard Worker if (w == 8) { \ 68*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d8_##dir##4_##avg##opt(src_start, src_stride, dst, \ 69*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 70*fb1b10abSAndroid Build Coastguard Worker } else if (w == 4) { \ 71*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d4_##dir##4_##avg##opt(src_start, src_stride, dst, \ 72*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 73*fb1b10abSAndroid Build Coastguard Worker } \ 74*fb1b10abSAndroid Build Coastguard Worker (void)num_taps; \ 75*fb1b10abSAndroid Build Coastguard Worker } else { \ 76*fb1b10abSAndroid Build Coastguard Worker const int num_taps = 2; \ 77*fb1b10abSAndroid Build Coastguard Worker while (w >= 16) { \ 78*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d16_##dir##2_##avg##opt(src_start, src_stride, dst, \ 79*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 80*fb1b10abSAndroid Build Coastguard Worker src += 16; \ 81*fb1b10abSAndroid Build Coastguard Worker dst += 16; \ 82*fb1b10abSAndroid Build Coastguard Worker w -= 16; \ 83*fb1b10abSAndroid Build Coastguard Worker } \ 84*fb1b10abSAndroid Build Coastguard Worker if (w == 8) { \ 85*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d8_##dir##2_##avg##opt(src_start, src_stride, dst, \ 86*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 87*fb1b10abSAndroid Build Coastguard Worker } else if (w == 4) { \ 88*fb1b10abSAndroid Build Coastguard Worker vpx_filter_block1d4_##dir##2_##avg##opt(src_start, src_stride, dst, \ 89*fb1b10abSAndroid Build Coastguard Worker dst_stride, h, filter_row); \ 90*fb1b10abSAndroid Build Coastguard Worker } \ 91*fb1b10abSAndroid Build Coastguard Worker (void)num_taps; \ 92*fb1b10abSAndroid Build Coastguard Worker } \ 93*fb1b10abSAndroid Build Coastguard Worker } 94*fb1b10abSAndroid Build Coastguard Worker 95*fb1b10abSAndroid Build Coastguard Worker #define FUN_CONV_2D(avg, opt, is_avg) \ 96*fb1b10abSAndroid Build Coastguard Worker void vpx_convolve8_##avg##opt( \ 97*fb1b10abSAndroid Build Coastguard Worker const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 98*fb1b10abSAndroid Build Coastguard Worker ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ 99*fb1b10abSAndroid Build Coastguard Worker int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ 100*fb1b10abSAndroid Build Coastguard Worker const int16_t *filter_x = filter[x0_q4]; \ 101*fb1b10abSAndroid Build Coastguard Worker const int16_t *filter_y = filter[y0_q4]; \ 102*fb1b10abSAndroid Build Coastguard Worker (void)filter_y; \ 103*fb1b10abSAndroid Build Coastguard Worker assert(filter_x[3] != 128); \ 104*fb1b10abSAndroid Build Coastguard Worker assert(filter_y[3] != 128); \ 105*fb1b10abSAndroid Build Coastguard Worker assert(w <= 64); \ 106*fb1b10abSAndroid Build Coastguard Worker assert(h <= 64); \ 107*fb1b10abSAndroid Build Coastguard Worker assert(x_step_q4 == 16); \ 108*fb1b10abSAndroid Build Coastguard Worker assert(y_step_q4 == 16); \ 109*fb1b10abSAndroid Build Coastguard Worker if (filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) { \ 110*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ 111*fb1b10abSAndroid Build Coastguard Worker vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ 112*fb1b10abSAndroid Build Coastguard Worker filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ 113*fb1b10abSAndroid Build Coastguard Worker h + 7); \ 114*fb1b10abSAndroid Build Coastguard Worker vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ 115*fb1b10abSAndroid Build Coastguard Worker filter, x0_q4, x_step_q4, y0_q4, \ 116*fb1b10abSAndroid Build Coastguard Worker y_step_q4, w, h); \ 117*fb1b10abSAndroid Build Coastguard Worker } else if (filter_x[2] | filter_x[5]) { \ 118*fb1b10abSAndroid Build Coastguard Worker const int num_taps = is_avg ? 8 : 4; \ 119*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ 120*fb1b10abSAndroid Build Coastguard Worker vpx_convolve8_horiz_##opt( \ 121*fb1b10abSAndroid Build Coastguard Worker src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \ 122*fb1b10abSAndroid Build Coastguard Worker filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1); \ 123*fb1b10abSAndroid Build Coastguard Worker vpx_convolve8_##avg##vert_##opt(fdata2 + 64 * (num_taps / 2 - 1), 64, \ 124*fb1b10abSAndroid Build Coastguard Worker dst, dst_stride, filter, x0_q4, \ 125*fb1b10abSAndroid Build Coastguard Worker x_step_q4, y0_q4, y_step_q4, w, h); \ 126*fb1b10abSAndroid Build Coastguard Worker } else { \ 127*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65] VPX_UNINITIALIZED); \ 128*fb1b10abSAndroid Build Coastguard Worker vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \ 129*fb1b10abSAndroid Build Coastguard Worker x_step_q4, y0_q4, y_step_q4, w, h + 1); \ 130*fb1b10abSAndroid Build Coastguard Worker vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \ 131*fb1b10abSAndroid Build Coastguard Worker x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ 132*fb1b10abSAndroid Build Coastguard Worker h); \ 133*fb1b10abSAndroid Build Coastguard Worker } \ 134*fb1b10abSAndroid Build Coastguard Worker } 135*fb1b10abSAndroid Build Coastguard Worker 136*fb1b10abSAndroid Build Coastguard Worker #if CONFIG_VP9_HIGHBITDEPTH 137*fb1b10abSAndroid Build Coastguard Worker 138*fb1b10abSAndroid Build Coastguard Worker typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, 139*fb1b10abSAndroid Build Coastguard Worker const ptrdiff_t src_pitch, 140*fb1b10abSAndroid Build Coastguard Worker uint16_t *output_ptr, 141*fb1b10abSAndroid Build Coastguard Worker ptrdiff_t out_pitch, 142*fb1b10abSAndroid Build Coastguard Worker unsigned int output_height, 143*fb1b10abSAndroid Build Coastguard Worker const int16_t *filter, int bd); 144*fb1b10abSAndroid Build Coastguard Worker 145*fb1b10abSAndroid Build Coastguard Worker #define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt, \ 146*fb1b10abSAndroid Build Coastguard Worker is_avg) \ 147*fb1b10abSAndroid Build Coastguard Worker void vpx_highbd_convolve8_##name##_##opt( \ 148*fb1b10abSAndroid Build Coastguard Worker const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ 149*fb1b10abSAndroid Build Coastguard Worker ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \ 150*fb1b10abSAndroid Build Coastguard Worker int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ 151*fb1b10abSAndroid Build Coastguard Worker const int16_t *filter_row = filter_kernel[offset]; \ 152*fb1b10abSAndroid Build Coastguard Worker if (step_q4 == 16 && filter_row[3] != 128) { \ 153*fb1b10abSAndroid Build Coastguard Worker if (filter_row[0] | filter_row[1] | filter_row[6] | filter_row[7]) { \ 154*fb1b10abSAndroid Build Coastguard Worker const int num_taps = 8; \ 155*fb1b10abSAndroid Build Coastguard Worker while (w >= 16) { \ 156*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \ 157*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 158*fb1b10abSAndroid Build Coastguard Worker src += 16; \ 159*fb1b10abSAndroid Build Coastguard Worker dst += 16; \ 160*fb1b10abSAndroid Build Coastguard Worker w -= 16; \ 161*fb1b10abSAndroid Build Coastguard Worker } \ 162*fb1b10abSAndroid Build Coastguard Worker while (w >= 8) { \ 163*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \ 164*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 165*fb1b10abSAndroid Build Coastguard Worker src += 8; \ 166*fb1b10abSAndroid Build Coastguard Worker dst += 8; \ 167*fb1b10abSAndroid Build Coastguard Worker w -= 8; \ 168*fb1b10abSAndroid Build Coastguard Worker } \ 169*fb1b10abSAndroid Build Coastguard Worker while (w >= 4) { \ 170*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \ 171*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 172*fb1b10abSAndroid Build Coastguard Worker src += 4; \ 173*fb1b10abSAndroid Build Coastguard Worker dst += 4; \ 174*fb1b10abSAndroid Build Coastguard Worker w -= 4; \ 175*fb1b10abSAndroid Build Coastguard Worker } \ 176*fb1b10abSAndroid Build Coastguard Worker (void)num_taps; \ 177*fb1b10abSAndroid Build Coastguard Worker } else if (filter_row[2] | filter_row[5]) { \ 178*fb1b10abSAndroid Build Coastguard Worker const int num_taps = is_avg ? 8 : 4; \ 179*fb1b10abSAndroid Build Coastguard Worker while (w >= 16) { \ 180*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d16_##dir##4_##avg##opt( \ 181*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 182*fb1b10abSAndroid Build Coastguard Worker src += 16; \ 183*fb1b10abSAndroid Build Coastguard Worker dst += 16; \ 184*fb1b10abSAndroid Build Coastguard Worker w -= 16; \ 185*fb1b10abSAndroid Build Coastguard Worker } \ 186*fb1b10abSAndroid Build Coastguard Worker while (w >= 8) { \ 187*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d8_##dir##4_##avg##opt( \ 188*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 189*fb1b10abSAndroid Build Coastguard Worker src += 8; \ 190*fb1b10abSAndroid Build Coastguard Worker dst += 8; \ 191*fb1b10abSAndroid Build Coastguard Worker w -= 8; \ 192*fb1b10abSAndroid Build Coastguard Worker } \ 193*fb1b10abSAndroid Build Coastguard Worker while (w >= 4) { \ 194*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d4_##dir##4_##avg##opt( \ 195*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 196*fb1b10abSAndroid Build Coastguard Worker src += 4; \ 197*fb1b10abSAndroid Build Coastguard Worker dst += 4; \ 198*fb1b10abSAndroid Build Coastguard Worker w -= 4; \ 199*fb1b10abSAndroid Build Coastguard Worker } \ 200*fb1b10abSAndroid Build Coastguard Worker (void)num_taps; \ 201*fb1b10abSAndroid Build Coastguard Worker } else { \ 202*fb1b10abSAndroid Build Coastguard Worker const int num_taps = 2; \ 203*fb1b10abSAndroid Build Coastguard Worker while (w >= 16) { \ 204*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \ 205*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 206*fb1b10abSAndroid Build Coastguard Worker src += 16; \ 207*fb1b10abSAndroid Build Coastguard Worker dst += 16; \ 208*fb1b10abSAndroid Build Coastguard Worker w -= 16; \ 209*fb1b10abSAndroid Build Coastguard Worker } \ 210*fb1b10abSAndroid Build Coastguard Worker while (w >= 8) { \ 211*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \ 212*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 213*fb1b10abSAndroid Build Coastguard Worker src += 8; \ 214*fb1b10abSAndroid Build Coastguard Worker dst += 8; \ 215*fb1b10abSAndroid Build Coastguard Worker w -= 8; \ 216*fb1b10abSAndroid Build Coastguard Worker } \ 217*fb1b10abSAndroid Build Coastguard Worker while (w >= 4) { \ 218*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \ 219*fb1b10abSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ 220*fb1b10abSAndroid Build Coastguard Worker src += 4; \ 221*fb1b10abSAndroid Build Coastguard Worker dst += 4; \ 222*fb1b10abSAndroid Build Coastguard Worker w -= 4; \ 223*fb1b10abSAndroid Build Coastguard Worker } \ 224*fb1b10abSAndroid Build Coastguard Worker (void)num_taps; \ 225*fb1b10abSAndroid Build Coastguard Worker } \ 226*fb1b10abSAndroid Build Coastguard Worker } \ 227*fb1b10abSAndroid Build Coastguard Worker if (w) { \ 228*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ 229*fb1b10abSAndroid Build Coastguard Worker filter_kernel, x0_q4, x_step_q4, y0_q4, \ 230*fb1b10abSAndroid Build Coastguard Worker y_step_q4, w, h, bd); \ 231*fb1b10abSAndroid Build Coastguard Worker } \ 232*fb1b10abSAndroid Build Coastguard Worker } 233*fb1b10abSAndroid Build Coastguard Worker 234*fb1b10abSAndroid Build Coastguard Worker #define HIGH_FUN_CONV_2D(avg, opt, is_avg) \ 235*fb1b10abSAndroid Build Coastguard Worker void vpx_highbd_convolve8_##avg##opt( \ 236*fb1b10abSAndroid Build Coastguard Worker const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ 237*fb1b10abSAndroid Build Coastguard Worker ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ 238*fb1b10abSAndroid Build Coastguard Worker int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ 239*fb1b10abSAndroid Build Coastguard Worker const int16_t *filter_x = filter[x0_q4]; \ 240*fb1b10abSAndroid Build Coastguard Worker assert(w <= 64); \ 241*fb1b10abSAndroid Build Coastguard Worker assert(h <= 64); \ 242*fb1b10abSAndroid Build Coastguard Worker if (x_step_q4 == 16 && y_step_q4 == 16) { \ 243*fb1b10abSAndroid Build Coastguard Worker if ((filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) || \ 244*fb1b10abSAndroid Build Coastguard Worker filter_x[3] == 128) { \ 245*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ 246*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ 247*fb1b10abSAndroid Build Coastguard Worker fdata2, 64, filter, x0_q4, x_step_q4, \ 248*fb1b10abSAndroid Build Coastguard Worker y0_q4, y_step_q4, w, h + 7, bd); \ 249*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_convolve8_##avg##vert_##opt( \ 250*fb1b10abSAndroid Build Coastguard Worker fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4, \ 251*fb1b10abSAndroid Build Coastguard Worker y0_q4, y_step_q4, w, h, bd); \ 252*fb1b10abSAndroid Build Coastguard Worker } else if (filter_x[2] | filter_x[5]) { \ 253*fb1b10abSAndroid Build Coastguard Worker const int num_taps = is_avg ? 8 : 4; \ 254*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ 255*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_convolve8_horiz_##opt( \ 256*fb1b10abSAndroid Build Coastguard Worker src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \ 257*fb1b10abSAndroid Build Coastguard Worker filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1, \ 258*fb1b10abSAndroid Build Coastguard Worker bd); \ 259*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_convolve8_##avg##vert_##opt( \ 260*fb1b10abSAndroid Build Coastguard Worker fdata2 + 64 * (num_taps / 2 - 1), 64, dst, dst_stride, filter, \ 261*fb1b10abSAndroid Build Coastguard Worker x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \ 262*fb1b10abSAndroid Build Coastguard Worker } else { \ 263*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65] VPX_UNINITIALIZED); \ 264*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \ 265*fb1b10abSAndroid Build Coastguard Worker x0_q4, x_step_q4, y0_q4, y_step_q4, \ 266*fb1b10abSAndroid Build Coastguard Worker w, h + 1, bd); \ 267*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ 268*fb1b10abSAndroid Build Coastguard Worker filter, x0_q4, x_step_q4, \ 269*fb1b10abSAndroid Build Coastguard Worker y0_q4, y_step_q4, w, h, bd); \ 270*fb1b10abSAndroid Build Coastguard Worker } \ 271*fb1b10abSAndroid Build Coastguard Worker } else { \ 272*fb1b10abSAndroid Build Coastguard Worker vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter, \ 273*fb1b10abSAndroid Build Coastguard Worker x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, \ 274*fb1b10abSAndroid Build Coastguard Worker bd); \ 275*fb1b10abSAndroid Build Coastguard Worker } \ 276*fb1b10abSAndroid Build Coastguard Worker } 277*fb1b10abSAndroid Build Coastguard Worker 278*fb1b10abSAndroid Build Coastguard Worker #endif // CONFIG_VP9_HIGHBITDEPTH 279*fb1b10abSAndroid Build Coastguard Worker #endif // VPX_VPX_DSP_X86_CONVOLVE_H_ 280