1*77c1e3ccSAndroid Build Coastguard Worker /* 2*77c1e3ccSAndroid Build Coastguard Worker * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3*77c1e3ccSAndroid Build Coastguard Worker * 4*77c1e3ccSAndroid Build Coastguard Worker * This source code is subject to the terms of the BSD 2 Clause License and 5*77c1e3ccSAndroid Build Coastguard Worker * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6*77c1e3ccSAndroid Build Coastguard Worker * was not distributed with this source code in the LICENSE file, you can 7*77c1e3ccSAndroid Build Coastguard Worker * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8*77c1e3ccSAndroid Build Coastguard Worker * Media Patent License 1.0 was not distributed with this source code in the 9*77c1e3ccSAndroid Build Coastguard Worker * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10*77c1e3ccSAndroid Build Coastguard Worker */ 11*77c1e3ccSAndroid Build Coastguard Worker #ifndef AOM_AOM_DSP_X86_CONVOLVE_H_ 12*77c1e3ccSAndroid Build Coastguard Worker #define AOM_AOM_DSP_X86_CONVOLVE_H_ 13*77c1e3ccSAndroid Build Coastguard Worker 14*77c1e3ccSAndroid Build Coastguard Worker #include <assert.h> 15*77c1e3ccSAndroid Build Coastguard Worker 16*77c1e3ccSAndroid Build Coastguard Worker #include "config/aom_config.h" 17*77c1e3ccSAndroid Build Coastguard Worker #include "config/aom_dsp_rtcd.h" 18*77c1e3ccSAndroid Build Coastguard Worker 19*77c1e3ccSAndroid Build Coastguard Worker #include "aom/aom_integer.h" 20*77c1e3ccSAndroid Build Coastguard Worker #include "aom_ports/mem.h" 21*77c1e3ccSAndroid Build Coastguard Worker 22*77c1e3ccSAndroid Build Coastguard Worker typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, 23*77c1e3ccSAndroid Build Coastguard Worker uint8_t *output_ptr, ptrdiff_t out_pitch, 24*77c1e3ccSAndroid Build Coastguard Worker uint32_t output_height, const int16_t *filter); 25*77c1e3ccSAndroid Build Coastguard Worker 
/*
 * FUN_CONV_1D defines aom_convolve8_<name>_<opt>(), a 1-D convolve entry
 * point that forwards the work to per-width kernels declared elsewhere.
 *
 * Macro parameters:
 *   name      - pasted into the generated function name and into the name of
 *               the aom_convolve8_<name>_c() fallback.
 *   step_q4   - expression asserted to equal 16 (callers are expected to pass
 *               x_step_q4 or y_step_q4).
 *   filter    - expression yielding the tap array that is inspected and
 *               forwarded (callers are expected to pass filter_x or
 *               filter_y). filter[3] is asserted to lie in [-128, 127].
 *   dir       - direction tag pasted into the kernel names.
 *   src_start - expression handed to the "4" and "8" kernels as their source
 *               pointer. It is substituted textually into every call, so an
 *               expression written in terms of `src` follows `src` as it
 *               advances across the block.
 *   avg       - optional infix (may be empty) pasted in front of <opt> in
 *               the kernel names.
 *   opt       - suffix pasted onto the generated function and kernel names.
 *
 * Kernel family selection, decided once from the tap values:
 *   - taps 0, 1, 6 and 7 all zero and tap 2 or 5 non-zero:
 *       aom_filter_block1d{16,8,4}_<dir>4_<avg><opt>(src_start, ...)
 *   - otherwise, any of taps 0, 1, 2 non-zero:
 *       aom_filter_block1d{16,8,4}_<dir>8_<avg><opt>(src_start, ...)
 *   - otherwise:
 *       aom_filter_block1d{16,8,4}_<dir>2_<avg><opt>(src, ...)
 *     (note: this family receives `src`, not src_start)
 *
 * Within the selected family the block is consumed left to right in strips
 * of 16, then 8, then 4 columns, advancing src, dst and w after each call.
 * Any columns still left (w not a multiple of 4) are handed to
 * aom_convolve8_<name>_c() together with the original filter/step arguments.
 */
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)          \
  void aom_convolve8_##name##_##opt(                                          \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                 \
      ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,           \
      const int16_t *filter_y, int y_step_q4, int w, int h) {                 \
    (void)filter_x;                                                           \
    (void)x_step_q4;                                                          \
    (void)filter_y;                                                           \
    (void)y_step_q4;                                                          \
    assert((-128 <= filter[3]) && (filter[3] <= 127));                        \
    assert(step_q4 == 16);                                                    \
    if (((filter[0] | filter[1] | filter[6] | filter[7]) == 0) &&             \
        (filter[2] | filter[5])) {                                            \
      while (w >= 16) {                                                       \
        aom_filter_block1d16_##dir##4_##avg##opt(src_start, src_stride, dst,  \
                                                 dst_stride, h, filter);      \
        src += 16;                                                            \
        dst += 16;                                                            \
        w -= 16;                                                              \
      }                                                                       \
      while (w >= 8) {                                                        \
        aom_filter_block1d8_##dir##4_##avg##opt(src_start, src_stride, dst,   \
                                                dst_stride, h, filter);       \
        src += 8;                                                             \
        dst += 8;                                                             \
        w -= 8;                                                               \
      }                                                                       \
      while (w >= 4) {                                                        \
        aom_filter_block1d4_##dir##4_##avg##opt(src_start, src_stride, dst,   \
                                                dst_stride, h, filter);       \
        src += 4;                                                             \
        dst += 4;                                                             \
        w -= 4;                                                               \
      }                                                                       \
    } else if (filter[0] | filter[1] | filter[2]) {                           \
      while (w >= 16) {                                                       \
        aom_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst,  \
                                                 dst_stride, h, filter);      \
        src += 16;                                                            \
        dst += 16;                                                            \
        w -= 16;                                                              \
      }                                                                       \
      while (w >= 8) {                                                        \
        aom_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst,   \
                                                dst_stride, h, filter);       \
        src += 8;                                                             \
        dst += 8;                                                             \
        w -= 8;                                                               \
      }                                                                       \
      while (w >= 4) {                                                        \
        aom_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst,   \
                                                dst_stride, h, filter);       \
        src += 4;                                                             \
        dst += 4;                                                             \
        w -= 4;                                                               \
      }                                                                       \
    } else {                                                                  \
      while (w >= 16) {                                                       \
        aom_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst,        \
                                                 dst_stride, h, filter);      \
        src += 16;                                                            \
        dst += 16;                                                            \
        w -= 16;                                                              \
      }                                                                       \
      while (w >= 8) {                                                        \
        aom_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst,         \
                                                dst_stride, h, filter);       \
        src += 8;                                                             \
        dst += 8;                                                             \
        w -= 8;                                                               \
      }                                                                       \
      while (w >= 4) {                                                        \
        aom_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst,         \
                                                dst_stride, h, filter);       \
        src += 4;                                                             \
        dst += 4;                                                             \
        w -= 4;                                                               \
      }                                                                       \
    }                                                                         \
    if (w) {                                                                  \
      aom_convolve8_##name##_c(src, src_stride, dst, dst_stride, filter_x,    \
                               x_step_q4, filter_y, y_step_q4, w, h);         \
    }                                                                         \
  }

typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, 113*77c1e3ccSAndroid Build Coastguard Worker const ptrdiff_t src_pitch, 114*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_ptr, 115*77c1e3ccSAndroid Build Coastguard Worker ptrdiff_t out_pitch, 116*77c1e3ccSAndroid Build Coastguard Worker unsigned int output_height, 117*77c1e3ccSAndroid Build Coastguard Worker const int16_t *filter, int bd); 118*77c1e3ccSAndroid Build Coastguard Worker 119*77c1e3ccSAndroid Build Coastguard Worker #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 120*77c1e3ccSAndroid Build Coastguard Worker void aom_highbd_convolve8_##name##_##opt( \ 121*77c1e3ccSAndroid Build Coastguard Worker const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8, \ 122*77c1e3ccSAndroid Build Coastguard Worker ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ 123*77c1e3ccSAndroid Build Coastguard Worker const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \ 124*77c1e3ccSAndroid Build Coastguard Worker uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ 125*77c1e3ccSAndroid Build Coastguard Worker uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ 126*77c1e3ccSAndroid Build Coastguard Worker if (step_q4 == 16 && filter[3] != 128) { \ 127*77c1e3ccSAndroid Build Coastguard Worker if (((filter[0] | filter[1] | filter[6] | filter[7]) == 0) && \ 128*77c1e3ccSAndroid Build Coastguard Worker (filter[2] | filter[5])) { \ 129*77c1e3ccSAndroid Build Coastguard Worker while (w >= 16) { \ 130*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d16_##dir##4_##avg##opt( \ 131*77c1e3ccSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter, bd); \ 132*77c1e3ccSAndroid Build Coastguard Worker src += 16; \ 133*77c1e3ccSAndroid Build Coastguard Worker dst += 16; \ 134*77c1e3ccSAndroid Build Coastguard Worker w -= 16; \ 135*77c1e3ccSAndroid Build Coastguard Worker } \ 136*77c1e3ccSAndroid Build Coastguard Worker while (w >= 8) { \ 
137*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d8_##dir##4_##avg##opt( \ 138*77c1e3ccSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter, bd); \ 139*77c1e3ccSAndroid Build Coastguard Worker src += 8; \ 140*77c1e3ccSAndroid Build Coastguard Worker dst += 8; \ 141*77c1e3ccSAndroid Build Coastguard Worker w -= 8; \ 142*77c1e3ccSAndroid Build Coastguard Worker } \ 143*77c1e3ccSAndroid Build Coastguard Worker while (w >= 4) { \ 144*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d4_##dir##4_##avg##opt( \ 145*77c1e3ccSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter, bd); \ 146*77c1e3ccSAndroid Build Coastguard Worker src += 4; \ 147*77c1e3ccSAndroid Build Coastguard Worker dst += 4; \ 148*77c1e3ccSAndroid Build Coastguard Worker w -= 4; \ 149*77c1e3ccSAndroid Build Coastguard Worker } \ 150*77c1e3ccSAndroid Build Coastguard Worker } else if (filter[0] | filter[1] | filter[2]) { \ 151*77c1e3ccSAndroid Build Coastguard Worker while (w >= 16) { \ 152*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d16_##dir##8_##avg##opt( \ 153*77c1e3ccSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter, bd); \ 154*77c1e3ccSAndroid Build Coastguard Worker src += 16; \ 155*77c1e3ccSAndroid Build Coastguard Worker dst += 16; \ 156*77c1e3ccSAndroid Build Coastguard Worker w -= 16; \ 157*77c1e3ccSAndroid Build Coastguard Worker } \ 158*77c1e3ccSAndroid Build Coastguard Worker while (w >= 8) { \ 159*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d8_##dir##8_##avg##opt( \ 160*77c1e3ccSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter, bd); \ 161*77c1e3ccSAndroid Build Coastguard Worker src += 8; \ 162*77c1e3ccSAndroid Build Coastguard Worker dst += 8; \ 163*77c1e3ccSAndroid Build Coastguard Worker w -= 8; \ 164*77c1e3ccSAndroid Build Coastguard Worker } \ 165*77c1e3ccSAndroid Build Coastguard 
Worker while (w >= 4) { \ 166*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d4_##dir##8_##avg##opt( \ 167*77c1e3ccSAndroid Build Coastguard Worker src_start, src_stride, dst, dst_stride, h, filter, bd); \ 168*77c1e3ccSAndroid Build Coastguard Worker src += 4; \ 169*77c1e3ccSAndroid Build Coastguard Worker dst += 4; \ 170*77c1e3ccSAndroid Build Coastguard Worker w -= 4; \ 171*77c1e3ccSAndroid Build Coastguard Worker } \ 172*77c1e3ccSAndroid Build Coastguard Worker } else { \ 173*77c1e3ccSAndroid Build Coastguard Worker while (w >= 16) { \ 174*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d16_##dir##2_##avg##opt( \ 175*77c1e3ccSAndroid Build Coastguard Worker src, src_stride, dst, dst_stride, h, filter, bd); \ 176*77c1e3ccSAndroid Build Coastguard Worker src += 16; \ 177*77c1e3ccSAndroid Build Coastguard Worker dst += 16; \ 178*77c1e3ccSAndroid Build Coastguard Worker w -= 16; \ 179*77c1e3ccSAndroid Build Coastguard Worker } \ 180*77c1e3ccSAndroid Build Coastguard Worker while (w >= 8) { \ 181*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d8_##dir##2_##avg##opt( \ 182*77c1e3ccSAndroid Build Coastguard Worker src, src_stride, dst, dst_stride, h, filter, bd); \ 183*77c1e3ccSAndroid Build Coastguard Worker src += 8; \ 184*77c1e3ccSAndroid Build Coastguard Worker dst += 8; \ 185*77c1e3ccSAndroid Build Coastguard Worker w -= 8; \ 186*77c1e3ccSAndroid Build Coastguard Worker } \ 187*77c1e3ccSAndroid Build Coastguard Worker while (w >= 4) { \ 188*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_filter_block1d4_##dir##2_##avg##opt( \ 189*77c1e3ccSAndroid Build Coastguard Worker src, src_stride, dst, dst_stride, h, filter, bd); \ 190*77c1e3ccSAndroid Build Coastguard Worker src += 4; \ 191*77c1e3ccSAndroid Build Coastguard Worker dst += 4; \ 192*77c1e3ccSAndroid Build Coastguard Worker w -= 4; \ 193*77c1e3ccSAndroid Build Coastguard Worker } \ 194*77c1e3ccSAndroid Build Coastguard Worker } \ 195*77c1e3ccSAndroid 
Build Coastguard Worker } \ 196*77c1e3ccSAndroid Build Coastguard Worker if (w) { \ 197*77c1e3ccSAndroid Build Coastguard Worker aom_highbd_convolve8_##name##_c( \ 198*77c1e3ccSAndroid Build Coastguard Worker CONVERT_TO_BYTEPTR(src), src_stride, CONVERT_TO_BYTEPTR(dst), \ 199*77c1e3ccSAndroid Build Coastguard Worker dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); \ 200*77c1e3ccSAndroid Build Coastguard Worker } \ 201*77c1e3ccSAndroid Build Coastguard Worker } 202*77c1e3ccSAndroid Build Coastguard Worker #endif // CONFIG_AV1_HIGHBITDEPTH 203*77c1e3ccSAndroid Build Coastguard Worker 204*77c1e3ccSAndroid Build Coastguard Worker #endif // AOM_AOM_DSP_X86_CONVOLVE_H_ 205