1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 #ifndef AOM_AOM_DSP_X86_CONVOLVE_H_ 12 #define AOM_AOM_DSP_X86_CONVOLVE_H_ 13 14 #include <assert.h> 15 16 #include "config/aom_config.h" 17 #include "config/aom_dsp_rtcd.h" 18 19 #include "aom/aom_integer.h" 20 #include "aom_ports/mem.h" 21 22 typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, 23 uint8_t *output_ptr, ptrdiff_t out_pitch, 24 uint32_t output_height, const int16_t *filter); 25 26 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 27 void aom_convolve8_##name##_##opt( \ 28 const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 29 ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ 30 const int16_t *filter_y, int y_step_q4, int w, int h) { \ 31 (void)filter_x; \ 32 (void)x_step_q4; \ 33 (void)filter_y; \ 34 (void)y_step_q4; \ 35 assert((-128 <= filter[3]) && (filter[3] <= 127)); \ 36 assert(step_q4 == 16); \ 37 if (((filter[0] | filter[1] | filter[6] | filter[7]) == 0) && \ 38 (filter[2] | filter[5])) { \ 39 while (w >= 16) { \ 40 aom_filter_block1d16_##dir##4_##avg##opt(src_start, src_stride, dst, \ 41 dst_stride, h, filter); \ 42 src += 16; \ 43 dst += 16; \ 44 w -= 16; \ 45 } \ 46 while (w >= 8) { \ 47 aom_filter_block1d8_##dir##4_##avg##opt(src_start, src_stride, dst, \ 48 dst_stride, h, filter); \ 49 src += 8; \ 50 dst += 8; \ 51 w -= 8; \ 52 } \ 53 while (w >= 4) { \ 54 aom_filter_block1d4_##dir##4_##avg##opt(src_start, src_stride, dst, \ 55 dst_stride, h, filter); \ 56 src += 4; \ 57 dst += 4; \ 58 w -= 4; \ 59 } \ 60 } else if (filter[0] | filter[1] | filter[2]) { \ 61 while (w >= 16) { \ 62 aom_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \ 63 dst_stride, h, filter); \ 64 src += 16; \ 65 dst += 16; \ 66 w -= 16; \ 67 } \ 68 while (w >= 8) { \ 69 aom_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \ 70 dst_stride, h, filter); \ 71 src += 8; \ 72 dst += 8; \ 73 w -= 8; \ 74 } \ 75 while (w >= 4) { \ 76 aom_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \ 77 dst_stride, h, filter); \ 78 src += 4; \ 79 dst += 4; \ 80 w -= 4; \ 81 } \ 82 } else { \ 83 while (w >= 16) { \ 84 aom_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \ 85 dst_stride, h, filter); \ 86 src += 16; \ 87 dst += 16; \ 88 w -= 16; \ 89 } \ 90 while (w >= 8) { \ 91 aom_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \ 92 dst_stride, h, filter); \ 93 src += 8; \ 94 dst += 8; \ 95 w -= 8; \ 96 } \ 97 while (w >= 4) { \ 98 aom_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \ 99 dst_stride, h, filter); \ 100 src += 4; \ 101 dst += 4; \ 102 w -= 4; \ 103 } \ 104 } \ 105 if (w) { \ 106 aom_convolve8_##name##_c(src, src_stride, dst, dst_stride, filter_x, \ 107 x_step_q4, filter_y, y_step_q4, w, h); \ 108 } \ 109 } 110 111 #if CONFIG_AV1_HIGHBITDEPTH 112 typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, 113 const ptrdiff_t src_pitch, 114 uint16_t *output_ptr, 115 ptrdiff_t out_pitch, 116 unsigned int output_height, 117 const int16_t *filter, int bd); 118 119 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 120 void aom_highbd_convolve8_##name##_##opt( \ 121 const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8, \ 122 ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ 123 const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \ 124 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ 125 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ 126 if (step_q4 == 16 && filter[3] != 128) { \ 127 if (((filter[0] | filter[1] | filter[6] | filter[7]) == 0) && \ 128 (filter[2] | filter[5])) { \ 129 while (w >= 16) { \ 130 aom_highbd_filter_block1d16_##dir##4_##avg##opt( \ 131 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 132 src += 16; \ 133 dst += 16; \ 134 w -= 16; \ 135 } \ 136 while (w >= 8) { \ 137 aom_highbd_filter_block1d8_##dir##4_##avg##opt( \ 138 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 139 src += 8; \ 140 dst += 8; \ 141 w -= 8; \ 142 } \ 143 while (w >= 4) { \ 144 aom_highbd_filter_block1d4_##dir##4_##avg##opt( \ 145 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 146 src += 4; \ 147 dst += 4; \ 148 w -= 4; \ 149 } \ 150 } else if (filter[0] | filter[1] | filter[2]) { \ 151 while (w >= 16) { \ 152 aom_highbd_filter_block1d16_##dir##8_##avg##opt( \ 153 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 154 src += 16; \ 155 dst += 16; \ 156 w -= 16; \ 157 } \ 158 while (w >= 8) { \ 159 aom_highbd_filter_block1d8_##dir##8_##avg##opt( \ 160 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 161 src += 8; \ 162 dst += 8; \ 163 w -= 8; \ 164 } \ 165 while (w >= 4) { \ 166 aom_highbd_filter_block1d4_##dir##8_##avg##opt( \ 167 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 168 src += 4; \ 169 dst += 4; \ 170 w -= 4; \ 171 } \ 172 } else { \ 173 while (w >= 16) { \ 174 aom_highbd_filter_block1d16_##dir##2_##avg##opt( \ 175 src, src_stride, dst, dst_stride, h, filter, bd); \ 176 src += 16; \ 177 dst += 16; \ 178 w -= 16; \ 179 } \ 180 while (w >= 8) { \ 181 aom_highbd_filter_block1d8_##dir##2_##avg##opt( \ 182 src, src_stride, dst, dst_stride, h, filter, bd); \ 183 src += 8; \ 184 dst += 8; \ 185 w -= 8; \ 186 } \ 187 while (w >= 4) { \ 188 aom_highbd_filter_block1d4_##dir##2_##avg##opt( \ 189 src, src_stride, dst, dst_stride, h, filter, bd); \ 190 src += 4; \ 191 dst += 4; \ 192 w -= 4; \ 193 } \ 194 } \ 195 } \ 196 if (w) { \ 197 aom_highbd_convolve8_##name##_c( \ 198 CONVERT_TO_BYTEPTR(src), src_stride, CONVERT_TO_BYTEPTR(dst), \ 199 dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); \ 200 } \ 201 } 202 #endif // CONFIG_AV1_HIGHBITDEPTH 203 204 #endif // AOM_AOM_DSP_X86_CONVOLVE_H_ 205