1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <stddef.h> 13 #include <stdint.h> 14 15 #include "config/aom_config.h" 16 #include "config/aom_dsp_rtcd.h" 17 18 #include "aom_dsp/aom_dsp_common.h" 19 20 // The 2 unused parameters are place holders for PIC enabled build. 21 // These definitions are for functions defined in subpel_variance.asm 22 #define DECL(w, opt) \ 23 int aom_sub_pixel_variance##w##xh_##opt( \ 24 const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ 25 const uint8_t *dst, ptrdiff_t dst_stride, int height, unsigned int *sse, \ 26 void *unused0, void *unused) 27 #define DECLS(opt) \ 28 DECL(4, opt); \ 29 DECL(8, opt); \ 30 DECL(16, opt) 31 32 DECLS(ssse3); 33 #undef DECLS 34 #undef DECL 35 36 #define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ 37 unsigned int aom_sub_pixel_variance##w##x##h##_##opt( \ 38 const uint8_t *src, int src_stride, int x_offset, int y_offset, \ 39 const uint8_t *dst, int dst_stride, unsigned int *sse_ptr) { \ 40 /*Avoid overflow in helper by capping height.*/ \ 41 const int hf = AOMMIN(h, 64); \ 42 unsigned int sse = 0; \ 43 int se = 0; \ 44 for (int i = 0; i < (w / wf); ++i) { \ 45 const uint8_t *src_ptr = src; \ 46 const uint8_t *dst_ptr = dst; \ 47 for (int j = 0; j < (h / hf); ++j) { \ 48 unsigned int sse2; \ 49 const int se2 = aom_sub_pixel_variance##wf##xh_##opt( \ 50 src_ptr, src_stride, x_offset, y_offset, dst_ptr, dst_stride, hf, \ 51 &sse2, NULL, NULL); \ 52 dst_ptr += hf * dst_stride; \ 53 src_ptr += hf * src_stride; \ 54 se += se2; \ 55 sse += sse2; \ 56 } \ 57 src += wf; \ 58 dst += wf; \ 59 } \ 60 *sse_ptr = sse; \ 61 return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \ 62 } 63 64 #if !CONFIG_REALTIME_ONLY 65 #define FNS(opt) \ 66 FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)) \ 67 FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t)) \ 68 FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t)) \ 69 FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)) \ 70 FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t)) \ 71 FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t)) \ 72 FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t)) \ 73 FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t)) \ 74 FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t)) \ 75 FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t)) \ 76 FN(16, 8, 16, 4, 3, opt, (int32_t), (int32_t)) \ 77 FN(8, 16, 8, 3, 4, opt, (int32_t), (int32_t)) \ 78 FN(8, 8, 8, 3, 3, opt, (int32_t), (int32_t)) \ 79 FN(8, 4, 8, 3, 2, opt, (int32_t), (int32_t)) \ 80 FN(4, 8, 4, 2, 3, opt, (int32_t), (int32_t)) \ 81 FN(4, 4, 4, 2, 2, opt, (int32_t), (int32_t)) \ 82 FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)) \ 83 FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)) \ 84 FN(8, 32, 8, 3, 5, opt, (uint32_t), (int64_t)) \ 85 FN(32, 8, 16, 5, 3, opt, (uint32_t), (int64_t)) \ 86 FN(16, 64, 16, 4, 6, opt, (int64_t), (int64_t)) \ 87 FN(64, 16, 16, 6, 4, opt, (int64_t), (int64_t)) 88 #else 89 #define FNS(opt) \ 90 FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)) \ 91 FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t)) \ 92 FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t)) \ 93 FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)) \ 94 FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t)) \ 95 FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t)) \ 96 FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t)) \ 97 FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t)) \ 98 FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t)) \ 99 FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t)) \ 100 FN(16, 8, 16, 4, 3, opt, (int32_t), (int32_t)) \ 101 FN(8, 16, 8, 3, 4, opt, (int32_t), (int32_t)) \ 102 FN(8, 8, 8, 3, 3, opt, (int32_t), (int32_t)) \ 103 FN(8, 4, 8, 3, 2, opt, (int32_t), (int32_t)) \ 104 FN(4, 8, 4, 2, 3, opt, (int32_t), (int32_t)) \ 105 FN(4, 4, 4, 2, 2, opt, (int32_t), (int32_t)) 106 #endif 107 108 FNS(ssse3) 109 110 #undef FNS 111 #undef FN 112 113 // The 2 unused parameters are place holders for PIC enabled build. 114 #define DECL(w, opt) \ 115 int aom_sub_pixel_avg_variance##w##xh_##opt( \ 116 const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ 117 const uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *sec, \ 118 ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \ 119 void *unused) 120 #define DECLS(opt) \ 121 DECL(4, opt); \ 122 DECL(8, opt); \ 123 DECL(16, opt) 124 125 DECLS(ssse3); 126 #undef DECL 127 #undef DECLS 128 129 #define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ 130 unsigned int aom_sub_pixel_avg_variance##w##x##h##_##opt( \ 131 const uint8_t *src, int src_stride, int x_offset, int y_offset, \ 132 const uint8_t *dst, int dst_stride, unsigned int *sse_ptr, \ 133 const uint8_t *sec) { \ 134 /*Avoid overflow in helper by capping height.*/ \ 135 const int hf = AOMMIN(h, 64); \ 136 unsigned int sse = 0; \ 137 int se = 0; \ 138 for (int i = 0; i < (w / wf); ++i) { \ 139 const uint8_t *src_ptr = src; \ 140 const uint8_t *dst_ptr = dst; \ 141 const uint8_t *sec_ptr = sec; \ 142 for (int j = 0; j < (h / hf); ++j) { \ 143 unsigned int sse2; \ 144 const int se2 = aom_sub_pixel_avg_variance##wf##xh_##opt( \ 145 src_ptr, src_stride, x_offset, y_offset, dst_ptr, dst_stride, \ 146 sec_ptr, w, hf, &sse2, NULL, NULL); \ 147 dst_ptr += hf * dst_stride; \ 148 src_ptr += hf * src_stride; \ 149 sec_ptr += hf * w; \ 150 se += se2; \ 151 sse += sse2; \ 152 } \ 153 src += wf; \ 154 dst += wf; \ 155 sec += wf; \ 156 } \ 157 *sse_ptr = sse; \ 158 return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \ 159 } 160 161 #if !CONFIG_REALTIME_ONLY 162 #define FNS(opt) \ 163 FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)) \ 164 FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t)) \ 165 FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t)) \ 166 FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)) \ 167 FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t)) \ 168 FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t)) \ 169 FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t)) \ 170 FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t)) \ 171 FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t)) \ 172 FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t)) \ 173 FN(16, 8, 16, 4, 3, opt, (uint32_t), (int32_t)) \ 174 FN(8, 16, 8, 3, 4, opt, (uint32_t), (int32_t)) \ 175 FN(8, 8, 8, 3, 3, opt, (uint32_t), (int32_t)) \ 176 FN(8, 4, 8, 3, 2, opt, (uint32_t), (int32_t)) \ 177 FN(4, 8, 4, 2, 3, opt, (uint32_t), (int32_t)) \ 178 FN(4, 4, 4, 2, 2, opt, (uint32_t), (int32_t)) \ 179 FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)) \ 180 FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)) \ 181 FN(8, 32, 8, 3, 5, opt, (uint32_t), (int64_t)) \ 182 FN(32, 8, 16, 5, 3, opt, (uint32_t), (int64_t)) \ 183 FN(16, 64, 16, 4, 6, opt, (int64_t), (int64_t)) \ 184 FN(64, 16, 16, 6, 4, opt, (int64_t), (int64_t)) 185 #else 186 #define FNS(opt) \ 187 FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)) \ 188 FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t)) \ 189 FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t)) \ 190 FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)) \ 191 FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t)) \ 192 FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t)) \ 193 FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t)) \ 194 FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t)) \ 195 FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t)) \ 196 FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t)) \ 197 FN(16, 8, 16, 4, 3, opt, (uint32_t), (int32_t)) \ 198 FN(8, 16, 8, 3, 4, opt, (uint32_t), (int32_t)) \ 199 FN(8, 8, 8, 3, 3, opt, (uint32_t), (int32_t)) \ 200 FN(8, 4, 8, 3, 2, opt, (uint32_t), (int32_t)) \ 201 FN(4, 8, 4, 2, 3, opt, (uint32_t), (int32_t)) \ 202 FN(4, 4, 4, 2, 2, opt, (uint32_t), (int32_t)) 203 #endif 204 205 FNS(ssse3) 206 207 #undef FNS 208 #undef FN 209