/*
 * Copyright (c) 2022 Samsung Electronics Co., Ltd.
 * All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * - Neither the name of the copyright owner, nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _OAPV_TQ_AVX_H_
#define _OAPV_TQ_AVX_H_


///////////////////////////////////////////////////////////////////////////////
// start of encoder code
#if ENABLE_ENCODER
///////////////////////////////////////////////////////////////////////////////

#if X86_SSE

/* Dot products of the source vectors s0..s3 (taken from the enclosing scope)
   with the coefficient vectors c0 and c1: 16-bit multiply-accumulate followed
   by 32-bit horizontal reduction into d0 and d1.  Uses v0..v7 as temporaries. */
#define CALCU_2x8(c0, c1, d0, d1) \
    v0 = _mm256_madd_epi16(s0, c0); \
    v1 = _mm256_madd_epi16(s1, c0); \
    v2 = _mm256_madd_epi16(s2, c0); \
    v3 = _mm256_madd_epi16(s3, c0); \
    v4 = _mm256_madd_epi16(s0, c1); \
    v5 = _mm256_madd_epi16(s1, c1); \
    v6 = _mm256_madd_epi16(s2, c1); \
    v7 = _mm256_madd_epi16(s3, c1); \
    v0 = _mm256_hadd_epi32(v0, v1); \
    v2 = _mm256_hadd_epi32(v2, v3); \
    v4 = _mm256_hadd_epi32(v4, v5); \
    v6 = _mm256_hadd_epi32(v6, v7); \
    d0 = _mm256_hadd_epi32(v0, v2); \
    d1 = _mm256_hadd_epi32(v4, v6)

/* Rounding step: add the offset 'add' to d0..d3 and arithmetic-shift each
   32-bit element right by 'shift'. */
#define CALCU_2x8_ADD_SHIFT(d0, d1, d2, d3, add, shift) \
    d0 = _mm256_add_epi32(d0, add); \
    d1 = _mm256_add_epi32(d1, add); \
    d2 = _mm256_add_epi32(d2, add); \
    d3 = _mm256_add_epi32(d3, add); \
    d0 = _mm256_srai_epi32(d0, shift); \
    d1 = _mm256_srai_epi32(d1, shift); \
    d2 = _mm256_srai_epi32(d2, shift); \
    d3 = _mm256_srai_epi32(d3, shift);

/* Dot products of the source vectors s0 and s1 with the coefficient vectors
   c0..c3, reduced into d0 and d1; the final 64-bit lane permute (0xd8)
   restores row order across the two 128-bit halves. */
#define CALCU_2x4(c0, c1, c2, c3, d0, d1) \
    v0 = _mm256_madd_epi16(s0, c0); \
    v1 = _mm256_madd_epi16(s1, c0); \
    v2 = _mm256_madd_epi16(s0, c1); \
    v3 = _mm256_madd_epi16(s1, c1); \
    v4 = _mm256_madd_epi16(s0, c2); \
    v5 = _mm256_madd_epi16(s1, c2); \
    v6 = _mm256_madd_epi16(s0, c3); \
    v7 = _mm256_madd_epi16(s1, c3); \
    v0 = _mm256_hadd_epi32(v0, v1); \
    v2 = _mm256_hadd_epi32(v2, v3); \
    v4 = _mm256_hadd_epi32(v4, v5); \
    v6 = _mm256_hadd_epi32(v6, v7); \
    d0 = _mm256_hadd_epi32(v0, v2); \
    d1 = _mm256_hadd_epi32(v4, v6); \
    d0 = _mm256_permute4x64_epi64(d0, 0xd8); \
    d1 = _mm256_permute4x64_epi64(d1, 0xd8)

/* One output line: dot products of the source vectors s00..s07 with a single
   coefficient vector, horizontally reduced and then folded across the two
   128-bit halves into 'dst'. */
#define CALCU_LINE_1x8(coeff0, dst) \
    v0 = _mm256_madd_epi16(s00, coeff0); \
    v1 = _mm256_madd_epi16(s01, coeff0); \
    v2 = _mm256_madd_epi16(s02, coeff0); \
    v3 = _mm256_madd_epi16(s03, coeff0); \
    v4 = _mm256_madd_epi16(s04, coeff0); \
    v5 = _mm256_madd_epi16(s05, coeff0); \
    v6 = _mm256_madd_epi16(s06, coeff0); \
    v7 = _mm256_madd_epi16(s07, coeff0); \
    v0 = _mm256_hadd_epi32(v0, v1); \
    v2 = _mm256_hadd_epi32(v2, v3); \
    v4 = _mm256_hadd_epi32(v4, v5); \
    v6 = _mm256_hadd_epi32(v6, v7); \
    v0 = _mm256_hadd_epi32(v0, v2); \
    v4 = _mm256_hadd_epi32(v4, v6); \
    v1 = _mm256_permute2x128_si256(v0, v4, 0x20); \
    v2 = _mm256_permute2x128_si256(v0, v4, 0x31); \
    dst = _mm256_add_epi32(v1, v2)

/* Rounding step: add the offset 'add' to d0..d7 and arithmetic-shift each
   32-bit element right by 'shift'. */
#define CALCU_LINE_1x8_ADD_SHIFT(d0, d1, d2, d3, d4, d5, d6, d7, add, shift) \
    d0 = _mm256_add_epi32(d0, add); \
    d1 = _mm256_add_epi32(d1, add); \
    d2 = _mm256_add_epi32(d2, add); \
    d3 = _mm256_add_epi32(d3, add); \
    d4 = _mm256_add_epi32(d4, add); \
    d5 = _mm256_add_epi32(d5, add); \
    d6 = _mm256_add_epi32(d6, add); \
    d7 = _mm256_add_epi32(d7, add); \
    d0 = _mm256_srai_epi32(d0, shift); \
    d1 = _mm256_srai_epi32(d1, shift); \
    d2 = _mm256_srai_epi32(d2, shift); \
    d3 = _mm256_srai_epi32(d3, shift); \
    d4 = _mm256_srai_epi32(d4, shift); \
    d5 = _mm256_srai_epi32(d5, shift); \
    d6 = _mm256_srai_epi32(d6, shift); \
    d7 = _mm256_srai_epi32(d7, shift)
#endif /* X86_SSE */

///////////////////////////////////////////////////////////////////////////////
// end of encoder code
#endif // ENABLE_ENCODER
///////////////////////////////////////////////////////////////////////////////


#if X86_SSE
/* Function tables with the AVX implementations of the (inverse) transform and
   (de)quantization kernels. */
extern const oapv_fn_tx_t oapv_tbl_fn_txb_avx[2];
extern const oapv_fn_quant_t oapv_tbl_fn_quant_avx[2];
extern const oapv_fn_itx_part_t oapv_tbl_fn_itx_part_avx[2];
extern const oapv_fn_itx_t oapv_tbl_fn_itx_avx[2];
extern const oapv_fn_dquant_t oapv_tbl_fn_dquant_avx[2];
extern const oapv_fn_itx_adj_t oapv_tbl_fn_itx_adj_avx[2];
#endif /* X86_SSE */


#endif /* _OAPV_TQ_AVX_H_ */