/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker #include <arm_neon.h>
12*fb1b10abSAndroid Build Coastguard Worker
13*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_config.h"
14*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
15*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/txfm_common.h"
16*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/vpx_dsp_common.h"
17*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/arm/idct_neon.h"
18*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/arm/fdct_neon.h"
19*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/arm/mem_neon.h"
20*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/arm/transpose_neon.h"
21*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/arm/fdct4x4_neon.h"
22*fb1b10abSAndroid Build Coastguard Worker
// 4x4 forward transform entry point (NEON).
//
// input:        four rows of four int16_t samples; consecutive rows are
//               `stride` elements apart.
// final_output: receives 16 coefficients, written as two 8-lane stores.
// stride:       distance, in int16_t elements, between input rows.
void vpx_fdct4x4_neon(const int16_t *input, tran_low_t *final_output,
                      int stride) {
  int16x4_t rows[4];
  int r;

  // Load every row and pre-scale it by 16 (left shift by 4).
  for (r = 0; r < 4; ++r) {
    rows[r] = vshl_n_s16(vld1_s16(input + r * stride), 4);
  }

  // When the very first sample is non-zero, add 1 to the first row. The
  // addend is a 64-bit 1 reinterpreted as four 16-bit lanes, so only the low
  // lane is incremented.
  if (input[0] != 0) {
    rows[0] = vadd_s16(rows[0], vreinterpret_s16_s64(vdup_n_s64(1)));
  }

  vpx_fdct4x4_pass1_neon(rows);
  vpx_fdct4x4_pass2_neon(rows);

  {
    // Not quite a rounding shift: the bias is 1 even though the shift is 2.
    const int16x8_t bias = vdupq_n_s16(1);
    const int16x8_t upper = vcombine_s16(rows[0], rows[1]);
    const int16x8_t lower = vcombine_s16(rows[2], rows[3]);

    store_s16q_to_tran_low(final_output, vshrq_n_s16(vaddq_s16(upper, bias), 2));
    store_s16q_to_tran_low(final_output + 8,
                           vshrq_n_s16(vaddq_s16(lower, bias), 2));
  }
}
50*fb1b10abSAndroid Build Coastguard Worker
#if CONFIG_VP9_HIGHBITDEPTH

// 4x4 forward transform entry point for high-bitdepth builds (NEON).
//
// input:        four rows of four int16_t samples; consecutive rows are
//               `stride` elements apart.
// final_output: receives 16 coefficients, written as four 4-lane 32-bit
//               stores.
// stride:       distance, in int16_t elements, between input rows.
void vpx_highbd_fdct4x4_neon(const int16_t *input, tran_low_t *final_output,
                             int stride) {
  int32x4_t rows[4];
  int r;

  // Widen each row to 32-bit lanes while pre-scaling by 16 (widening left
  // shift by 4).
  for (r = 0; r < 4; ++r) {
    rows[r] = vshll_n_s16(vld1_s16(input + r * stride), 4);
  }

  // When the very first sample is non-zero, add 1 to the first lane of the
  // first row only.
  if (input[0] != 0) {
    static const int32_t first_lane_one[4] = { 1, 0, 0, 0 };
    rows[0] = vaddq_s32(rows[0], vld1q_s32(first_lane_one));
  }

  // The same pass routine is intentionally invoked twice in succession.
  vpx_highbd_fdct4x4_pass1_neon(rows);
  vpx_highbd_fdct4x4_pass1_neon(rows);

  {
    // Not quite a rounding shift: the bias is 1 even though the shift is 2.
    const int32x4_t bias = vdupq_n_s32(1);
    for (r = 0; r < 4; ++r) {
      rows[r] = vshrq_n_s32(vaddq_s32(rows[r], bias), 2);
      vst1q_s32(final_output + 4 * r, rows[r]);
    }
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
86