/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker
#include "vp8/encoder/denoising.h"
#include "vp8/common/reconinter.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8_rtcd.h"

#include <stdlib.h> /* abs() */
#include <emmintrin.h>
#include "vpx_ports/emmintrin_compat.h"
19*fb1b10abSAndroid Build Coastguard Worker
20*fb1b10abSAndroid Build Coastguard Worker /* Compute the sum of all pixel differences of this MB. */
abs_sum_diff_16x1(__m128i acc_diff)21*fb1b10abSAndroid Build Coastguard Worker static INLINE unsigned int abs_sum_diff_16x1(__m128i acc_diff) {
22*fb1b10abSAndroid Build Coastguard Worker const __m128i k_1 = _mm_set1_epi16(1);
23*fb1b10abSAndroid Build Coastguard Worker const __m128i acc_diff_lo =
24*fb1b10abSAndroid Build Coastguard Worker _mm_srai_epi16(_mm_unpacklo_epi8(acc_diff, acc_diff), 8);
25*fb1b10abSAndroid Build Coastguard Worker const __m128i acc_diff_hi =
26*fb1b10abSAndroid Build Coastguard Worker _mm_srai_epi16(_mm_unpackhi_epi8(acc_diff, acc_diff), 8);
27*fb1b10abSAndroid Build Coastguard Worker const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi);
28*fb1b10abSAndroid Build Coastguard Worker const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1);
29*fb1b10abSAndroid Build Coastguard Worker const __m128i hgfe_dcba =
30*fb1b10abSAndroid Build Coastguard Worker _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8));
31*fb1b10abSAndroid Build Coastguard Worker const __m128i hgfedcba =
32*fb1b10abSAndroid Build Coastguard Worker _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4));
33*fb1b10abSAndroid Build Coastguard Worker unsigned int sum_diff = (unsigned int)abs(_mm_cvtsi128_si32(hgfedcba));
34*fb1b10abSAndroid Build Coastguard Worker
35*fb1b10abSAndroid Build Coastguard Worker return sum_diff;
36*fb1b10abSAndroid Build Coastguard Worker }
37*fb1b10abSAndroid Build Coastguard Worker
vp8_denoiser_filter_sse2(unsigned char * mc_running_avg_y,int mc_avg_y_stride,unsigned char * running_avg_y,int avg_y_stride,unsigned char * sig,int sig_stride,unsigned int motion_magnitude,int increase_denoising)38*fb1b10abSAndroid Build Coastguard Worker int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
39*fb1b10abSAndroid Build Coastguard Worker int mc_avg_y_stride, unsigned char *running_avg_y,
40*fb1b10abSAndroid Build Coastguard Worker int avg_y_stride, unsigned char *sig,
41*fb1b10abSAndroid Build Coastguard Worker int sig_stride, unsigned int motion_magnitude,
42*fb1b10abSAndroid Build Coastguard Worker int increase_denoising) {
43*fb1b10abSAndroid Build Coastguard Worker unsigned char *running_avg_y_start = running_avg_y;
44*fb1b10abSAndroid Build Coastguard Worker unsigned char *sig_start = sig;
45*fb1b10abSAndroid Build Coastguard Worker unsigned int sum_diff_thresh;
46*fb1b10abSAndroid Build Coastguard Worker int r;
47*fb1b10abSAndroid Build Coastguard Worker int shift_inc =
48*fb1b10abSAndroid Build Coastguard Worker (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
49*fb1b10abSAndroid Build Coastguard Worker ? 1
50*fb1b10abSAndroid Build Coastguard Worker : 0;
51*fb1b10abSAndroid Build Coastguard Worker __m128i acc_diff = _mm_setzero_si128();
52*fb1b10abSAndroid Build Coastguard Worker const __m128i k_0 = _mm_setzero_si128();
53*fb1b10abSAndroid Build Coastguard Worker const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
54*fb1b10abSAndroid Build Coastguard Worker const __m128i k_8 = _mm_set1_epi8(8);
55*fb1b10abSAndroid Build Coastguard Worker const __m128i k_16 = _mm_set1_epi8(16);
56*fb1b10abSAndroid Build Coastguard Worker /* Modify each level's adjustment according to motion_magnitude. */
57*fb1b10abSAndroid Build Coastguard Worker const __m128i l3 = _mm_set1_epi8(
58*fb1b10abSAndroid Build Coastguard Worker (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6);
59*fb1b10abSAndroid Build Coastguard Worker /* Difference between level 3 and level 2 is 2. */
60*fb1b10abSAndroid Build Coastguard Worker const __m128i l32 = _mm_set1_epi8(2);
61*fb1b10abSAndroid Build Coastguard Worker /* Difference between level 2 and level 1 is 1. */
62*fb1b10abSAndroid Build Coastguard Worker const __m128i l21 = _mm_set1_epi8(1);
63*fb1b10abSAndroid Build Coastguard Worker
64*fb1b10abSAndroid Build Coastguard Worker for (r = 0; r < 16; ++r) {
65*fb1b10abSAndroid Build Coastguard Worker /* Calculate differences */
66*fb1b10abSAndroid Build Coastguard Worker const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
67*fb1b10abSAndroid Build Coastguard Worker const __m128i v_mc_running_avg_y =
68*fb1b10abSAndroid Build Coastguard Worker _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0]));
69*fb1b10abSAndroid Build Coastguard Worker __m128i v_running_avg_y;
70*fb1b10abSAndroid Build Coastguard Worker const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
71*fb1b10abSAndroid Build Coastguard Worker const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
72*fb1b10abSAndroid Build Coastguard Worker /* Obtain the sign. FF if diff is negative. */
73*fb1b10abSAndroid Build Coastguard Worker const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
74*fb1b10abSAndroid Build Coastguard Worker /* Clamp absolute difference to 16 to be used to get mask. Doing this
75*fb1b10abSAndroid Build Coastguard Worker * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */
76*fb1b10abSAndroid Build Coastguard Worker const __m128i clamped_absdiff =
77*fb1b10abSAndroid Build Coastguard Worker _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_16);
78*fb1b10abSAndroid Build Coastguard Worker /* Get masks for l2 l1 and l0 adjustments */
79*fb1b10abSAndroid Build Coastguard Worker const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
80*fb1b10abSAndroid Build Coastguard Worker const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
81*fb1b10abSAndroid Build Coastguard Worker const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
82*fb1b10abSAndroid Build Coastguard Worker /* Get adjustments for l2, l1, and l0 */
83*fb1b10abSAndroid Build Coastguard Worker __m128i adj2 = _mm_and_si128(mask2, l32);
84*fb1b10abSAndroid Build Coastguard Worker const __m128i adj1 = _mm_and_si128(mask1, l21);
85*fb1b10abSAndroid Build Coastguard Worker const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
86*fb1b10abSAndroid Build Coastguard Worker __m128i adj, padj, nadj;
87*fb1b10abSAndroid Build Coastguard Worker
88*fb1b10abSAndroid Build Coastguard Worker /* Combine the adjustments and get absolute adjustments. */
89*fb1b10abSAndroid Build Coastguard Worker adj2 = _mm_add_epi8(adj2, adj1);
90*fb1b10abSAndroid Build Coastguard Worker adj = _mm_sub_epi8(l3, adj2);
91*fb1b10abSAndroid Build Coastguard Worker adj = _mm_andnot_si128(mask0, adj);
92*fb1b10abSAndroid Build Coastguard Worker adj = _mm_or_si128(adj, adj0);
93*fb1b10abSAndroid Build Coastguard Worker
94*fb1b10abSAndroid Build Coastguard Worker /* Restore the sign and get positive and negative adjustments. */
95*fb1b10abSAndroid Build Coastguard Worker padj = _mm_andnot_si128(diff_sign, adj);
96*fb1b10abSAndroid Build Coastguard Worker nadj = _mm_and_si128(diff_sign, adj);
97*fb1b10abSAndroid Build Coastguard Worker
98*fb1b10abSAndroid Build Coastguard Worker /* Calculate filtered value. */
99*fb1b10abSAndroid Build Coastguard Worker v_running_avg_y = _mm_adds_epu8(v_sig, padj);
100*fb1b10abSAndroid Build Coastguard Worker v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj);
101*fb1b10abSAndroid Build Coastguard Worker _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y);
102*fb1b10abSAndroid Build Coastguard Worker
103*fb1b10abSAndroid Build Coastguard Worker /* Adjustments <=7, and each element in acc_diff can fit in signed
104*fb1b10abSAndroid Build Coastguard Worker * char.
105*fb1b10abSAndroid Build Coastguard Worker */
106*fb1b10abSAndroid Build Coastguard Worker acc_diff = _mm_adds_epi8(acc_diff, padj);
107*fb1b10abSAndroid Build Coastguard Worker acc_diff = _mm_subs_epi8(acc_diff, nadj);
108*fb1b10abSAndroid Build Coastguard Worker
109*fb1b10abSAndroid Build Coastguard Worker /* Update pointers for next iteration. */
110*fb1b10abSAndroid Build Coastguard Worker sig += sig_stride;
111*fb1b10abSAndroid Build Coastguard Worker mc_running_avg_y += mc_avg_y_stride;
112*fb1b10abSAndroid Build Coastguard Worker running_avg_y += avg_y_stride;
113*fb1b10abSAndroid Build Coastguard Worker }
114*fb1b10abSAndroid Build Coastguard Worker
115*fb1b10abSAndroid Build Coastguard Worker {
116*fb1b10abSAndroid Build Coastguard Worker /* Compute the sum of all pixel differences of this MB. */
117*fb1b10abSAndroid Build Coastguard Worker unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff);
118*fb1b10abSAndroid Build Coastguard Worker sum_diff_thresh = SUM_DIFF_THRESHOLD;
119*fb1b10abSAndroid Build Coastguard Worker if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
120*fb1b10abSAndroid Build Coastguard Worker if (abs_sum_diff > sum_diff_thresh) {
121*fb1b10abSAndroid Build Coastguard Worker // Before returning to copy the block (i.e., apply no denoising),
122*fb1b10abSAndroid Build Coastguard Worker // check if we can still apply some (weaker) temporal filtering to
123*fb1b10abSAndroid Build Coastguard Worker // this block, that would otherwise not be denoised at all. Simplest
124*fb1b10abSAndroid Build Coastguard Worker // is to apply an additional adjustment to running_avg_y to bring it
125*fb1b10abSAndroid Build Coastguard Worker // closer to sig. The adjustment is capped by a maximum delta, and
126*fb1b10abSAndroid Build Coastguard Worker // chosen such that in most cases the resulting sum_diff will be
127*fb1b10abSAndroid Build Coastguard Worker // within the acceptable range given by sum_diff_thresh.
128*fb1b10abSAndroid Build Coastguard Worker
129*fb1b10abSAndroid Build Coastguard Worker // The delta is set by the excess of absolute pixel diff over the
130*fb1b10abSAndroid Build Coastguard Worker // threshold.
131*fb1b10abSAndroid Build Coastguard Worker int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
132*fb1b10abSAndroid Build Coastguard Worker // Only apply the adjustment for max delta up to 3.
133*fb1b10abSAndroid Build Coastguard Worker if (delta < 4) {
134*fb1b10abSAndroid Build Coastguard Worker const __m128i k_delta = _mm_set1_epi8(delta);
135*fb1b10abSAndroid Build Coastguard Worker sig -= sig_stride * 16;
136*fb1b10abSAndroid Build Coastguard Worker mc_running_avg_y -= mc_avg_y_stride * 16;
137*fb1b10abSAndroid Build Coastguard Worker running_avg_y -= avg_y_stride * 16;
138*fb1b10abSAndroid Build Coastguard Worker for (r = 0; r < 16; ++r) {
139*fb1b10abSAndroid Build Coastguard Worker __m128i v_running_avg_y =
140*fb1b10abSAndroid Build Coastguard Worker _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
141*fb1b10abSAndroid Build Coastguard Worker // Calculate differences.
142*fb1b10abSAndroid Build Coastguard Worker const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
143*fb1b10abSAndroid Build Coastguard Worker const __m128i v_mc_running_avg_y =
144*fb1b10abSAndroid Build Coastguard Worker _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0]));
145*fb1b10abSAndroid Build Coastguard Worker const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
146*fb1b10abSAndroid Build Coastguard Worker const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
147*fb1b10abSAndroid Build Coastguard Worker // Obtain the sign. FF if diff is negative.
148*fb1b10abSAndroid Build Coastguard Worker const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
149*fb1b10abSAndroid Build Coastguard Worker // Clamp absolute difference to delta to get the adjustment.
150*fb1b10abSAndroid Build Coastguard Worker const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
151*fb1b10abSAndroid Build Coastguard Worker // Restore the sign and get positive and negative adjustments.
152*fb1b10abSAndroid Build Coastguard Worker __m128i padj, nadj;
153*fb1b10abSAndroid Build Coastguard Worker padj = _mm_andnot_si128(diff_sign, adj);
154*fb1b10abSAndroid Build Coastguard Worker nadj = _mm_and_si128(diff_sign, adj);
155*fb1b10abSAndroid Build Coastguard Worker // Calculate filtered value.
156*fb1b10abSAndroid Build Coastguard Worker v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj);
157*fb1b10abSAndroid Build Coastguard Worker v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj);
158*fb1b10abSAndroid Build Coastguard Worker _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y);
159*fb1b10abSAndroid Build Coastguard Worker
160*fb1b10abSAndroid Build Coastguard Worker // Accumulate the adjustments.
161*fb1b10abSAndroid Build Coastguard Worker acc_diff = _mm_subs_epi8(acc_diff, padj);
162*fb1b10abSAndroid Build Coastguard Worker acc_diff = _mm_adds_epi8(acc_diff, nadj);
163*fb1b10abSAndroid Build Coastguard Worker
164*fb1b10abSAndroid Build Coastguard Worker // Update pointers for next iteration.
165*fb1b10abSAndroid Build Coastguard Worker sig += sig_stride;
166*fb1b10abSAndroid Build Coastguard Worker mc_running_avg_y += mc_avg_y_stride;
167*fb1b10abSAndroid Build Coastguard Worker running_avg_y += avg_y_stride;
168*fb1b10abSAndroid Build Coastguard Worker }
169*fb1b10abSAndroid Build Coastguard Worker abs_sum_diff = abs_sum_diff_16x1(acc_diff);
170*fb1b10abSAndroid Build Coastguard Worker if (abs_sum_diff > sum_diff_thresh) {
171*fb1b10abSAndroid Build Coastguard Worker return COPY_BLOCK;
172*fb1b10abSAndroid Build Coastguard Worker }
173*fb1b10abSAndroid Build Coastguard Worker } else {
174*fb1b10abSAndroid Build Coastguard Worker return COPY_BLOCK;
175*fb1b10abSAndroid Build Coastguard Worker }
176*fb1b10abSAndroid Build Coastguard Worker }
177*fb1b10abSAndroid Build Coastguard Worker }
178*fb1b10abSAndroid Build Coastguard Worker
179*fb1b10abSAndroid Build Coastguard Worker vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
180*fb1b10abSAndroid Build Coastguard Worker return FILTER_BLOCK;
181*fb1b10abSAndroid Build Coastguard Worker }
182*fb1b10abSAndroid Build Coastguard Worker
vp8_denoiser_filter_uv_sse2(unsigned char * mc_running_avg,int mc_avg_stride,unsigned char * running_avg,int avg_stride,unsigned char * sig,int sig_stride,unsigned int motion_magnitude,int increase_denoising)183*fb1b10abSAndroid Build Coastguard Worker int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg,
184*fb1b10abSAndroid Build Coastguard Worker int mc_avg_stride, unsigned char *running_avg,
185*fb1b10abSAndroid Build Coastguard Worker int avg_stride, unsigned char *sig,
186*fb1b10abSAndroid Build Coastguard Worker int sig_stride, unsigned int motion_magnitude,
187*fb1b10abSAndroid Build Coastguard Worker int increase_denoising) {
188*fb1b10abSAndroid Build Coastguard Worker unsigned char *running_avg_start = running_avg;
189*fb1b10abSAndroid Build Coastguard Worker unsigned char *sig_start = sig;
190*fb1b10abSAndroid Build Coastguard Worker unsigned int sum_diff_thresh;
191*fb1b10abSAndroid Build Coastguard Worker int r;
192*fb1b10abSAndroid Build Coastguard Worker int shift_inc =
193*fb1b10abSAndroid Build Coastguard Worker (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV)
194*fb1b10abSAndroid Build Coastguard Worker ? 1
195*fb1b10abSAndroid Build Coastguard Worker : 0;
196*fb1b10abSAndroid Build Coastguard Worker __m128i acc_diff = _mm_setzero_si128();
197*fb1b10abSAndroid Build Coastguard Worker const __m128i k_0 = _mm_setzero_si128();
198*fb1b10abSAndroid Build Coastguard Worker const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
199*fb1b10abSAndroid Build Coastguard Worker const __m128i k_8 = _mm_set1_epi8(8);
200*fb1b10abSAndroid Build Coastguard Worker const __m128i k_16 = _mm_set1_epi8(16);
201*fb1b10abSAndroid Build Coastguard Worker /* Modify each level's adjustment according to motion_magnitude. */
202*fb1b10abSAndroid Build Coastguard Worker const __m128i l3 = _mm_set1_epi8(
203*fb1b10abSAndroid Build Coastguard Worker (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 7 + shift_inc : 6);
204*fb1b10abSAndroid Build Coastguard Worker /* Difference between level 3 and level 2 is 2. */
205*fb1b10abSAndroid Build Coastguard Worker const __m128i l32 = _mm_set1_epi8(2);
206*fb1b10abSAndroid Build Coastguard Worker /* Difference between level 2 and level 1 is 1. */
207*fb1b10abSAndroid Build Coastguard Worker const __m128i l21 = _mm_set1_epi8(1);
208*fb1b10abSAndroid Build Coastguard Worker
209*fb1b10abSAndroid Build Coastguard Worker {
210*fb1b10abSAndroid Build Coastguard Worker const __m128i k_1 = _mm_set1_epi16(1);
211*fb1b10abSAndroid Build Coastguard Worker __m128i vec_sum_block = _mm_setzero_si128();
212*fb1b10abSAndroid Build Coastguard Worker
213*fb1b10abSAndroid Build Coastguard Worker // Avoid denoising color signal if its close to average level.
214*fb1b10abSAndroid Build Coastguard Worker for (r = 0; r < 8; ++r) {
215*fb1b10abSAndroid Build Coastguard Worker const __m128i v_sig = _mm_loadl_epi64((__m128i *)(&sig[0]));
216*fb1b10abSAndroid Build Coastguard Worker const __m128i v_sig_unpack = _mm_unpacklo_epi8(v_sig, k_0);
217*fb1b10abSAndroid Build Coastguard Worker vec_sum_block = _mm_add_epi16(vec_sum_block, v_sig_unpack);
218*fb1b10abSAndroid Build Coastguard Worker sig += sig_stride;
219*fb1b10abSAndroid Build Coastguard Worker }
220*fb1b10abSAndroid Build Coastguard Worker sig -= sig_stride * 8;
221*fb1b10abSAndroid Build Coastguard Worker {
222*fb1b10abSAndroid Build Coastguard Worker const __m128i hg_fe_dc_ba = _mm_madd_epi16(vec_sum_block, k_1);
223*fb1b10abSAndroid Build Coastguard Worker const __m128i hgfe_dcba =
224*fb1b10abSAndroid Build Coastguard Worker _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8));
225*fb1b10abSAndroid Build Coastguard Worker const __m128i hgfedcba =
226*fb1b10abSAndroid Build Coastguard Worker _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4));
227*fb1b10abSAndroid Build Coastguard Worker const int sum_block = _mm_cvtsi128_si32(hgfedcba);
228*fb1b10abSAndroid Build Coastguard Worker if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {
229*fb1b10abSAndroid Build Coastguard Worker return COPY_BLOCK;
230*fb1b10abSAndroid Build Coastguard Worker }
231*fb1b10abSAndroid Build Coastguard Worker }
232*fb1b10abSAndroid Build Coastguard Worker }
233*fb1b10abSAndroid Build Coastguard Worker
234*fb1b10abSAndroid Build Coastguard Worker for (r = 0; r < 4; ++r) {
235*fb1b10abSAndroid Build Coastguard Worker /* Calculate differences */
236*fb1b10abSAndroid Build Coastguard Worker const __m128i v_sig_low =
237*fb1b10abSAndroid Build Coastguard Worker _mm_castpd_si128(_mm_load_sd((double *)(&sig[0])));
238*fb1b10abSAndroid Build Coastguard Worker const __m128i v_sig = _mm_castpd_si128(_mm_loadh_pd(
239*fb1b10abSAndroid Build Coastguard Worker _mm_castsi128_pd(v_sig_low), (double *)(&sig[sig_stride])));
240*fb1b10abSAndroid Build Coastguard Worker const __m128i v_mc_running_avg_low =
241*fb1b10abSAndroid Build Coastguard Worker _mm_castpd_si128(_mm_load_sd((double *)(&mc_running_avg[0])));
242*fb1b10abSAndroid Build Coastguard Worker const __m128i v_mc_running_avg = _mm_castpd_si128(
243*fb1b10abSAndroid Build Coastguard Worker _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low),
244*fb1b10abSAndroid Build Coastguard Worker (double *)(&mc_running_avg[mc_avg_stride])));
245*fb1b10abSAndroid Build Coastguard Worker const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig);
246*fb1b10abSAndroid Build Coastguard Worker const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg);
247*fb1b10abSAndroid Build Coastguard Worker /* Obtain the sign. FF if diff is negative. */
248*fb1b10abSAndroid Build Coastguard Worker const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
249*fb1b10abSAndroid Build Coastguard Worker /* Clamp absolute difference to 16 to be used to get mask. Doing this
250*fb1b10abSAndroid Build Coastguard Worker * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */
251*fb1b10abSAndroid Build Coastguard Worker const __m128i clamped_absdiff =
252*fb1b10abSAndroid Build Coastguard Worker _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_16);
253*fb1b10abSAndroid Build Coastguard Worker /* Get masks for l2 l1 and l0 adjustments */
254*fb1b10abSAndroid Build Coastguard Worker const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
255*fb1b10abSAndroid Build Coastguard Worker const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
256*fb1b10abSAndroid Build Coastguard Worker const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
257*fb1b10abSAndroid Build Coastguard Worker /* Get adjustments for l2, l1, and l0 */
258*fb1b10abSAndroid Build Coastguard Worker __m128i adj2 = _mm_and_si128(mask2, l32);
259*fb1b10abSAndroid Build Coastguard Worker const __m128i adj1 = _mm_and_si128(mask1, l21);
260*fb1b10abSAndroid Build Coastguard Worker const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
261*fb1b10abSAndroid Build Coastguard Worker __m128i adj, padj, nadj;
262*fb1b10abSAndroid Build Coastguard Worker __m128i v_running_avg;
263*fb1b10abSAndroid Build Coastguard Worker
264*fb1b10abSAndroid Build Coastguard Worker /* Combine the adjustments and get absolute adjustments. */
265*fb1b10abSAndroid Build Coastguard Worker adj2 = _mm_add_epi8(adj2, adj1);
266*fb1b10abSAndroid Build Coastguard Worker adj = _mm_sub_epi8(l3, adj2);
267*fb1b10abSAndroid Build Coastguard Worker adj = _mm_andnot_si128(mask0, adj);
268*fb1b10abSAndroid Build Coastguard Worker adj = _mm_or_si128(adj, adj0);
269*fb1b10abSAndroid Build Coastguard Worker
270*fb1b10abSAndroid Build Coastguard Worker /* Restore the sign and get positive and negative adjustments. */
271*fb1b10abSAndroid Build Coastguard Worker padj = _mm_andnot_si128(diff_sign, adj);
272*fb1b10abSAndroid Build Coastguard Worker nadj = _mm_and_si128(diff_sign, adj);
273*fb1b10abSAndroid Build Coastguard Worker
274*fb1b10abSAndroid Build Coastguard Worker /* Calculate filtered value. */
275*fb1b10abSAndroid Build Coastguard Worker v_running_avg = _mm_adds_epu8(v_sig, padj);
276*fb1b10abSAndroid Build Coastguard Worker v_running_avg = _mm_subs_epu8(v_running_avg, nadj);
277*fb1b10abSAndroid Build Coastguard Worker
278*fb1b10abSAndroid Build Coastguard Worker _mm_storel_pd((double *)&running_avg[0], _mm_castsi128_pd(v_running_avg));
279*fb1b10abSAndroid Build Coastguard Worker _mm_storeh_pd((double *)&running_avg[avg_stride],
280*fb1b10abSAndroid Build Coastguard Worker _mm_castsi128_pd(v_running_avg));
281*fb1b10abSAndroid Build Coastguard Worker
282*fb1b10abSAndroid Build Coastguard Worker /* Adjustments <=7, and each element in acc_diff can fit in signed
283*fb1b10abSAndroid Build Coastguard Worker * char.
284*fb1b10abSAndroid Build Coastguard Worker */
285*fb1b10abSAndroid Build Coastguard Worker acc_diff = _mm_adds_epi8(acc_diff, padj);
286*fb1b10abSAndroid Build Coastguard Worker acc_diff = _mm_subs_epi8(acc_diff, nadj);
287*fb1b10abSAndroid Build Coastguard Worker
288*fb1b10abSAndroid Build Coastguard Worker /* Update pointers for next iteration. */
289*fb1b10abSAndroid Build Coastguard Worker sig += sig_stride * 2;
290*fb1b10abSAndroid Build Coastguard Worker mc_running_avg += mc_avg_stride * 2;
291*fb1b10abSAndroid Build Coastguard Worker running_avg += avg_stride * 2;
292*fb1b10abSAndroid Build Coastguard Worker }
293*fb1b10abSAndroid Build Coastguard Worker
294*fb1b10abSAndroid Build Coastguard Worker {
295*fb1b10abSAndroid Build Coastguard Worker unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff);
296*fb1b10abSAndroid Build Coastguard Worker sum_diff_thresh = SUM_DIFF_THRESHOLD_UV;
297*fb1b10abSAndroid Build Coastguard Worker if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
298*fb1b10abSAndroid Build Coastguard Worker if (abs_sum_diff > sum_diff_thresh) {
299*fb1b10abSAndroid Build Coastguard Worker // Before returning to copy the block (i.e., apply no denoising),
300*fb1b10abSAndroid Build Coastguard Worker // check if we can still apply some (weaker) temporal filtering to
301*fb1b10abSAndroid Build Coastguard Worker // this block, that would otherwise not be denoised at all. Simplest
302*fb1b10abSAndroid Build Coastguard Worker // is to apply an additional adjustment to running_avg_y to bring it
303*fb1b10abSAndroid Build Coastguard Worker // closer to sig. The adjustment is capped by a maximum delta, and
304*fb1b10abSAndroid Build Coastguard Worker // chosen such that in most cases the resulting sum_diff will be
305*fb1b10abSAndroid Build Coastguard Worker // within the acceptable range given by sum_diff_thresh.
306*fb1b10abSAndroid Build Coastguard Worker
307*fb1b10abSAndroid Build Coastguard Worker // The delta is set by the excess of absolute pixel diff over the
308*fb1b10abSAndroid Build Coastguard Worker // threshold.
309*fb1b10abSAndroid Build Coastguard Worker int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
310*fb1b10abSAndroid Build Coastguard Worker // Only apply the adjustment for max delta up to 3.
311*fb1b10abSAndroid Build Coastguard Worker if (delta < 4) {
312*fb1b10abSAndroid Build Coastguard Worker const __m128i k_delta = _mm_set1_epi8(delta);
313*fb1b10abSAndroid Build Coastguard Worker sig -= sig_stride * 8;
314*fb1b10abSAndroid Build Coastguard Worker mc_running_avg -= mc_avg_stride * 8;
315*fb1b10abSAndroid Build Coastguard Worker running_avg -= avg_stride * 8;
316*fb1b10abSAndroid Build Coastguard Worker for (r = 0; r < 4; ++r) {
317*fb1b10abSAndroid Build Coastguard Worker // Calculate differences.
318*fb1b10abSAndroid Build Coastguard Worker const __m128i v_sig_low =
319*fb1b10abSAndroid Build Coastguard Worker _mm_castpd_si128(_mm_load_sd((double *)(&sig[0])));
320*fb1b10abSAndroid Build Coastguard Worker const __m128i v_sig = _mm_castpd_si128(_mm_loadh_pd(
321*fb1b10abSAndroid Build Coastguard Worker _mm_castsi128_pd(v_sig_low), (double *)(&sig[sig_stride])));
322*fb1b10abSAndroid Build Coastguard Worker const __m128i v_mc_running_avg_low =
323*fb1b10abSAndroid Build Coastguard Worker _mm_castpd_si128(_mm_load_sd((double *)(&mc_running_avg[0])));
324*fb1b10abSAndroid Build Coastguard Worker const __m128i v_mc_running_avg = _mm_castpd_si128(
325*fb1b10abSAndroid Build Coastguard Worker _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low),
326*fb1b10abSAndroid Build Coastguard Worker (double *)(&mc_running_avg[mc_avg_stride])));
327*fb1b10abSAndroid Build Coastguard Worker const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig);
328*fb1b10abSAndroid Build Coastguard Worker const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg);
329*fb1b10abSAndroid Build Coastguard Worker // Obtain the sign. FF if diff is negative.
330*fb1b10abSAndroid Build Coastguard Worker const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
331*fb1b10abSAndroid Build Coastguard Worker // Clamp absolute difference to delta to get the adjustment.
332*fb1b10abSAndroid Build Coastguard Worker const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
333*fb1b10abSAndroid Build Coastguard Worker // Restore the sign and get positive and negative adjustments.
334*fb1b10abSAndroid Build Coastguard Worker __m128i padj, nadj;
335*fb1b10abSAndroid Build Coastguard Worker const __m128i v_running_avg_low =
336*fb1b10abSAndroid Build Coastguard Worker _mm_castpd_si128(_mm_load_sd((double *)(&running_avg[0])));
337*fb1b10abSAndroid Build Coastguard Worker __m128i v_running_avg = _mm_castpd_si128(
338*fb1b10abSAndroid Build Coastguard Worker _mm_loadh_pd(_mm_castsi128_pd(v_running_avg_low),
339*fb1b10abSAndroid Build Coastguard Worker (double *)(&running_avg[avg_stride])));
340*fb1b10abSAndroid Build Coastguard Worker padj = _mm_andnot_si128(diff_sign, adj);
341*fb1b10abSAndroid Build Coastguard Worker nadj = _mm_and_si128(diff_sign, adj);
342*fb1b10abSAndroid Build Coastguard Worker // Calculate filtered value.
343*fb1b10abSAndroid Build Coastguard Worker v_running_avg = _mm_subs_epu8(v_running_avg, padj);
344*fb1b10abSAndroid Build Coastguard Worker v_running_avg = _mm_adds_epu8(v_running_avg, nadj);
345*fb1b10abSAndroid Build Coastguard Worker
346*fb1b10abSAndroid Build Coastguard Worker _mm_storel_pd((double *)&running_avg[0],
347*fb1b10abSAndroid Build Coastguard Worker _mm_castsi128_pd(v_running_avg));
348*fb1b10abSAndroid Build Coastguard Worker _mm_storeh_pd((double *)&running_avg[avg_stride],
349*fb1b10abSAndroid Build Coastguard Worker _mm_castsi128_pd(v_running_avg));
350*fb1b10abSAndroid Build Coastguard Worker
351*fb1b10abSAndroid Build Coastguard Worker // Accumulate the adjustments.
352*fb1b10abSAndroid Build Coastguard Worker acc_diff = _mm_subs_epi8(acc_diff, padj);
353*fb1b10abSAndroid Build Coastguard Worker acc_diff = _mm_adds_epi8(acc_diff, nadj);
354*fb1b10abSAndroid Build Coastguard Worker
355*fb1b10abSAndroid Build Coastguard Worker // Update pointers for next iteration.
356*fb1b10abSAndroid Build Coastguard Worker sig += sig_stride * 2;
357*fb1b10abSAndroid Build Coastguard Worker mc_running_avg += mc_avg_stride * 2;
358*fb1b10abSAndroid Build Coastguard Worker running_avg += avg_stride * 2;
359*fb1b10abSAndroid Build Coastguard Worker }
360*fb1b10abSAndroid Build Coastguard Worker abs_sum_diff = abs_sum_diff_16x1(acc_diff);
361*fb1b10abSAndroid Build Coastguard Worker if (abs_sum_diff > sum_diff_thresh) {
362*fb1b10abSAndroid Build Coastguard Worker return COPY_BLOCK;
363*fb1b10abSAndroid Build Coastguard Worker }
364*fb1b10abSAndroid Build Coastguard Worker } else {
365*fb1b10abSAndroid Build Coastguard Worker return COPY_BLOCK;
366*fb1b10abSAndroid Build Coastguard Worker }
367*fb1b10abSAndroid Build Coastguard Worker }
368*fb1b10abSAndroid Build Coastguard Worker }
369*fb1b10abSAndroid Build Coastguard Worker
370*fb1b10abSAndroid Build Coastguard Worker vp8_copy_mem8x8(running_avg_start, avg_stride, sig_start, sig_stride);
371*fb1b10abSAndroid Build Coastguard Worker return FILTER_BLOCK;
372*fb1b10abSAndroid Build Coastguard Worker }
373