1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 ******************************************************************************
23 * @file ime_distortion_metrics.c
24 *
25 * @brief
26 * This file contains definitions of routines that compute distortion
27 * between two macro/sub blocks of identical dimensions
28 *
29 * @author
30 * Ittiam
31 *
32 * @par List of Functions:
33 * - ime_sub_pel_compute_sad_16x16()
34 * - ime_calculate_sad4_prog()
35 * - ime_calculate_sad3_prog()
36 * - ime_calculate_sad2_prog()
37 * - ime_compute_sad_16x16()
38 * - ime_compute_sad_16x16_fast()
39 * - ime_compute_sad_16x16_ea8()
40 * - ime_compute_sad_8x8()
41 * - ime_compute_sad_4x4()
42 * - ime_compute_sad_16x8()
43 * - ime_compute_satqd_16x16_lumainter()
44 * - ime_compute_satqd_8x16_chroma()
45 * - ime_compute_satqd_16x16_lumaintra()
46 *
47 * @remarks
48 * None
49 *
50 *******************************************************************************
51 */
52
53 /*****************************************************************************/
54 /* File Includes */
55 /*****************************************************************************/
56
57 /* System include files */
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61
62 /* User include files */
63 #include "ime_typedefs.h"
64 #include "ime_defs.h"
65 #include "ime_macros.h"
66 #include "ime_statistics.h"
67 #include "ime_platform_macros.h"
68 #include "ime_distortion_metrics.h"
69
70
71 /*****************************************************************************/
72 /* Function Definitions */
73 /*****************************************************************************/
74
75 /**
76 ******************************************************************************
77 *
78 * @brief computes distortion (SAD) at all subpel points about the src location
79 *
80 * @par Description
81 * This functions computes SAD at all points at a subpel distance from the
82 * current source location.
83 *
84 * @param[in] pu1_src
85 * UWORD8 pointer to the source
86 *
87 * @param[out] pu1_ref_half_x
88 * UWORD8 pointer to half pel buffer
89 *
90 * @param[out] pu1_ref_half_y
91 * UWORD8 pointer to half pel buffer
92 *
93 * @param[out] pu1_ref_half_xy
94 * UWORD8 pointer to half pel buffer
95 *
96 * @param[in] src_strd
97 * integer source stride
98 *
99 * @param[in] ref_strd
100 * integer ref stride
101 *
102 * @param[out] pi4_sad
103 * integer evaluated sad
104 * pi4_sad[0] - half x
105 * pi4_sad[1] - half x - 1
106 * pi4_sad[2] - half y
107 * pi4_sad[3] - half y - 1
108 * pi4_sad[4] - half xy
109 * pi4_sad[5] - half xy - 1
110 * pi4_sad[6] - half xy - strd
111 * pi4_sad[7] - half xy - 1 - strd
112 *
113 * @remarks
114 *
115 ******************************************************************************
116 */
ime_sub_pel_compute_sad_16x16(UWORD8 * pu1_src,UWORD8 * pu1_ref_half_x,UWORD8 * pu1_ref_half_y,UWORD8 * pu1_ref_half_xy,WORD32 src_strd,WORD32 ref_strd,WORD32 * pi4_sad)117 void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
118 UWORD8 *pu1_ref_half_x,
119 UWORD8 *pu1_ref_half_y,
120 UWORD8 *pu1_ref_half_xy,
121 WORD32 src_strd,
122 WORD32 ref_strd,
123 WORD32 *pi4_sad)
124 {
125 UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
126 UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
127 UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
128 UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
129 UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
130
131 WORD32 row, col;
132
133 memset(pi4_sad, 0, 8 * sizeof(WORD32));
134
135 for(row = 0; row < MB_SIZE; row++)
136 {
137 for(col = 0; col < MB_SIZE; col++)
138 {
139 WORD32 src;
140 WORD32 diff;
141
142 src = pu1_src[col];
143
144 diff = src - pu1_ref_half_x[col];
145 pi4_sad[0] += ABS(diff);
146
147 diff = src - pu1_ref_half_x_left[col];
148 pi4_sad[1] += ABS(diff);
149
150 diff = src - pu1_ref_half_y[col];
151 pi4_sad[2] += ABS(diff);
152
153 diff = src - pu1_ref_half_y_top[col];
154 pi4_sad[3] += ABS(diff);
155
156 diff = src - pu1_ref_half_xy[col];
157 pi4_sad[4] += ABS(diff);
158
159 diff = src - pu1_ref_half_xy_left[col];
160 pi4_sad[5] += ABS(diff);
161
162 diff = src - pu1_ref_half_xy_top[col];
163 pi4_sad[6] += ABS(diff);
164
165 diff = src - pu1_ref_half_xy_top_left[col];
166 pi4_sad[7] += ABS(diff);
167 }
168
169 pu1_src += src_strd;
170
171 pu1_ref_half_x += ref_strd;
172 pu1_ref_half_x_left += ref_strd;
173
174 pu1_ref_half_y += ref_strd;
175 pu1_ref_half_y_top += ref_strd;
176
177 pu1_ref_half_xy += ref_strd;
178 pu1_ref_half_xy_left += ref_strd;
179 pu1_ref_half_xy_top += ref_strd;
180 pu1_ref_half_xy_top_left += ref_strd;
181 }
182 }
183
184 /**
185 *******************************************************************************
186 *
187 * @brief compute sad
188 *
189 * @par Description: This function computes the sad at vertices of diamond grid
190 * centered at reference pointer and at unit distance from it.
191 *
192 * @param[in] pu1_ref
193 * UWORD8 pointer to the reference
194 *
195 * @param[out] pu1_src
196 * UWORD8 pointer to the source
197 *
198 * @param[in] ref_strd
199 * integer reference stride
200 *
201 * @param[in] src_strd
202 * integer source stride
203 *
204 * @param[out] pi4_sad
205 * pointer to integer array evaluated sad
206 *
207 * @returns sad at all evaluated vertexes
208 *
209 * @remarks none
210 *
211 *******************************************************************************
212 */
ime_calculate_sad4_prog(UWORD8 * pu1_ref,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)213 void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
214 UWORD8 *pu1_src,
215 WORD32 ref_strd,
216 WORD32 src_strd,
217 WORD32 *pi4_sad)
218 {
219
220 /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
221 UWORD8 *left_ptr = pu1_ref - 1;
222 UWORD8 *right_ptr = pu1_ref + 1;
223 UWORD8 *top_ptr = pu1_ref - ref_strd;
224 UWORD8 *bot_ptr = pu1_ref + ref_strd;
225
226 /* temp var */
227 WORD32 count2, count3;
228 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
229 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
230
231 memset(pi4_sad, 0, 4 * sizeof(WORD32));
232
233 for(count2 = MB_SIZE; count2 > 0; count2--)
234 {
235 for(count3 = MB_SIZE; count3 > 0 ; count3--)
236 {
237 WORD32 src;
238 WORD32 diff;
239
240 src = *pu1_src++;
241
242 diff = src - *left_ptr++;
243 pi4_sad[0] += ABS(diff);
244
245 diff = src - *right_ptr++;
246 pi4_sad[1] += ABS(diff);
247
248 diff = src - *top_ptr++;
249 pi4_sad[2] += ABS(diff);
250
251 diff = src - *bot_ptr++;
252 pi4_sad[3] += ABS(diff);
253 }
254
255 bot_ptr += u4_ref_buf_offset;
256 left_ptr += u4_ref_buf_offset;
257 right_ptr += u4_ref_buf_offset;
258 top_ptr += u4_ref_buf_offset;
259
260 pu1_src += u4_cur_buf_offset;
261 }
262
263 }
264
265 /**
266 *******************************************************************************
267 *
268 * @brief compute sad
269 *
270 * @par Description: This function computes the sad at vertices of diamond grid
271 * centered at reference pointer and at unit distance from it.
272 *
273 * @param[in] pu1_ref1, pu1_ref2, pu1_ref3
274 * UWORD8 pointer to the reference
275 *
276 * @param[out] pu1_src
277 * UWORD8 pointer to the source
278 *
279 * @param[in] ref_strd
280 * integer reference stride
281 *
282 * @param[in] src_strd
283 * integer source stride
284 *
285 * @param[out] pi4_sad
286 * pointer to integer array evaluated sad
287 *
288 * @returns sad at all evaluated vertexes
289 *
290 * @remarks none
291 *
292 *******************************************************************************
293 */
ime_calculate_sad3_prog(UWORD8 * pu1_ref1,UWORD8 * pu1_ref2,UWORD8 * pu1_ref3,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)294 void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
295 UWORD8 *pu1_ref2,
296 UWORD8 *pu1_ref3,
297 UWORD8 *pu1_src,
298 WORD32 ref_strd,
299 WORD32 src_strd,
300 WORD32 *pi4_sad)
301 {
302 /* temp var */
303 WORD32 i;
304 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
305 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
306
307 for(i = 16; i > 0; i--)
308 {
309 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
310 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
311 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
312 pu1_src += 4;
313 pu1_ref1 += 4;
314 pu1_ref2 += 4;
315 pu1_ref3 += 4;
316
317 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
318 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
319 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
320 pu1_src += 4;
321 pu1_ref1 += 4;
322 pu1_ref2 += 4;
323 pu1_ref3 += 4;
324
325 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
326 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
327 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
328 pu1_src += 4;
329 pu1_ref1 += 4;
330 pu1_ref2 += 4;
331 pu1_ref3 += 4;
332
333 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
334 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
335 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
336 pu1_src += 4;
337 pu1_ref1 += 4;
338 pu1_ref2 += 4;
339 pu1_ref3 += 4;
340
341 pu1_src += u4_cur_buf_offset;
342 pu1_ref1 += u4_ref_buf_offset;
343 pu1_ref2 += u4_ref_buf_offset;
344 pu1_ref3 += u4_ref_buf_offset;
345 }
346
347 }
348
349 /**
350 *******************************************************************************
351 *
352 * @brief compute sad
353 *
354 * @par Description: This function computes the sad at vertices of diamond grid
355 * centered at reference pointer and at unit distance from it.
356 *
357 * @param[in] pu1_ref1, pu1_ref2
358 * UWORD8 pointer to the reference
359 *
360 * @param[out] pu1_src
361 * UWORD8 pointer to the source
362 *
363 * @param[in] ref_strd
364 * integer reference stride
365 *
366 * @param[in] src_strd
367 * integer source stride
368 *
369 * @param[out] pi4_sad
370 * pointer to integer array evaluated sad
371 *
372 * @returns sad at all evaluated vertexes
373 *
374 * @remarks none
375 *
376 *******************************************************************************
377 */
ime_calculate_sad2_prog(UWORD8 * pu1_ref1,UWORD8 * pu1_ref2,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)378 void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
379 UWORD8 *pu1_ref2,
380 UWORD8 *pu1_src,
381 WORD32 ref_strd,
382 WORD32 src_strd,
383 WORD32 *pi4_sad)
384 {
385 /* temp var */
386 WORD32 i;
387 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
388 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
389
390 for(i = 16; i > 0; i--)
391 {
392 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
393 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
394 pu1_src += 4;
395 pu1_ref1 += 4;
396 pu1_ref2 += 4;
397
398 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
399 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
400 pu1_src += 4;
401 pu1_ref1 += 4;
402 pu1_ref2 += 4;
403
404 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
405 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
406 pu1_src += 4;
407 pu1_ref1 += 4;
408 pu1_ref2 += 4;
409
410 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
411 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
412 pu1_src += 4;
413 pu1_ref1 += 4;
414 pu1_ref2 += 4;
415
416 pu1_src += u4_cur_buf_offset;
417 pu1_ref1 += u4_ref_buf_offset;
418 pu1_ref2 += u4_ref_buf_offset;
419 }
420
421 }
422
423 /**
424 ******************************************************************************
425 *
426 * @brief computes distortion (SAD) between 2 16x16 blocks
427 *
428 * @par Description
429 * This functions computes SAD between 2 16x16 blocks. There is a provision
430 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
431 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
432 *
433 * @param[in] pu1_src
434 * UWORD8 pointer to the source
435 *
436 * @param[out] pu1_dst
437 * UWORD8 pointer to the destination
438 *
439 * @param[in] src_strd
440 * integer source stride
441 *
442 * @param[in] dst_strd
443 * integer destination stride
444 *
445 * @param[in] i4_max_sad
446 * integer maximum allowed distortion
447 *
448 * @param[out] pi4_mb_distortion
449 * integer evaluated sad
450 *
451 * @remarks
452 *
453 ******************************************************************************
454 */
ime_compute_sad_16x16(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)455 void ime_compute_sad_16x16(UWORD8 *pu1_src,
456 UWORD8 *pu1_est,
457 WORD32 src_strd,
458 WORD32 est_strd,
459 WORD32 i4_max_sad,
460 WORD32 *pi4_mb_distortion)
461 {
462 WORD32 i4_sad = 0;
463 UWORD32 u4_src_offset = src_strd - 16;
464 UWORD32 u4_est_offset = est_strd - 16;
465 UWORD32 i;
466
467 GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
468
469 for(i = 16; i > 0; i--)
470 {
471 USADA8(pu1_src, pu1_est, i4_sad);
472 pu1_src += 4;
473 pu1_est += 4;
474
475 USADA8(pu1_src, pu1_est, i4_sad);
476 pu1_src += 4;
477 pu1_est += 4;
478
479 USADA8(pu1_src, pu1_est, i4_sad);
480 pu1_src += 4;
481 pu1_est += 4;
482
483 USADA8(pu1_src, pu1_est, i4_sad);
484 pu1_src += 4;
485 pu1_est += 4;
486
487 /* early exit */
488 if(i4_max_sad < i4_sad)
489 {
490
491 GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
492
493 *pi4_mb_distortion = i4_sad;
494 return ;
495 }
496 pu1_src += u4_src_offset;
497 pu1_est += u4_est_offset;
498 }
499
500 *pi4_mb_distortion = i4_sad;
501 return ;
502 }
503
504 /**
505 ******************************************************************************
506 *
507 * @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
508 *
509 * @par Description
510 * This functions computes SAD between 2 16x16 blocks. There is a provision
511 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
512 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
513 *
514 * @param[in] pu1_src
515 * UWORD8 pointer to the source
516 *
517 * @param[out] pu1_dst
518 * UWORD8 pointer to the destination
519 *
520 * @param[in] src_strd
521 * integer source stride
522 *
523 * @param[in] dst_strd
524 * integer destination stride
525 *
526 * @param[in] i4_max_sad
527 * integer maximum allowed distortion
528 *
529 * @param[out] pi4_mb_distortion
530 * integer evaluated sad
531 *
532 * @remarks
533 *
534 ******************************************************************************
535 */
ime_compute_sad_16x16_fast(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)536 void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
537 UWORD8 *pu1_est,
538 WORD32 src_strd,
539 WORD32 est_strd,
540 WORD32 i4_max_sad,
541 WORD32 *pi4_mb_distortion)
542 {
543 WORD32 i4_sad = 0;
544 UWORD32 u4_src_offset = 2 * src_strd - 16;
545 UWORD32 u4_est_offset = 2 * est_strd - 16;
546 UWORD32 i;
547
548 UNUSED(i4_max_sad);
549
550 for(i = 16; i > 0; i-= 2)
551 {
552 USADA8(pu1_src, pu1_est, i4_sad);
553 pu1_src += 4;
554 pu1_est += 4;
555
556 USADA8(pu1_src, pu1_est, i4_sad);
557 pu1_src += 4;
558 pu1_est += 4;
559
560 USADA8(pu1_src, pu1_est, i4_sad);
561 pu1_src += 4;
562 pu1_est += 4;
563
564 USADA8(pu1_src, pu1_est, i4_sad);
565 pu1_src += 4;
566 pu1_est += 4;
567
568 pu1_src += u4_src_offset;
569 pu1_est += u4_est_offset;
570 }
571
572 *pi4_mb_distortion = (i4_sad << 1);
573 return ;
574 }
575
576 /**
577 ******************************************************************************
578 *
579 * @brief computes distortion (SAD) between 2 8x8 blocks
580 *
581 * @par Description
582 * This functions computes SAD between 2 8x8 blocks. There is a provision
583 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
584 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
585 *
586 * @param[in] pu1_src
587 * UWORD8 pointer to the source
588 *
589 * @param[out] pu1_dst
590 * UWORD8 pointer to the destination
591 *
592 * @param[in] src_strd
593 * integer source stride
594 *
595 * @param[in] dst_strd
596 * integer destination stride
597 *
598 * @param[in] u4_max_sad
599 * integer maximum allowed distortion
600 *
601 * @param[out] i4_sad
602 * integer evaluated sad
603 *
604 * @remarks
605 *
606 ******************************************************************************
607 */
ime_compute_sad_8x8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)608 void ime_compute_sad_8x8(UWORD8 *pu1_src,
609 UWORD8 *pu1_est,
610 WORD32 src_strd,
611 WORD32 est_strd,
612 WORD32 i4_max_sad,
613 WORD32 *pi4_mb_distortion)
614 {
615 WORD32 i4_sad = 0;
616 UWORD32 u4_src_offset = src_strd - 8;
617 UWORD32 u4_est_offset = est_strd - 8;
618 UWORD32 i, j;
619 WORD16 temp;
620
621 for(i = 8; i > 0; i--)
622 {
623 for(j = 8; j > 0; j--)
624 {
625 /* SAD */
626 temp = *pu1_src++ - *pu1_est++;
627 i4_sad += ABS(temp);
628 }
629 /* early exit */
630 if(i4_max_sad < i4_sad)
631 {
632 *pi4_mb_distortion = i4_sad;
633 return;
634 }
635 pu1_src += u4_src_offset;
636 pu1_est += u4_est_offset;
637 }
638 *pi4_mb_distortion = i4_sad;
639 }
640
641 /**
642 ******************************************************************************
643 *
644 * @brief computes distortion (SAD) between 2 4x4 blocks
645 *
646 * @par Description
647 * This functions computes SAD between 2 4x4 blocks. There is a provision
648 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
649 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
650 *
651 * @param[in] pu1_src
652 * UWORD8 pointer to the source
653 *
654 * @param[out] pu1_dst
655 * UWORD8 pointer to the destination
656 *
657 * @param[in] src_strd
658 * integer source stride
659 *
660 * @param[in] dst_strd
661 * integer destination stride
662 *
663 * @param[in] u4_max_sad
664 * integer maximum allowed distortion
665 *
666 * @param[out] pi4_mb_distortion
667 * integer evaluated sad
668 *
669 * @remarks
670 *
671 ******************************************************************************
672 */
ime_compute_sad_4x4(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)673 void ime_compute_sad_4x4(UWORD8 *pu1_src,
674 UWORD8 *pu1_est,
675 WORD32 src_strd,
676 WORD32 est_strd,
677 WORD32 i4_max_sad,
678 WORD32 *pi4_mb_distortion)
679 {
680 WORD32 i4_sad = 0;
681
682 UNUSED(i4_max_sad);
683
684 USADA8(pu1_src, pu1_est, i4_sad);
685 pu1_src += src_strd;
686 pu1_est += est_strd;
687
688 USADA8(pu1_src, pu1_est, i4_sad);
689 pu1_src += src_strd;
690 pu1_est += est_strd;
691
692 USADA8(pu1_src, pu1_est, i4_sad);
693 pu1_src += src_strd;
694 pu1_est += est_strd;
695
696 USADA8(pu1_src, pu1_est, i4_sad);
697 *pi4_mb_distortion = i4_sad;
698 }
699
700 /**
701 ******************************************************************************
702 *
703 * @brief computes distortion (SAD) between 2 16x8 blocks
704 *
705 * @par Description
706 * This functions computes SAD between 2 16x8 blocks. There is a provision
707 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
708 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
709 *
710 * @param[in] pu1_src
711 * UWORD8 pointer to the source
712 *
713 * @param[out] pu1_dst
714 * UWORD8 pointer to the destination
715 *
716 * @param[in] src_strd
717 * integer source stride
718 *
719 * @param[in] dst_strd
720 * integer destination stride
721 *
722 * @param[in] u4_max_sad
723 * integer maximum allowed distortion
724 *
725 * @param[out] pi4_mb_distortion
726 * integer evaluated sad
727 *
728 * @remarks
729 *
730 ******************************************************************************
731 */
ime_compute_sad_16x8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)732 void ime_compute_sad_16x8(UWORD8 *pu1_src,
733 UWORD8 *pu1_est,
734 WORD32 src_strd,
735 WORD32 est_strd,
736 WORD32 i4_max_sad,
737 WORD32 *pi4_mb_distortion)
738 {
739 WORD32 i4_sad = 0;
740 UWORD32 u4_src_offset = src_strd - 16;
741 UWORD32 u4_est_offset = est_strd - 16;
742 UWORD32 i, j;
743 WORD16 temp;
744
745 GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
746
747 for(i = 8; i > 0; i--)
748 {
749 for(j = 16; j > 0; j--)
750 {
751 /* SAD */
752 temp = *pu1_src++ - *pu1_est++;
753 i4_sad += ABS(temp);
754 }
755 /* early exit */
756 if(i4_max_sad < i4_sad)
757 {
758
759 GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
760
761 *pi4_mb_distortion = i4_sad;
762
763 return;
764 }
765 pu1_src += u4_src_offset;
766 pu1_est += u4_est_offset;
767 }
768
769 *pi4_mb_distortion = i4_sad;
770 return;
771
772 }
773
774 /**
775 ******************************************************************************
776 *
777 * @brief computes distortion (SAD) between 2 16x16 blocks
778 *
779 * @par Description
780 * This functions computes SAD between 2 16x16 blocks. There is a provision
781 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
782 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
783 *
784 * @param[in] pu1_src
785 * UWORD8 pointer to the source
786 *
787 * @param[out] pu1_dst
788 * UWORD8 pointer to the destination
789 *
790 * @param[in] src_strd
791 * integer source stride
792 *
793 * @param[in] dst_strd
794 * integer destination stride
795 *
796 * @param[in] i4_max_sad
797 * integer maximum allowed distortion
798 *
799 * @param[out] pi4_mb_distortion
800 * integer evaluated sad
801 *
802 * @remarks
803 *
804 ******************************************************************************
805 */
ime_compute_sad_16x16_ea8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)806 void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
807 UWORD8 *pu1_est,
808 WORD32 src_strd,
809 WORD32 est_strd,
810 WORD32 i4_max_sad,
811 WORD32 *pi4_mb_distortion)
812 {
813 WORD32 i4_sad = 0;
814 UWORD32 u4_src_offset = src_strd - 16;
815 UWORD32 u4_est_offset = est_strd - 16;
816 UWORD32 i, j;
817 WORD16 temp;
818 UWORD8 *pu1_src_temp = pu1_src + src_strd;
819 UWORD8 *pu1_est_temp = pu1_est + est_strd;
820
821 for(i = 16; i > 0; i -= 2)
822 {
823 for(j = 16; j > 0; j--)
824 {
825 /* SAD */
826 temp = *pu1_src++ - *pu1_est++;
827 i4_sad += ABS(temp);
828 }
829
830 pu1_src += (u4_src_offset + src_strd);
831 pu1_est += (u4_est_offset + est_strd);
832
833 }
834
835 /* early exit */
836 if(i4_max_sad < i4_sad)
837 {
838 *pi4_mb_distortion = i4_sad;
839 return;
840 }
841
842 pu1_src = pu1_src_temp;
843 pu1_est = pu1_est_temp;
844
845 for(i = 16; i > 0; i -= 2)
846 {
847 for(j = 16; j > 0; j--)
848 {
849 /* SAD */
850 temp = *pu1_src++ - *pu1_est++;
851 i4_sad += ABS(temp);
852 }
853
854 pu1_src += u4_src_offset + src_strd;
855 pu1_est += u4_est_offset + est_strd;
856 }
857
858 *pi4_mb_distortion = i4_sad;
859 return;
860 }
861
862 /**
863 *******************************************************************************
864 *
865 * @brief This function computes SAD between two 16x16 blocks. It also computes
866 * if the block will be zero after H264 transform and quant
867 *
868 * @param[in] pu1_src
869 * UWORD8 pointer to the source
870 *
871 * @param[out] pu1_est
872 * UWORD8 pointer to the estimated block
873 *
874 * @param[in] i4_src_strd
875 * source stride
876 *
877 * @param[in] i4_est_strd
878 * est buffer stride
879 *
880 * @param[in] pu2_thrsh
881 * Threshold for each element of transformed quantized block
882 *
883 * @param[out] pi4_mb_distortion
884 * evaluated sad
885 *
886 * @param[out] pu4_is_zero
887 * Pointer to store if the block is zero after transform and quantization
888 *
889 * @remarks
890 *
891 ******************************************************************************
892 */
ime_compute_satqd_16x16_lumainter(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 i4_src_strd,WORD32 i4_est_strd,UWORD16 * pu2_thrsh,WORD32 * pi4_mb_distortion,UWORD32 * pu4_is_non_zero)893 void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
894 UWORD8 *pu1_est,
895 WORD32 i4_src_strd,
896 WORD32 i4_est_strd,
897 UWORD16 *pu2_thrsh,
898 WORD32 *pi4_mb_distortion,
899 UWORD32 *pu4_is_non_zero)
900 {
901 WORD32 i, j;
902 WORD16 s1, s2, s3, s4;
903 WORD16 sad_1, sad_2;
904 WORD16 ls1, ls2, ls3, ls4, ls5, ls6, ls7, ls8;
905 UWORD8 *pu1_src_lp, *pu1_est_lp;
906 UWORD32 sad = 0;
907
908 (*pi4_mb_distortion) = 0;
909
910 for (i = 0; i < 4; i++)
911 {
912 for (j = 0; j < 4; j++)
913 {
914 pu1_src_lp = pu1_src + 4 * j;
915 pu1_est_lp = pu1_est + 4 * j;
916
917 s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
918 s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
919
920 pu1_src_lp += i4_src_strd;
921 pu1_est_lp += i4_est_strd;
922
923 s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
924 s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
925
926 pu1_src_lp += i4_src_strd;
927 pu1_est_lp += i4_est_strd;
928
929 s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
930 s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
931
932 pu1_src_lp += i4_src_strd;
933 pu1_est_lp += i4_est_strd;
934
935 s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
936 s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
937
938 sad_1 = s1 + s2 + s3 + s4;
939
940 if (sad == 0)
941 {
942 sad_2 = sad_1 << 1;
943
944 ls1 = sad_2 - (s2 + s3);
945 ls2 = sad_2 - (s1 + s4);
946 ls3 = sad_2 - (s3 + s4);
947 ls4 = sad_2 - (s3 - (s1 << 1));
948 ls5 = sad_2 - (s4 - (s2 << 1));
949 ls6 = sad_2 - (s1 + s2);
950 ls7 = sad_2 - (s2 - (s4 << 1));
951 ls8 = sad_2 - (s1 - (s3 << 1));
952
953 if (pu2_thrsh[8] <= sad_1 ||
954 pu2_thrsh[0] <= ls2 || pu2_thrsh[1] <= ls1 ||
955 pu2_thrsh[2] <= ls8 || pu2_thrsh[3] <= ls5 ||
956 pu2_thrsh[4] <= ls6 || pu2_thrsh[5] <= ls3 ||
957 pu2_thrsh[6] <= ls7 || pu2_thrsh[7] <= ls4) {
958 sad = 1;
959 }
960 }
961 (*pi4_mb_distortion) += sad_1;
962 }
963 pu1_src += (i4_src_strd * 4);
964 pu1_est += (i4_est_strd * 4);
965 }
966 *pu4_is_non_zero = sad;
967 }
968
969 /**
970 ******************************************************************************
971 *
972 * @brief computes distortion (SAD and SAQTD) between 2 16x8 (interleaved) chroma
973 * blocks
974 *
975 * @par Description
976 * This functions computes SAD between2 16x8 chroma blocks(interleaved). It
977 * also checks if the SATQD, Sum of absolute transformed quantized difference
978 * between the blocks. If SAQTD is zero, it gives back zero Other wise sad is
979 * returned. There is no provison for early exit. The transform done here is
980 * the transform for chroma blocks in H264
981 *
982 * @param[in] pu1_src
983 * UWORD8 pointer to the source
984 *
985 * @param[out] pu1_dst
986 * UWORD8 pointer to the destination
987 *
988 * @param[in] src_strd
989 * integer source stride
990 *
991 * @param[in] dst_strd
992 * integer destination stride
993 *
994 * @param[in] pu2_thrsh
995 * Threshold for each element of transofrmed quantized block
996 *
997 * @param[out] pi4_mb_distortion
998 * integer evaluated sad
999 *
1000 * @remarks
1001 *
1002 ******************************************************************************
1003 */
ime_compute_satqd_8x16_chroma(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 max_sad,UWORD16 * thrsh)1004 void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
1005 UWORD8 *pu1_est,
1006 WORD32 src_strd,
1007 WORD32 est_strd,
1008 WORD32 max_sad,
1009 UWORD16 *thrsh)
1010 {
1011 WORD32 i, j, plane;
1012 WORD16 s1, s2, s3, s4;
1013 WORD16 sad_1, sad_2;
1014 WORD16 ls1, ls2, ls3, ls4, ls5, ls6, ls7, ls8;
1015 UWORD8 *pu1_src_lp, *pu1_est_lp, *pu1_src_plane, *pu1_est_plane;
1016 WORD32 sad = 0;
1017
1018 UNUSED(max_sad);
1019 pu1_src_plane = pu1_src;
1020 pu1_est_plane = pu1_est;
1021
1022 for (plane = 0; plane < 2; plane++)
1023 {
1024 for (i = 0; i < 4; i++)
1025 {
1026 for (j = 0; j < 4; j++)
1027 {
1028 pu1_src_lp = pu1_src + 8 * j;
1029 pu1_est_lp = pu1_est + 8 * j;
1030
1031 s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1032 s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]) + ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1033
1034 pu1_src_lp += src_strd;
1035 pu1_est_lp += est_strd;
1036
1037 s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1038 s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]) + ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1039
1040 pu1_src_lp += src_strd;
1041 pu1_est_lp += est_strd;
1042
1043 s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1044 s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]) + ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1045
1046 pu1_src_lp += src_strd;
1047 pu1_est_lp += est_strd;
1048
1049 s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1050 s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]) + ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1051
1052 sad_1 = s1 + s2 + s3 + s4;
1053 sad_2 = sad_1 << 1;
1054
1055 ls1 = sad_2 - (s2 + s3);
1056 ls2 = sad_2 - (s1 + s4);
1057 ls3 = sad_2 - (s3 + s4);
1058 ls4 = sad_2 - (s3 - (s1 << 1));
1059 ls5 = sad_2 - (s4 - (s2 << 1));
1060 ls6 = sad_2 - (s1 + s2);
1061 ls7 = sad_2 - (s2 - (s4 << 1));
1062 ls8 = sad_2 - (s1 - (s3 << 1));
1063
1064 if (thrsh[1] > ls1 && thrsh[2] > sad_1 && thrsh[3] > ls2 &&
1065 thrsh[4] > ls3 && thrsh[5] > ls4 && thrsh[6] > ls3 && thrsh[7] > ls5 &&
1066 thrsh[8] > sad_1 && thrsh[9] > ls1 && thrsh[10] > sad_1 && thrsh[11] > ls2 &&
1067 thrsh[12] > ls6 && thrsh[13] > ls7 && thrsh[14] > ls6 && thrsh[15] > ls8)
1068 {
1069 /*set current sad to be zero*/
1070 }
1071 else
1072 return ;
1073
1074 sad += sad_1;
1075 }
1076 pu1_src += (src_strd *4);
1077 pu1_est += (est_strd *4);
1078 }
1079 if (sad < (thrsh[0] << 1))
1080 sad = 0;
1081 else
1082 return;
1083
1084 pu1_src = pu1_src_plane + 1;
1085 pu1_est = pu1_est_plane + 1;
1086 }
1087 return ;
1088 }
1089
1090 /**
1091 ******************************************************************************
1092 *
1093 * @brief computes distortion (SAD and SAQTD) between 2 16x16 blocks
1094 *
1095 * @par Description
1096 * This functions computes SAD between2 16x8 chroma blocks(interleaved). It
1097 * also checks if the SATQD, Sum of absolute transformed quantized difference
1098 * between the blocks. If SAQTD is zero, it gives back zero Other wise sad is
1099 * returned. There is no provison for early exit. The transform done here is the
1100 * transform for intra 16x16 blocks in H264
1101 *
1102 * @param[in] pu1_src
1103 * UWORD8 pointer to the source
1104 *
1105 * @param[out] pu1_dst
1106 * UWORD8 pointer to the destination
1107 *
1108 * @param[in] src_strd
1109 * integer source stride
1110 *
1111 * @param[in] dst_strd
1112 * integer destination stride
1113 *
1114 * @param[in] pu2_thrsh
1115 * Threshold for each element of transofrmed quantized block
1116 *
1117 * @param[out] pi4_mb_distortion
1118 * integer evaluated sad
1119 *
1120 * @remarks
1121 *
1122 ******************************************************************************
1123 */
ime_compute_satqd_16x16_lumaintra(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 max_sad,UWORD16 * thrsh,WORD32 * pi4_mb_distortion,UWORD8 * sig_nz_sad)1124 void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
1125 UWORD8 *pu1_est,
1126 WORD32 src_strd,
1127 WORD32 est_strd,
1128 WORD32 max_sad,
1129 UWORD16 *thrsh,
1130 WORD32 *pi4_mb_distortion,
1131 UWORD8 *sig_nz_sad)
1132 {
1133 UWORD32 i, j;
1134 WORD16 s1[4], s2[4], s3[4], s4[4], sad[4];
1135 UWORD8 *pu1_src_lp, *pu1_est_lp;
1136 UWORD8 *sig_sad_dc;
1137 UWORD32 nz_sad_sig = 0;
1138
1139 UNUSED(max_sad);
1140 *pi4_mb_distortion = 0;
1141 sig_sad_dc = sig_nz_sad;
1142 sig_nz_sad++;
1143
1144 for (i = 0; i < 4; i++)
1145 {
1146 for (j = 0; j < 4; j++)
1147 {
1148 pu1_src_lp = pu1_src + 4 * j;
1149 pu1_est_lp = pu1_est + 4 * j;
1150
1151 s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1152 s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1153
1154 pu1_src_lp += src_strd;
1155 pu1_est_lp += est_strd;
1156
1157 s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1158 s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1159
1160 pu1_src_lp += src_strd;
1161 pu1_est_lp += est_strd;
1162
1163 s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1164 s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1165
1166 pu1_src_lp += src_strd;
1167 pu1_est_lp += est_strd;
1168
1169 s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0]) + ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1170 s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1]) + ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1171
1172 sad[j] = ((s1[j] + s2[j] + s3[j] + s4[j]) << 1);
1173 }
1174
1175 for (j = 0; j < 4; j++)
1176 {
1177
1178 if (thrsh[1] > (sad[j] - (s2[j] + s3[j])) && thrsh[2] > (sad[j] >> 1)
1179 && thrsh[3] > (sad[j] - (s1[j] + s4[j])) &&
1180
1181 thrsh[4] > (sad[j] - (s3[j] + s4[j]))
1182 && thrsh[5] > (sad[j] - (s3[j] - (s1[j] << 1)))
1183 && thrsh[6] > (sad[j] - (s3[j] + s4[j]))
1184 && thrsh[7] > (sad[j] - (s4[j] - (s2[j] << 1))) &&
1185
1186 thrsh[8] > (sad[j] >> 1)
1187 && thrsh[9] > (sad[j] - (s2[j] + s3[j]))
1188 && thrsh[10] > (sad[j] >> 1)
1189 && thrsh[11] > (sad[j] - (s1[j] + s4[j])) &&
1190
1191 thrsh[12] > (sad[j] - (s1[j] + s2[j]))
1192 && thrsh[13] > (sad[j] - (s2[j] - (s4[j] << 1)))
1193 && thrsh[14] > (sad[j] - (s1[j] + s2[j]))
1194 && thrsh[15] > (sad[j] - (s1[j] - (s3[j] << 1))))
1195 {
1196 //sad[j] = 0; /*set current sad to be zero*/
1197 sig_nz_sad[j] = 0;/*Signal that the sad is zero*/
1198 }
1199 else
1200 {
1201 sig_nz_sad[j] = 1;/*signal that sad is non zero*/
1202 nz_sad_sig = 1;
1203 }
1204
1205 (*pi4_mb_distortion) += (sad[j] >> 1);
1206 //if ((*pi4_mb_distortion) >= max_sad)return; /*return or some thing*/
1207 }
1208
1209 sig_nz_sad += 4;
1210 pu1_src += (src_strd * 4);
1211 pu1_est += (est_strd * 4);
1212 }
1213
1214 if ((*pi4_mb_distortion) < thrsh[0] << 2)
1215 {
1216 *sig_sad_dc = 0;
1217 if (nz_sad_sig == 0)
1218 (*pi4_mb_distortion) = 0;
1219 }
1220 else
1221 {
1222 *sig_sad_dc = 1;
1223 }
1224 }
1225
1226
1227