xref: /aosp_15_r20/external/libopus/dnn/dred_decoder.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2022 Amazon
2*a58d3d2aSXin Li    Written by Jan Buethe */
3*a58d3d2aSXin Li /*
4*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
5*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
6*a58d3d2aSXin Li    are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
9*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
12*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
13*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
14*a58d3d2aSXin Li 
15*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19*a58d3d2aSXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li */
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #include <string.h>
29*a58d3d2aSXin Li 
30*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
31*a58d3d2aSXin Li #include "config.h"
32*a58d3d2aSXin Li #endif
33*a58d3d2aSXin Li 
34*a58d3d2aSXin Li #include "os_support.h"
35*a58d3d2aSXin Li #include "dred_decoder.h"
36*a58d3d2aSXin Li #include "dred_coding.h"
37*a58d3d2aSXin Li #include "celt/entdec.h"
38*a58d3d2aSXin Li #include "celt/laplace.h"
39*a58d3d2aSXin Li #include "dred_rdovae_stats_data.h"
40*a58d3d2aSXin Li #include "dred_rdovae_constants.h"
41*a58d3d2aSXin Li 
/* Entropy-decodes `dim` quantized symbols from `dec` and writes the rescaled
   values to x[0..dim-1].

   For each index the symbol is taken to be zero, without touching the
   bitstream, when r[] is 0 or p0[] is 255; otherwise it is read with
   ec_laplace_decode_p0() using p0[] and r[] shifted left by 7 bits.
   The output is symbol*256/scale[], where a scale entry of 0 is treated
   as 1 to avoid dividing by zero. */
static void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
    int k;
    for (k = 0; k < dim; k++) {
        int sym = 0;
        int divisor = scale[k];
        /* Only consume bits when the tables describe a non-degenerate symbol. */
        if (r[k] != 0 && p0[k] != 255) {
            sym = ec_laplace_decode_p0(dec, p0[k]<<7, r[k]<<7);
        }
        if (divisor == 0) {
            divisor = 1;
        }
        x[k] = sym*256.f/divisor;
    }
}
51*a58d3d2aSXin Li 
/* Entropy-decodes a DRED payload into `dec`.

   bytes / num_bytes    - the range-coded payload and its length in bytes.
   min_feature_frames   - bounds how many latent vectors are decoded: the
                          loop index advances by 2 per latent vector and stops
                          at IMIN(DRED_NUM_REDUNDANCY_FRAMES,
                          (min_feature_frames+1)/2).
   dred_frame_offset    - added to the offset decoded from the payload to
                          form dec->dred_offset.

   On return dec->dred_offset, dec->state, dec->latents (for the vectors that
   were decoded), dec->nb_latents and dec->process_stage (set to 1) have been
   written. Decoding also stops early once 7 or fewer bits remain in the
   payload.

   Returns the number of latent vectors decoded (same value as
   dec->nb_latents). */
int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int min_feature_frames, int dred_frame_offset)
{
  ec_dec ec;
  int q_level;
  int i;
  int offset;
  int q0;
  int dQ;
  int qmax;
  int state_qoffset;
  int extra_offset;
  int coded_offset;
  int max_frames;

  /* since features are decoded in quadruples, it makes no sense to go with an odd number of redundancy frames */
  celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0);

  /* decode initial state and initialize RDOVAE decoder */
  ec_dec_init(&ec, (unsigned char*)bytes, num_bytes);
  /* ec_dec_uint() returns an unsigned value; convert each small decoded
     value to int right away so the offset arithmetic below is signed. */
  q0 = (int)ec_dec_uint(&ec, 16);
  dQ = (int)ec_dec_uint(&ec, 8);
  if (ec_dec_uint(&ec, 2)) extra_offset = 32*(int)ec_dec_uint(&ec, 256);
  else extra_offset = 0;
  coded_offset = (int)ec_dec_uint(&ec, 32);
  /* Compute total offset, including DRED position in a multiframe packet. */
  dec->dred_offset = 16 - coded_offset - extra_offset + dred_frame_offset;
  qmax = 15;
  if (q0 < 14 && dQ > 0) {
    int nvals;
    int ft;
    int s;
    /* The distribution for the dQmax symbol is split evenly between zero
        (which implies qmax == 15) and larger values, with the probability of
        all larger values being uniform.
       This is equivalent to coding 1 bit to decide if the maximum is less than
        15 followed by a uint to decide the actual value if it is less than
        15, but combined into a single symbol. */
    nvals = 15 - (q0 + 1);
    ft = 2*nvals;
    s = ec_decode(&ec, ft);
    if (s >= nvals) {
      qmax = q0 + (s - nvals) + 1;
      ec_dec_update(&ec, s, s + 1, ft);
    }
    else {
      ec_dec_update(&ec, 0, nvals, ft);
    }
  }
  /* The state tables are indexed by quantizer level q0, DRED_STATE_DIM entries per level. */
  state_qoffset = q0*DRED_STATE_DIM;
  dred_decode_latents(
      &ec,
      dec->state,
      dred_state_quant_scales_q8 + state_qoffset,
      dred_state_r_q8 + state_qoffset,
      dred_state_p0_q8 + state_qoffset,
      DRED_STATE_DIM);

  /* Decode one latent vector per iteration (i advances by 2); vectors are
     stored in dec->latents in the order they are decoded. */
  max_frames = IMIN(DRED_NUM_REDUNDANCY_FRAMES, (min_feature_frames+1)/2);
  for (i = 0; i < max_frames; i += 2)
  {
      /* FIXME: Figure out how to avoid missing a last frame that would take up < 8 bits. */
      if (8*num_bytes - ec_tell(&ec) <= 7)
         break;
      q_level = compute_quantizer(q0, dQ, qmax, i/2);
      /* The latent tables hold DRED_LATENT_DIM entries per quantizer level. */
      offset = q_level*DRED_LATENT_DIM;
      dred_decode_latents(
          &ec,
          &dec->latents[(i/2)*DRED_LATENT_DIM],
          dred_latent_quant_scales_q8 + offset,
          dred_latent_r_q8 + offset,
          dred_latent_p0_q8 + offset,
          DRED_LATENT_DIM
          );
  }
  dec->process_stage = 1;
  dec->nb_latents = i/2;
  return i/2;
}
130