xref: /aosp_15_r20/external/libopus/celt/entcode.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2001-2011 Timothy B. Terriberry
2*a58d3d2aSXin Li */
3*a58d3d2aSXin Li /*
4*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
5*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
6*a58d3d2aSXin Li    are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
9*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
12*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
13*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
14*a58d3d2aSXin Li 
15*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19*a58d3d2aSXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li */
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
29*a58d3d2aSXin Li #include "config.h"
30*a58d3d2aSXin Li #endif
31*a58d3d2aSXin Li 
32*a58d3d2aSXin Li #include "entcode.h"
33*a58d3d2aSXin Li #include "arch.h"
34*a58d3d2aSXin Li 
35*a58d3d2aSXin Li #if !defined(EC_CLZ)
/*This is a fallback for systems where we don't know how to access
   a BSR or CLZ instruction (see ecintrin.h).
  If you are optimizing Opus on a new platform and it has a native CLZ or
   BSR (e.g. cell, MIPS, x86, etc.) then making it available to Opus will be
   an easy performance win.*/
/*Integer base-2 logarithm, counted from 1.
  _v: The value to inspect.
  Return: 0 if _v is zero, otherwise the 1-based position of the most
           significant set bit of _v (e.g. 1 for _v==1, 32 for
           _v==0x80000000).
  The search is a branch-free binary search: every step checks whether the
   upper half of the bits still under consideration contains a set bit and,
   if so, shifts that half down and records the half-width in the result.*/
int ec_ilog(opus_uint32 _v){
  /*The original author measured this branchless form on a Pentium M as the
     fastest of three candidates over 1,000,000,000 random 32-bit inputs,
     slightly ahead of a branching variant and a 256-entry lookup table.*/
  int ilog;
  int shift;
  /*Any non-zero input has a logarithm of at least 1.*/
  ilog=_v!=0;
  /*Upper 16 bits occupied?  Then drop the lower 16.*/
  shift=((_v&0xFFFF0000)!=0)<<4;
  _v>>=shift;
  ilog|=shift;
  /*Same test on the remaining 16 bits, 8 at a time.*/
  shift=((_v&0xFF00)!=0)<<3;
  _v>>=shift;
  ilog|=shift;
  /*...then on the remaining 8 bits, 4 at a time.*/
  shift=((_v&0xF0)!=0)<<2;
  _v>>=shift;
  ilog|=shift;
  /*...then on the remaining 4 bits, 2 at a time.*/
  shift=((_v&0xC)!=0)<<1;
  _v>>=shift;
  ilog|=shift;
  /*Two bits are left; bit 1 decides the final increment.*/
  ilog+=(_v&0x2)!=0;
  return ilog;
}
63*a58d3d2aSXin Li #endif
64*a58d3d2aSXin Li 
65*a58d3d2aSXin Li #if 1
66*a58d3d2aSXin Li /* This is a faster version of ec_tell_frac() that takes advantage
67*a58d3d2aSXin Li    of the low (1/8 bit) resolution to use just a linear function
68*a58d3d2aSXin Li    followed by a lookup to determine the exact transition thresholds. */
ec_tell_frac(ec_ctx * _this)69*a58d3d2aSXin Li opus_uint32 ec_tell_frac(ec_ctx *_this){
70*a58d3d2aSXin Li   static const unsigned correction[8] =
71*a58d3d2aSXin Li     {35733, 38967, 42495, 46340,
72*a58d3d2aSXin Li      50535, 55109, 60097, 65535};
73*a58d3d2aSXin Li   opus_uint32 nbits;
74*a58d3d2aSXin Li   opus_uint32 r;
75*a58d3d2aSXin Li   int         l;
76*a58d3d2aSXin Li   unsigned    b;
77*a58d3d2aSXin Li   nbits=_this->nbits_total<<BITRES;
78*a58d3d2aSXin Li   l=EC_ILOG(_this->rng);
79*a58d3d2aSXin Li   r=_this->rng>>(l-16);
80*a58d3d2aSXin Li   b = (r>>12)-8;
81*a58d3d2aSXin Li   b += r>correction[b];
82*a58d3d2aSXin Li   l = (l<<3)+b;
83*a58d3d2aSXin Li   return nbits-l;
84*a58d3d2aSXin Li }
85*a58d3d2aSXin Li #else
ec_tell_frac(ec_ctx * _this)86*a58d3d2aSXin Li opus_uint32 ec_tell_frac(ec_ctx *_this){
87*a58d3d2aSXin Li   opus_uint32 nbits;
88*a58d3d2aSXin Li   opus_uint32 r;
89*a58d3d2aSXin Li   int         l;
90*a58d3d2aSXin Li   int         i;
91*a58d3d2aSXin Li   /*To handle the non-integral number of bits still left in the encoder/decoder
92*a58d3d2aSXin Li      state, we compute the worst-case number of bits of val that must be
93*a58d3d2aSXin Li      encoded to ensure that the value is inside the range for any possible
94*a58d3d2aSXin Li      subsequent bits.
95*a58d3d2aSXin Li     The computation here is independent of val itself (the decoder does not
96*a58d3d2aSXin Li      even track that value), even though the real number of bits used after
97*a58d3d2aSXin Li      ec_enc_done() may be 1 smaller if rng is a power of two and the
98*a58d3d2aSXin Li      corresponding trailing bits of val are all zeros.
99*a58d3d2aSXin Li     If we did try to track that special case, then coding a value with a
100*a58d3d2aSXin Li      probability of 1/(1<<n) might sometimes appear to use more than n bits.
101*a58d3d2aSXin Li     This may help explain the surprising result that a newly initialized
102*a58d3d2aSXin Li      encoder or decoder claims to have used 1 bit.*/
103*a58d3d2aSXin Li   nbits=_this->nbits_total<<BITRES;
104*a58d3d2aSXin Li   l=EC_ILOG(_this->rng);
105*a58d3d2aSXin Li   r=_this->rng>>(l-16);
106*a58d3d2aSXin Li   for(i=BITRES;i-->0;){
107*a58d3d2aSXin Li     int b;
108*a58d3d2aSXin Li     r=r*r>>15;
109*a58d3d2aSXin Li     b=(int)(r>>16);
110*a58d3d2aSXin Li     l=l<<1|b;
111*a58d3d2aSXin Li     r>>=b;
112*a58d3d2aSXin Li   }
113*a58d3d2aSXin Li   return nbits-l;
114*a58d3d2aSXin Li }
115*a58d3d2aSXin Li #endif
116*a58d3d2aSXin Li 
117*a58d3d2aSXin Li #ifdef USE_SMALL_DIV_TABLE
118*a58d3d2aSXin Li /* Result of 2^32/(2*i+1), except for i=0. */
119*a58d3d2aSXin Li const opus_uint32 SMALL_DIV_TABLE[129] = {
120*a58d3d2aSXin Li    0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
121*a58d3d2aSXin Li    0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
122*a58d3d2aSXin Li    0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
123*a58d3d2aSXin Li    0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
124*a58d3d2aSXin Li    0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
125*a58d3d2aSXin Li    0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
126*a58d3d2aSXin Li    0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
127*a58d3d2aSXin Li    0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
128*a58d3d2aSXin Li    0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
129*a58d3d2aSXin Li    0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
130*a58d3d2aSXin Li    0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
131*a58d3d2aSXin Li    0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
132*a58d3d2aSXin Li    0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
133*a58d3d2aSXin Li    0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
134*a58d3d2aSXin Li    0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
135*a58d3d2aSXin Li    0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
136*a58d3d2aSXin Li    0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
137*a58d3d2aSXin Li    0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
138*a58d3d2aSXin Li    0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
139*a58d3d2aSXin Li    0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
140*a58d3d2aSXin Li    0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
141*a58d3d2aSXin Li    0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
142*a58d3d2aSXin Li    0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
143*a58d3d2aSXin Li    0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
144*a58d3d2aSXin Li    0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
145*a58d3d2aSXin Li    0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
146*a58d3d2aSXin Li    0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
147*a58d3d2aSXin Li    0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
148*a58d3d2aSXin Li    0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
149*a58d3d2aSXin Li    0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
150*a58d3d2aSXin Li    0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
151*a58d3d2aSXin Li    0x01073260, 0x0105197F, 0x0103091B, 0x01010101
152*a58d3d2aSXin Li };
153*a58d3d2aSXin Li #endif
154