xref: /aosp_15_r20/external/libopus/celt/arm/fixed_armv5e.h (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
/* Copyright (C) 2007-2009 Xiph.Org Foundation
   Copyright (C) 2003-2008 Jean-Marc Valin
   Copyright (C) 2007-2008 CSIRO
   Copyright (C) 2013      Parrot */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
29*a58d3d2aSXin Li 
30*a58d3d2aSXin Li #ifndef FIXED_ARMv5E_H
31*a58d3d2aSXin Li #define FIXED_ARMv5E_H
32*a58d3d2aSXin Li 
33*a58d3d2aSXin Li #include "fixed_armv4.h"
34*a58d3d2aSXin Li 
35*a58d3d2aSXin Li /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
36*a58d3d2aSXin Li #undef MULT16_32_Q16
MULT16_32_Q16_armv5e(opus_val16 a,opus_val32 b)37*a58d3d2aSXin Li static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
38*a58d3d2aSXin Li {
39*a58d3d2aSXin Li   int res;
40*a58d3d2aSXin Li   __asm__(
41*a58d3d2aSXin Li       "#MULT16_32_Q16\n\t"
42*a58d3d2aSXin Li       "smulwb %0, %1, %2\n\t"
43*a58d3d2aSXin Li       : "=r"(res)
44*a58d3d2aSXin Li       : "r"(b),"r"(a)
45*a58d3d2aSXin Li   );
46*a58d3d2aSXin Li   return res;
47*a58d3d2aSXin Li }
48*a58d3d2aSXin Li #define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
49*a58d3d2aSXin Li 
50*a58d3d2aSXin Li 
51*a58d3d2aSXin Li /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
52*a58d3d2aSXin Li #undef MULT16_32_Q15
MULT16_32_Q15_armv5e(opus_val16 a,opus_val32 b)53*a58d3d2aSXin Li static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
54*a58d3d2aSXin Li {
55*a58d3d2aSXin Li   int res;
56*a58d3d2aSXin Li   __asm__(
57*a58d3d2aSXin Li       "#MULT16_32_Q15\n\t"
58*a58d3d2aSXin Li       "smulwb %0, %1, %2\n\t"
59*a58d3d2aSXin Li       : "=r"(res)
60*a58d3d2aSXin Li       : "r"(b), "r"(a)
61*a58d3d2aSXin Li   );
62*a58d3d2aSXin Li   return SHL32(res,1);
63*a58d3d2aSXin Li }
64*a58d3d2aSXin Li #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
65*a58d3d2aSXin Li 
66*a58d3d2aSXin Li 
67*a58d3d2aSXin Li /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
68*a58d3d2aSXin Li     b must fit in 31 bits.
69*a58d3d2aSXin Li     Result fits in 32 bits. */
70*a58d3d2aSXin Li #undef MAC16_32_Q15
MAC16_32_Q15_armv5e(opus_val32 c,opus_val16 a,opus_val32 b)71*a58d3d2aSXin Li static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
72*a58d3d2aSXin Li  opus_val32 b)
73*a58d3d2aSXin Li {
74*a58d3d2aSXin Li   int res;
75*a58d3d2aSXin Li   __asm__(
76*a58d3d2aSXin Li       "#MAC16_32_Q15\n\t"
77*a58d3d2aSXin Li       "smlawb %0, %1, %2, %3;\n"
78*a58d3d2aSXin Li       : "=r"(res)
79*a58d3d2aSXin Li       : "r"(SHL32(b,1)), "r"(a), "r"(c)
80*a58d3d2aSXin Li   );
81*a58d3d2aSXin Li   return res;
82*a58d3d2aSXin Li }
83*a58d3d2aSXin Li #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
84*a58d3d2aSXin Li 
85*a58d3d2aSXin Li /** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
86*a58d3d2aSXin Li     Result fits in 32 bits. */
87*a58d3d2aSXin Li #undef MAC16_32_Q16
MAC16_32_Q16_armv5e(opus_val32 c,opus_val16 a,opus_val32 b)88*a58d3d2aSXin Li static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
89*a58d3d2aSXin Li  opus_val32 b)
90*a58d3d2aSXin Li {
91*a58d3d2aSXin Li   int res;
92*a58d3d2aSXin Li   __asm__(
93*a58d3d2aSXin Li       "#MAC16_32_Q16\n\t"
94*a58d3d2aSXin Li       "smlawb %0, %1, %2, %3;\n"
95*a58d3d2aSXin Li       : "=r"(res)
96*a58d3d2aSXin Li       : "r"(b), "r"(a), "r"(c)
97*a58d3d2aSXin Li   );
98*a58d3d2aSXin Li   return res;
99*a58d3d2aSXin Li }
100*a58d3d2aSXin Li #define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
101*a58d3d2aSXin Li 
102*a58d3d2aSXin Li /** 16x16 multiply-add where the result fits in 32 bits */
103*a58d3d2aSXin Li #undef MAC16_16
MAC16_16_armv5e(opus_val32 c,opus_val16 a,opus_val16 b)104*a58d3d2aSXin Li static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
105*a58d3d2aSXin Li  opus_val16 b)
106*a58d3d2aSXin Li {
107*a58d3d2aSXin Li   int res;
108*a58d3d2aSXin Li   __asm__(
109*a58d3d2aSXin Li       "#MAC16_16\n\t"
110*a58d3d2aSXin Li       "smlabb %0, %1, %2, %3;\n"
111*a58d3d2aSXin Li       : "=r"(res)
112*a58d3d2aSXin Li       : "r"(a), "r"(b), "r"(c)
113*a58d3d2aSXin Li   );
114*a58d3d2aSXin Li   return res;
115*a58d3d2aSXin Li }
116*a58d3d2aSXin Li #define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
117*a58d3d2aSXin Li 
118*a58d3d2aSXin Li /** 16x16 multiplication where the result fits in 32 bits */
119*a58d3d2aSXin Li #undef MULT16_16
MULT16_16_armv5e(opus_val16 a,opus_val16 b)120*a58d3d2aSXin Li static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
121*a58d3d2aSXin Li {
122*a58d3d2aSXin Li   int res;
123*a58d3d2aSXin Li   __asm__(
124*a58d3d2aSXin Li       "#MULT16_16\n\t"
125*a58d3d2aSXin Li       "smulbb %0, %1, %2;\n"
126*a58d3d2aSXin Li       : "=r"(res)
127*a58d3d2aSXin Li       : "r"(a), "r"(b)
128*a58d3d2aSXin Li   );
129*a58d3d2aSXin Li   return res;
130*a58d3d2aSXin Li }
131*a58d3d2aSXin Li #define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
132*a58d3d2aSXin Li 
#ifdef OPUS_ARM_INLINE_MEDIA

/** Convert a 32-bit signal value to a 16-bit word with rounding and
    saturation.
    2048 (half of 1<<12) is added to x as a rounding offset, then a single
    SSAT shifts the sum right arithmetically by 12 bits and saturates it to
    the signed 16-bit range. EXTRACT16 narrows the saturated value to the
    return type.
    Only compiled when OPUS_ARM_INLINE_MEDIA is defined. */
#undef SIG2WORD16
static OPUS_INLINE opus_val16
SIG2WORD16_armv6(opus_val32 x)
{
    celt_sig saturated;

    /* Operand: %1 = x plus the rounding offset. */
    __asm__("#SIG2WORD16\n\t"
            "ssat %0, #16, %1, ASR #12\n\t"
            : "=r"(saturated)
            : "r"(x+2048));
    return EXTRACT16(saturated);
}
#define SIG2WORD16(x) (SIG2WORD16_armv6(x))

#endif /* OPUS_ARM_INLINE_MEDIA */
150*a58d3d2aSXin Li 
151*a58d3d2aSXin Li #endif
152