1 /************************************************************************
2 * Copyright (C) 2002-2009, Xiph.org Foundation
3 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 * * Neither the names of the Xiph.org Foundation nor Pinknoise
17 * Productions Ltd nor the names of its contributors may be used to
18 * endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ************************************************************************
33
34 function: normalized modified discrete cosine transform
35 power of two length transform only [64 <= n ]
36 last mod: $Id: mdct.c,v 1.9.6.5 2003/04/29 04:03:27 xiphmont Exp $
37
38 Original algorithm adapted long ago from _The use of multirate filter
39 banks for coding of high quality digital audio_, by T. Sporer,
40 K. Brandenburg and B. Edler, collection of the European Signal
41 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
42 211-214
43
44 The below code implements an algorithm that no longer looks much like
45 that presented in the paper, but the basic structure remains if you
46 dig deep enough to see it.
47
48 This module DOES NOT INCLUDE code to generate/apply the window
49 function. Everybody has their own weird favorite including me... I
50 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
51 vehemently disagree.
52
53 ************************************************************************/
54
55 #include "ivorbiscodec.h"
56 #include "os.h"
57 #include "misc.h"
58 #include "mdct.h"
59 #include "mdct_lookup.h"
60
61 #include <stdio.h>
62
63 #if defined(ONLY_C)
presymmetry(DATA_TYPE * in,int n2,int step)64 STIN void presymmetry(DATA_TYPE *in,int n2,int step){
65 DATA_TYPE *aX;
66 DATA_TYPE *bX;
67 LOOKUP_T *T;
68 int n4=n2>>1;
69
70 aX = in+n2-3;
71 T = sincos_lookup0;
72
73 do{
74 REG_TYPE s0= aX[0];
75 REG_TYPE s2= aX[2];
76 XPROD31( s0, s2, T[0], T[1], &aX[0], &aX[2] ); T+=step;
77 aX-=4;
78 }while(aX>=in+n4);
79 do{
80 REG_TYPE s0= aX[0];
81 REG_TYPE s2= aX[2];
82 XPROD31( s0, s2, T[1], T[0], &aX[0], &aX[2] ); T-=step;
83 aX-=4;
84 }while(aX>=in);
85
86 aX = in+n2-4;
87 bX = in;
88 T = sincos_lookup0;
89 do{
90 REG_TYPE ri0= aX[0];
91 REG_TYPE ri2= aX[2];
92 REG_TYPE ro0= bX[0];
93 REG_TYPE ro2= bX[2];
94
95 XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step;
96 XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] );
97
98 aX-=4;
99 bX+=4;
100 }while(aX>=bX);
101 }
102
103 __attribute__((no_sanitize("signed-integer-overflow")))
104 /* 8 point butterfly (in place) */
mdct_butterfly_8(DATA_TYPE * x)105 STIN void mdct_butterfly_8(DATA_TYPE *x){
106
107 REG_TYPE s0 = x[0] + x[1];
108 REG_TYPE s1 = x[0] - x[1];
109 REG_TYPE s2 = x[2] + x[3];
110 REG_TYPE s3 = x[2] - x[3];
111 REG_TYPE s4 = x[4] + x[5];
112 REG_TYPE s5 = x[4] - x[5];
113 REG_TYPE s6 = x[6] + x[7];
114 REG_TYPE s7 = x[6] - x[7];
115
116 x[0] = s5 + s3;
117 x[1] = s7 - s1;
118 x[2] = s5 - s3;
119 x[3] = s7 + s1;
120 x[4] = s4 - s0;
121 x[5] = s6 - s2;
122 x[6] = s4 + s0;
123 x[7] = s6 + s2;
124 MB();
125 }
126
127 __attribute__((no_sanitize("signed-integer-overflow")))
128 /* 16 point butterfly (in place, 4 register) */
mdct_butterfly_16(DATA_TYPE * x)129 STIN void mdct_butterfly_16(DATA_TYPE *x){
130
131 REG_TYPE s0, s1, s2, s3;
132
133 s0 = x[ 8] - x[ 9]; x[ 8] += x[ 9];
134 s1 = x[10] - x[11]; x[10] += x[11];
135 s2 = x[ 1] - x[ 0]; x[ 9] = x[ 1] + x[0];
136 s3 = x[ 3] - x[ 2]; x[11] = x[ 3] + x[2];
137 x[ 0] = MULT31((s0 - s1) , cPI2_8);
138 x[ 1] = MULT31((s2 + s3) , cPI2_8);
139 x[ 2] = MULT31((s0 + s1) , cPI2_8);
140 x[ 3] = MULT31((s3 - s2) , cPI2_8);
141 MB();
142
143 s2 = x[12] - x[13]; x[12] += x[13];
144 s3 = x[14] - x[15]; x[14] += x[15];
145 s0 = x[ 4] - x[ 5]; x[13] = x[ 5] + x[ 4];
146 s1 = x[ 7] - x[ 6]; x[15] = x[ 7] + x[ 6];
147 x[ 4] = s2; x[ 5] = s1;
148 x[ 6] = s3; x[ 7] = s0;
149 MB();
150
151 mdct_butterfly_8(x);
152 mdct_butterfly_8(x+8);
153 }
154
155 __attribute__((no_sanitize("signed-integer-overflow")))
156 /* 32 point butterfly (in place, 4 register) */
mdct_butterfly_32(DATA_TYPE * x)157 STIN void mdct_butterfly_32(DATA_TYPE *x){
158
159 REG_TYPE s0, s1, s2, s3;
160
161 s0 = x[16] - x[17]; x[16] += x[17];
162 s1 = x[18] - x[19]; x[18] += x[19];
163 s2 = x[ 1] - x[ 0]; x[17] = x[ 1] + x[ 0];
164 s3 = x[ 3] - x[ 2]; x[19] = x[ 3] + x[ 2];
165 XNPROD31( s0, s1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] );
166 XPROD31 ( s2, s3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] );
167 MB();
168
169 s0 = x[20] - x[21]; x[20] += x[21];
170 s1 = x[22] - x[23]; x[22] += x[23];
171 s2 = x[ 5] - x[ 4]; x[21] = x[ 5] + x[ 4];
172 s3 = x[ 7] - x[ 6]; x[23] = x[ 7] + x[ 6];
173 x[ 4] = MULT31((s0 - s1) , cPI2_8);
174 x[ 5] = MULT31((s3 + s2) , cPI2_8);
175 x[ 6] = MULT31((s0 + s1) , cPI2_8);
176 x[ 7] = MULT31((s3 - s2) , cPI2_8);
177 MB();
178
179 s0 = x[24] - x[25]; x[24] += x[25];
180 s1 = x[26] - x[27]; x[26] += x[27];
181 s2 = x[ 9] - x[ 8]; x[25] = x[ 9] + x[ 8];
182 s3 = x[11] - x[10]; x[27] = x[11] + x[10];
183 XNPROD31( s0, s1, cPI1_8, cPI3_8, &x[ 8], &x[10] );
184 XPROD31 ( s2, s3, cPI3_8, cPI1_8, &x[ 9], &x[11] );
185 MB();
186
187 s0 = x[28] - x[29]; x[28] += x[29];
188 s1 = x[30] - x[31]; x[30] += x[31];
189 s2 = x[12] - x[13]; x[29] = x[13] + x[12];
190 s3 = x[15] - x[14]; x[31] = x[15] + x[14];
191 x[12] = s0; x[13] = s3;
192 x[14] = s1; x[15] = s2;
193 MB();
194
195 mdct_butterfly_16(x);
196 mdct_butterfly_16(x+16);
197 }
198
199 /* Ignoring overflows as the butterfly dct function expects and uses the overflows */
200 __attribute__((no_sanitize("signed-integer-overflow")))
201 /* N/stage point generic N stage butterfly (in place, 2 register) */
mdct_butterfly_generic(DATA_TYPE * x,int points,int step)202 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
203 LOOKUP_T *T = sincos_lookup0;
204 DATA_TYPE *x1 = x + points - 4;
205 DATA_TYPE *x2 = x + (points>>1) - 4;
206 REG_TYPE s0, s1, s2, s3;
207
208 do{
209 s0 = x1[0] - x1[1]; x1[0] += x1[1];
210 s1 = x1[3] - x1[2]; x1[2] += x1[3];
211 s2 = x2[1] - x2[0]; x1[1] = x2[1] + x2[0];
212 s3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2];
213 XPROD31( s1, s0, T[0], T[1], &x2[0], &x2[2] );
214 XPROD31( s2, s3, T[0], T[1], &x2[1], &x2[3] ); T+=step;
215 x1-=4;
216 x2-=4;
217 }while(T<sincos_lookup0+1024);
218 x1 = x + (points>>1) + (points>>2) - 4;
219 x2 = x + (points>>2) - 4;
220 T = sincos_lookup0+1024;
221 do{
222 s0 = x1[0] - x1[1]; x1[0] += x1[1];
223 s1 = x1[2] - x1[3]; x1[2] += x1[3];
224 s2 = x2[0] - x2[1]; x1[1] = x2[1] + x2[0];
225 s3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2];
226 XNPROD31( s0, s1, T[0], T[1], &x2[0], &x2[2] );
227 XNPROD31( s3, s2, T[0], T[1], &x2[1], &x2[3] ); T-=step;
228 x1-=4;
229 x2-=4;
230 }while(T>sincos_lookup0);
231 }
232
mdct_butterflies(DATA_TYPE * x,int points,int shift)233 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
234
235 int stages=7-shift;
236 int i,j;
237
238 for(i=0;--stages>=0;i++){
239 for(j=0;j<(1<<i);j++)
240 {
241 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
242 }
243 }
244
245 for(j=0;j<points;j+=32)
246 mdct_butterfly_32(x+j);
247 }
248
249 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
250
bitrev12(int x)251 STIN int bitrev12(int x){
252 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
253 }
254
mdct_bitreverse(DATA_TYPE * x,int n,int shift)255 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){
256 int bit = 0;
257 DATA_TYPE *w = x+(n>>1);
258
259 do{
260 DATA_TYPE b = bitrev12(bit++);
261 DATA_TYPE *xx = x + (b>>shift);
262 REG_TYPE r;
263
264 w -= 2;
265
266 if(w>xx){
267
268 r = xx[0];
269 xx[0] = w[0];
270 w[0] = r;
271
272 r = xx[1];
273 xx[1] = w[1];
274 w[1] = r;
275 }
276 }while(w>x);
277 }
278
279 __attribute__((no_sanitize("signed-integer-overflow")))
mdct_step7(DATA_TYPE * x,int n,int step)280 STIN void mdct_step7(DATA_TYPE *x,int n,int step){
281 DATA_TYPE *w0 = x;
282 DATA_TYPE *w1 = x+(n>>1);
283 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
284 LOOKUP_T *Ttop = T+1024;
285 REG_TYPE s0, s1, s2, s3;
286
287 do{
288 w1 -= 2;
289
290 s0 = w0[0] + w1[0];
291 s1 = w1[1] - w0[1];
292 s2 = MULT32(s0, T[1]) + MULT32(s1, T[0]);
293 s3 = MULT32(s1, T[1]) - MULT32(s0, T[0]);
294 T+=step;
295
296 s0 = (w0[1] + w1[1])>>1;
297 s1 = (w0[0] - w1[0])>>1;
298 w0[0] = s0 + s2;
299 w0[1] = s1 + s3;
300 w1[0] = s0 - s2;
301 w1[1] = s3 - s1;
302
303 w0 += 2;
304 }while(T<Ttop);
305 do{
306 w1 -= 2;
307
308 s0 = w0[0] + w1[0];
309 s1 = w1[1] - w0[1];
310 T-=step;
311 s2 = MULT32(s0, T[0]) + MULT32(s1, T[1]);
312 s3 = MULT32(s1, T[0]) - MULT32(s0, T[1]);
313
314 s0 = (w0[1] + w1[1])>>1;
315 s1 = (w0[0] - w1[0])>>1;
316 w0[0] = s0 + s2;
317 w0[1] = s1 + s3;
318 w1[0] = s0 - s2;
319 w1[1] = s3 - s1;
320
321 w0 += 2;
322 }while(w0<w1);
323 }
324 #endif
325
326 __attribute__((no_sanitize("signed-integer-overflow")))
mdct_step8(DATA_TYPE * x,int n,int step)327 STIN void mdct_step8(DATA_TYPE *x, int n, int step){
328 LOOKUP_T *T;
329 LOOKUP_T *V;
330 DATA_TYPE *iX =x+(n>>1);
331
332 switch(step) {
333 #if defined(ONLY_C)
334 default:
335 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
336 do{
337 REG_TYPE s0 = x[0];
338 REG_TYPE s1 = -x[1];
339 XPROD31( s0, s1, T[0], T[1], x, x+1); T+=step;
340 x +=2;
341 }while(x<iX);
342 break;
343 #endif
344
345 case 1:
346 {
347 /* linear interpolation between table values: offset=0.5, step=1 */
348 REG_TYPE t0,t1,v0,v1,s0,s1;
349 T = sincos_lookup0;
350 V = sincos_lookup1;
351 t0 = (*T++)>>1;
352 t1 = (*T++)>>1;
353 do{
354 s0 = x[0];
355 s1 = -x[1];
356 t0 += (v0 = (*V++)>>1);
357 t1 += (v1 = (*V++)>>1);
358 XPROD31( s0, s1, t0, t1, x, x+1 );
359
360 s0 = x[2];
361 s1 = -x[3];
362 v0 += (t0 = (*T++)>>1);
363 v1 += (t1 = (*T++)>>1);
364 XPROD31( s0, s1, v0, v1, x+2, x+3 );
365
366 x += 4;
367 }while(x<iX);
368 break;
369 }
370
371 case 0:
372 {
373 /* linear interpolation between table values: offset=0.25, step=0.5 */
374 REG_TYPE t0,t1,v0,v1,q0,q1,s0,s1;
375 T = sincos_lookup0;
376 V = sincos_lookup1;
377 t0 = *T++;
378 t1 = *T++;
379 do{
380
381
382 v0 = *V++;
383 v1 = *V++;
384 t0 += (q0 = (v0-t0)>>2);
385 t1 += (q1 = (v1-t1)>>2);
386 s0 = x[0];
387 s1 = -x[1];
388 XPROD31( s0, s1, t0, t1, x, x+1 );
389 t0 = v0-q0;
390 t1 = v1-q1;
391 s0 = x[2];
392 s1 = -x[3];
393 XPROD31( s0, s1, t0, t1, x+2, x+3 );
394
395 t0 = *T++;
396 t1 = *T++;
397 v0 += (q0 = (t0-v0)>>2);
398 v1 += (q1 = (t1-v1)>>2);
399 s0 = x[4];
400 s1 = -x[5];
401 XPROD31( s0, s1, v0, v1, x+4, x+5 );
402 v0 = t0-q0;
403 v1 = t1-q1;
404 s0 = x[6];
405 s1 = -x[7];
406 XPROD31( s0, s1, v0, v1, x+5, x+6 );
407
408 x+=8;
409 }while(x<iX);
410 break;
411 }
412 }
413 }
414
415 extern int mdct_backwardARM(int n, DATA_TYPE *in);
416
417 /* partial; doesn't perform last-step deinterleave/unrolling. That
418 can be done more efficiently during pcm output */
mdct_backward(int n,DATA_TYPE * in)419 void mdct_backward(int n, DATA_TYPE *in){
420 int step;
421
422 #if defined(ONLY_C)
423 int shift;
424
425 for (shift=4;!(n&(1<<shift));shift++);
426 shift=13-shift;
427 step=2<<shift;
428
429 presymmetry(in,n>>1,step);
430 mdct_butterflies(in,n>>1,shift);
431 mdct_bitreverse(in,n,shift);
432 mdct_step7(in,n,step);
433 mdct_step8(in,n,step>>2);
434 #else
435 step = mdct_backwardARM(n, in);
436 if (step <= 1)
437 mdct_step8(in,n,step);
438 #endif
439 }
440
441 #if defined(ONLY_C)
mdct_shift_right(int n,DATA_TYPE * in,DATA_TYPE * right)442 void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){
443 int i;
444 n>>=2;
445 in+=1;
446
447 for(i=0;i<n;i++)
448 right[i]=in[i<<1];
449 }
450 #endif
451
452 extern ogg_int16_t *mdct_unroll_prelap(ogg_int16_t *out,
453 DATA_TYPE *post,
454 DATA_TYPE *l,
455 int step);
456 extern ogg_int16_t *mdct_unroll_part2(ogg_int16_t *out,
457 DATA_TYPE *post,
458 DATA_TYPE *l,
459 DATA_TYPE *r,
460 int step,
461 LOOKUP_T *wL,
462 LOOKUP_T *wR);
463 extern ogg_int16_t *mdct_unroll_part3(ogg_int16_t *out,
464 DATA_TYPE *post,
465 DATA_TYPE *l,
466 DATA_TYPE *r,
467 int step,
468 LOOKUP_T *wL,
469 LOOKUP_T *wR);
470 extern ogg_int16_t *mdct_unroll_postlap(ogg_int16_t *out,
471 DATA_TYPE *post,
472 DATA_TYPE *l,
473 int step);
474
mdct_unroll_lap(int n0,int n1,int lW,int W,DATA_TYPE * in,DATA_TYPE * right,LOOKUP_T * w0,LOOKUP_T * w1,ogg_int16_t * out,int step,int start,int end)475 void mdct_unroll_lap(int n0,int n1,
476 int lW,int W,
477 DATA_TYPE *in,
478 DATA_TYPE *right,
479 LOOKUP_T *w0,
480 LOOKUP_T *w1,
481 ogg_int16_t *out,
482 int step,
483 int start, /* samples, this frame */
484 int end /* samples, this frame */){
485
486 DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1);
487 DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2);
488 DATA_TYPE *post;
489 LOOKUP_T *wR=(W && lW ? w1+(n1>>1) : w0+(n0>>1));
490 LOOKUP_T *wL=(W && lW ? w1 : w0 );
491
492 int preLap=(lW && !W ? (n1>>2)-(n0>>2) : 0 );
493 int halfLap=(lW && W ? (n1>>2) : (n0>>2) );
494 int postLap=(!lW && W ? (n1>>2)-(n0>>2) : 0 );
495 int n,off;
496
497 /* preceeding direct-copy lapping from previous frame, if any */
498 if(preLap){
499 n = (end<preLap?end:preLap);
500 off = (start<preLap?start:preLap);
501 post = r-n;
502 r -= off;
503 start -= off;
504 end -= n;
505 #if defined(ONLY_C)
506 while(r>post){
507 *out = CLIP_TO_15((*--r)>>9);
508 out+=step;
509 }
510 #else
511 out = mdct_unroll_prelap(out,post,r,step);
512 n -= off;
513 if (n < 0)
514 n = 0;
515 r -= n;
516 #endif
517 }
518
519 /* cross-lap; two halves due to wrap-around */
520 n = (end<halfLap?end:halfLap);
521 off = (start<halfLap?start:halfLap);
522 post = r-n;
523 r -= off;
524 l -= off*2;
525 start -= off;
526 wR -= off;
527 wL += off;
528 end -= n;
529 #if defined(ONLY_C)
530 while(r>post){
531 l-=2;
532 *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9);
533 out+=step;
534 }
535 #else
536 out = mdct_unroll_part2(out, post, l, r, step, wL, wR);
537 n -= off;
538 if (n < 0)
539 n = 0;
540 l -= 2*n;
541 r -= n;
542 wR -= n;
543 wL += n;
544 #endif
545
546 n = (end<halfLap?end:halfLap);
547 off = (start<halfLap?start:halfLap);
548 post = r+n;
549 r += off;
550 l += off*2;
551 start -= off;
552 end -= n;
553 wR -= off;
554 wL += off;
555 #if defined(ONLY_C)
556 while(r<post){
557 *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9);
558 out+=step;
559 l+=2;
560 }
561 #else
562 out = mdct_unroll_part3(out, post, l, r, step, wL, wR);
563 n -= off;
564 if (n < 0)
565 n = 0;
566 l += 2*n;
567 r += n;
568 wR -= n;
569 wL += n;
570 #endif
571
572 /* preceeding direct-copy lapping from previous frame, if any */
573 if(postLap){
574 n = (end<postLap?end:postLap);
575 off = (start<postLap?start:postLap);
576 post = l+n*2;
577 l += off*2;
578 #if defined(ONLY_C)
579 while(l<post){
580 *out = CLIP_TO_15((-*l)>>9);
581 out+=step;
582 l+=2;
583 }
584 #else
585 out = mdct_unroll_postlap(out,post,l,step);
586 #endif
587 }
588 }
589
590