xref: /aosp_15_r20/external/sonic/spectrogram.c (revision b290403dc9d28f89f133eb7e190ea8185d440ecd)
1 /* Sonic library
2    Copyright 2016
3    Bill Cox
4    This file is part of the Sonic Library.
5 
6    This file is licensed under the Apache 2.0 license.
7 */
8 
9 #ifdef  KISS_FFT
10 #include <stddef.h>  /* kiss_fft.h failes to load this */
11 #include <kiss_fft.h>
12 #include <kiss_fft_impl.h>
13 #else
14 #include <fftw3.h>
15 #endif
16 #include <float.h>
17 #include <math.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include "sonic.h"
21 #ifndef M_PI
22 #define M_PI 3.14159265358979323846
23 #endif
24 #ifndef M_E
25 #define M_E 2.7182818284590452354
26 #endif
27 
28 struct sonicSpectrumStruct;
29 typedef struct sonicSpectrumStruct* sonicSpectrum;
30 
31 struct sonicSpectrogramStruct {
32   sonicSpectrum* spectrums;
33   double minPower, maxPower;
34   int numSpectrums;
35   int allocatedSpectrums;
36   int sampleRate;
37   int totalSamples;
38 };
39 
40 struct sonicSpectrumStruct {
41   sonicSpectrogram spectrogram;
42   double* power;
43   int numFreqs; /* Number of frequencies */
44   int numSamples;
45   int startingSample;
46 };
47 
48 /* Print out spectrum data for debugging. */
dumpSpectrum(sonicSpectrum spectrum)49 static void dumpSpectrum(sonicSpectrum spectrum) {
50   printf("spectrum numFreqs:%d numSamples:%d startingSample:%d\n",
51          spectrum->numFreqs, spectrum->numSamples, spectrum->startingSample);
52   printf("   ");
53   int i;
54   for (i = 0; i < spectrum->numFreqs; i++) {
55     printf(" %.1f", spectrum->power[i]);
56   }
57   printf("\n");
58 }
59 
60 /* Print out spectrogram data for debugging. */
dumpSpectrogram(sonicSpectrogram spectrogram)61 void dumpSpectrogram(sonicSpectrogram spectrogram) {
62   printf(
63       "spectrogram minPower:%f maxPower:%f numSpectrums:%d totalSamples:%d\n",
64       spectrogram->minPower, spectrogram->maxPower, spectrogram->numSpectrums,
65       spectrogram->totalSamples);
66   int i;
67   for (i = 0; i < spectrogram->numSpectrums; i++) {
68     dumpSpectrum(spectrogram->spectrums[i]);
69   }
70 }
71 
72 /* Create an new spectrum. */
sonicCreateSpectrum(sonicSpectrogram spectrogram)73 static sonicSpectrum sonicCreateSpectrum(sonicSpectrogram spectrogram) {
74   sonicSpectrum spectrum =
75       (sonicSpectrum)calloc(1, sizeof(struct sonicSpectrumStruct));
76   if (spectrum == NULL) {
77     return NULL;
78   }
79   if (spectrogram->numSpectrums == spectrogram->allocatedSpectrums) {
80     spectrogram->allocatedSpectrums <<= 1;
81     spectrogram->spectrums = (sonicSpectrum*)realloc(
82         spectrogram->spectrums,
83         spectrogram->allocatedSpectrums * sizeof(sonicSpectrum));
84     if (spectrogram->spectrums == NULL) {
85       return NULL;
86     }
87   }
88   spectrogram->spectrums[spectrogram->numSpectrums++] = spectrum;
89   spectrum->spectrogram = spectrogram;
90   return spectrum;
91 }
92 
93 /* Destroy the spectrum. */
sonicDestroySpectrum(sonicSpectrum spectrum)94 static void sonicDestroySpectrum(sonicSpectrum spectrum) {
95   if (spectrum == NULL) {
96     return;
97   }
98   if (spectrum->power != NULL) {
99     free(spectrum->power);
100   }
101   free(spectrum);
102 }
103 
104 /* Create an empty spectrogram. */
sonicCreateSpectrogram(int sampleRate)105 sonicSpectrogram sonicCreateSpectrogram(int sampleRate) {
106   sonicSpectrogram spectrogram =
107       (sonicSpectrogram)calloc(1, sizeof(struct sonicSpectrogramStruct));
108   if (spectrogram == NULL) {
109     return NULL;
110   }
111   spectrogram->allocatedSpectrums = 32;
112   spectrogram->spectrums = (sonicSpectrum*)calloc(
113       spectrogram->allocatedSpectrums, sizeof(sonicSpectrum));
114   if (spectrogram->spectrums == NULL) {
115     sonicDestroySpectrogram(spectrogram);
116     return NULL;
117   }
118   spectrogram->sampleRate = sampleRate;
119   spectrogram->minPower = DBL_MAX;
120   spectrogram->maxPower = DBL_MIN;
121   return spectrogram;
122 }
123 
124 /* Destroy the spectrotram. */
sonicDestroySpectrogram(sonicSpectrogram spectrogram)125 void sonicDestroySpectrogram(sonicSpectrogram spectrogram) {
126   if (spectrogram != NULL) {
127     if (spectrogram->spectrums != NULL) {
128       int i;
129       for (i = 0; i < spectrogram->numSpectrums; i++) {
130         sonicSpectrum spectrum = spectrogram->spectrums[i];
131         sonicDestroySpectrum(spectrum);
132       }
133       free(spectrogram->spectrums);
134     }
135     free(spectrogram);
136   }
137 }
138 
139 /* Create a new bitmap.  This takes ownership of data. */
sonicCreateBitmap(unsigned char * data,int numRows,int numCols)140 sonicBitmap sonicCreateBitmap(unsigned char* data, int numRows, int numCols) {
141   sonicBitmap bitmap = (sonicBitmap)calloc(1, sizeof(struct sonicBitmapStruct));
142   if (bitmap == NULL) {
143     return NULL;
144   }
145   bitmap->data = data;
146   bitmap->numRows = numRows;
147   bitmap->numCols = numCols;
148   return bitmap;
149 }
150 
151 /* Destroy the bitmap. */
sonicDestroyBitmap(sonicBitmap bitmap)152 void sonicDestroyBitmap(sonicBitmap bitmap) {
153   if (bitmap == NULL) {
154     return;
155   }
156   if (bitmap->data != NULL) {
157     free(bitmap->data);
158   }
159   free(bitmap);
160 }
161 
162 /* Overlap-add the two pitch periods using a Hann window.  Caller must free the
163  * result. */
computeOverlapAdd(short * samples,int period,int numChannels,double * ola_samples)164 static void computeOverlapAdd(short* samples, int period, int numChannels,
165                               double* ola_samples) {
166   int i;
167   for (i = 0; i < period; i++) {
168     double weight = (1.0 - cos(M_PI * i / period)) / 2.0;
169     short sample1, sample2;
170     if (numChannels == 1) {
171       sample1 = samples[i];
172       sample2 = samples[i + period];
173     } else {
174       /* Average the samples */
175       int total1 = 0;
176       int total2 = 0;
177       int j;
178       for (j = 0; j < numChannels; j++) {
179         total1 += samples[i * numChannels + j];
180         total2 += samples[(i + period) * numChannels + j];
181       }
182       sample1 = (total1 + (numChannels >> 1)) / numChannels;
183       sample2 = (total2 + (numChannels >> 1)) / numChannels;
184     }
185     ola_samples[i] = weight * sample1 + (1.0 - weight) * sample2;
186   }
187 }
188 
189 #ifdef  KISS_FFT
190 /* Compute the amplitude of the kiss_complex number. */
magnitude(kiss_fft_cpx c)191 static double magnitude(kiss_fft_cpx c) {
192   return sqrt(c.r * c.r + c.i * c.i);
193 }
194 #else
195 /* Compute the amplitude of the fftw_complex number. */
magnitude(fftw_complex c)196 static double magnitude(fftw_complex c) {
197   return sqrt(c[0] * c[0] + c[1] * c[1]);
198 }
199 #endif
200 
201 /* Add two pitch periods worth of samples to the spectrogram.  There must be
202    2*period samples.  Time should advance one pitch period for each call to
203    this function. */
sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,short * samples,int numSamples,int numChannels)204 void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
205                                       short* samples, int numSamples,
206                                       int numChannels) {
207   int i;
208   sonicSpectrum spectrum = sonicCreateSpectrum(spectrogram);
209   spectrum->startingSample = spectrogram->totalSamples;
210   spectrogram->totalSamples += numSamples;
211   /* TODO: convert to fixed-point */
212   double* in = calloc(numSamples, sizeof(double));
213   int numFreqs = numSamples / 2 + 1;
214   spectrum->numFreqs = numFreqs;
215   spectrum->numSamples = numSamples;
216   spectrum->power = (double*)calloc(spectrum->numFreqs, sizeof(double));
217   computeOverlapAdd(samples, numSamples, numChannels, in);
218 #ifdef  KISS_FFT
219   kiss_fft_cpx* cin = calloc(numFreqs, sizeof(kiss_fft_cpx));
220   for (i=0; i<numFreqs; i++) {
221     cin[i].r = in[i];
222   }
223   kiss_fft_cpx* out = calloc(numFreqs, sizeof(kiss_fft_cpx));
224   kiss_fft_cfg kiss_plan = kiss_fft_alloc(numFreqs, 0, NULL, NULL);
225   kiss_fft(kiss_plan, cin, out);
226   free(cin);
227 #else
228   fftw_complex* out = calloc(numFreqs, sizeof(fftw_complex));
229   fftw_plan p = fftw_plan_dft_r2c_1d(numSamples, in, out, FFTW_ESTIMATE);
230   fftw_execute(p);
231   fftw_destroy_plan(p);
232 #endif  /* FFTW */
233   /* Set the DC power to 0. */
234   spectrum->power[0] = 0.0;
235   for (i = 1; i < numFreqs; ++i) {
236     double power = magnitude(out[i]) / numSamples;
237     spectrum->power[i] = power;
238     if (power > spectrogram->maxPower) {
239       spectrogram->maxPower = power;
240     }
241     if (power < spectrogram->minPower) {
242       spectrogram->minPower = power;
243     }
244   }
245   free(in);
246   free(out);
247 }
248 
249 /* Linearly interpolate the power at a given position in the spectrogram. */
interpolateSpectrum(sonicSpectrum spectrum,int row,int numRows)250 static double interpolateSpectrum(sonicSpectrum spectrum, int row,
251                                   int numRows) {
252   /* Flip the row so that we show lowest frequency on the bottom. */
253   row = numRows - row - 1;
254   /* We want the max row to be 1/2 the Niquist frequency, or 4 samples worth. */
255   double spectrumFreqSpacing =
256       (double)spectrum->spectrogram->sampleRate / spectrum->numSamples;
257   double rowFreqSpacing = SONIC_MAX_SPECTRUM_FREQ / (numRows - 1);
258   double targetFreq = row * rowFreqSpacing;
259   int bottomIndex = targetFreq / spectrumFreqSpacing;
260   double bottomPower = spectrum->power[bottomIndex];
261   double topPower = spectrum->power[bottomIndex + 1];
262   double position =
263       (targetFreq - bottomIndex * spectrumFreqSpacing) / spectrumFreqSpacing;
264   return (1.0 - position) * bottomPower + position * topPower;
265 }
266 
267 /* Linearly interpolate the power at a given position in the spectrogram. */
interpolateSpectrogram(sonicSpectrum leftSpectrum,sonicSpectrum rightSpectrum,int row,int numRows,int colTime)268 static double interpolateSpectrogram(sonicSpectrum leftSpectrum,
269                                      sonicSpectrum rightSpectrum, int row,
270                                      int numRows, int colTime) {
271   double leftPower = interpolateSpectrum(leftSpectrum, row, numRows);
272   double rightPower = interpolateSpectrum(rightSpectrum, row, numRows);
273   if (rightSpectrum->startingSample !=
274       leftSpectrum->startingSample + leftSpectrum->numSamples) {
275     fprintf(stderr, "Invalid sample spacing\n");
276     exit(1);
277   }
278   int remainder = colTime - leftSpectrum->startingSample;
279   double position = (double)remainder / leftSpectrum->numSamples;
280   return (1.0 - position) * leftPower + position * rightPower;
281 }
282 
283 /* Add one column of data to the output bitmap data. */
addBitmapCol(unsigned char * data,int col,int numCols,int numRows,sonicSpectrogram spectrogram,sonicSpectrum spectrum,sonicSpectrum nextSpectrum,int colTime)284 static void addBitmapCol(unsigned char* data, int col, int numCols, int numRows,
285                          sonicSpectrogram spectrogram, sonicSpectrum spectrum,
286                          sonicSpectrum nextSpectrum, int colTime) {
287   double minPower = spectrogram->minPower;
288   double maxPower = spectrogram->maxPower;
289   int row;
290   for (row = 0; row < numRows; row++) {
291     double power =
292         interpolateSpectrogram(spectrum, nextSpectrum, row, numRows, colTime);
293     if (power < minPower && power > maxPower) {
294       fprintf(stderr, "Power outside min/max range\n");
295       exit(1);
296     }
297     double range = maxPower - minPower;
298     /* Use log scale such that log(min) = 0, and log(max) = 255. */
299     int value =
300         256.0 * sqrt(sqrt(log((M_E - 1.0) * (power - minPower) / range + 1.0)));
301     /* int value = (unsigned char)(((power - minPower)/range)*256); */
302     if (value >= 256) {
303       value = 255;
304     }
305     data[row * numCols + col] = 255 - value;
306   }
307 }
308 
309 /* Convert the spectrogram to a bitmap.  The returned array must be freed by
310    the caller.  It will be rows*cols in size.  The pixels are written top row
311    to bottom, and each row is left to right.  So, the pixel in the 5th row from
312    the top, in the 18th column from the left in a 32x128 array would be in
313    position 128*4 + 18.  NULL is returned if calloc fails to allocate the
314    memory. */
sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,int numRows,int numCols)315 sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
316                                             int numRows, int numCols) {
317   /* dumpSpectrogram(spectrogram); */
318   unsigned char* data =
319       (unsigned char*)calloc(numRows * numCols, sizeof(unsigned char));
320   if (data == NULL) {
321     return NULL;
322   }
323   int xSpectrum = 0; /* xSpectrum is index of nextSpectrum */
324   sonicSpectrum spectrum = spectrogram->spectrums[xSpectrum++];
325   sonicSpectrum nextSpectrum = spectrogram->spectrums[xSpectrum];
326   int totalTime =
327       spectrogram->spectrums[spectrogram->numSpectrums - 1]->startingSample;
328   int col;
329   for (col = 0; col < numCols; col++) {
330     /* There must be at least two spectrums for this to work right. */
331     double colTime = (double)totalTime * col / (numCols - 1);
332     while (xSpectrum + 1 < spectrogram->numSpectrums &&
333            colTime >= nextSpectrum->startingSample) {
334       spectrum = nextSpectrum;
335       nextSpectrum = spectrogram->spectrums[++xSpectrum];
336     }
337     addBitmapCol(data, col, numCols, numRows, spectrogram, spectrum,
338                  nextSpectrum, colTime);
339   }
340   return sonicCreateBitmap(data, numRows, numCols);
341 }
342 
343 /* Write a PGM image file, which is 8-bit grayscale and looks like:
344     P2
345     # CREATOR: libsonic
346     640 400
347     255
348     ...
349 */
sonicWritePGM(sonicBitmap bitmap,char * fileName)350 int sonicWritePGM(sonicBitmap bitmap, char* fileName) {
351   printf("Writing PGM to %s\n", fileName);
352   FILE* file = fopen(fileName, "w");
353   if (file == NULL) {
354     return 0;
355   }
356   if (fprintf(file, "P2\n# CREATOR: libsonic\n%d %d\n255\n", bitmap->numCols,
357               bitmap->numRows) < 0) {
358     fclose(file);
359     return 0;
360   }
361   int i;
362   int numPixels = bitmap->numRows * bitmap->numCols;
363   unsigned char* p = bitmap->data;
364   for (i = 0; i < numPixels; i++) {
365     if (fprintf(file, "%d\n", 255 - *p++) < 0) {
366       fclose(file);
367       return 0;
368     }
369   }
370   fclose(file);
371   return 1;
372 }
373