xref: /aosp_15_r20/external/sonic/sonic.h (revision b290403dc9d28f89f133eb7e190ea8185d440ecd)
1 #ifndef SONIC_H_
2 #define SONIC_H_
3 
4 /* Sonic library
5    Copyright 2010
6    Bill Cox
7    This file is part of the Sonic Library.
8 
9    This file is licensed under the Apache 2.0 license.
10 */
11 
12 /*
13 The Sonic Library implements a new algorithm invented by Bill Cox for the
14 specific purpose of speeding up speech by high factors at high quality.  It
15 generates smooth speech at speed up factors as high as 6X, possibly more.  It is
16 also capable of slowing down speech, and generates high quality results
17 regardless of the speed up or slow down factor.  For speeding up speech by 2X or
18 more, the following equation is used:
19 
20     newSamples = period/(speed - 1.0)
21     scale = 1.0/newSamples;
22 
23 where period is the current pitch period, determined using AMDF or any other
24 pitch estimator, and speed is the speedup factor.  If the current position in
25 the input stream is pointed to by "samples", and the current output stream
26 position is pointed to by "out", then newSamples number of samples can be
27 generated with:
28 
29     out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;
30 
31 where t = 0 to newSamples - 1.
32 
33 For speed factors < 2X, the PICOLA algorithm is used.  The above
34 algorithm is first used to double the speed of one pitch period.  Then, enough
35 input is directly copied from the input to the output to achieve the desired
36 speed up factor, where 1.0 < speed < 2.0.  The amount of data copied is derived:
37 
38     speed = (2*period + length)/(period + length)
39     speed*length + speed*period = 2*period + length
40     length(speed - 1) = 2*period - speed*period
41     length = period*(2 - speed)/(speed - 1)
42 
43 For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
44 the output twice, and length of input is copied from the input to the output
45 until the output desired speed is reached.  The length of data copied is:
46 
47     length = period*(2*speed - 1)/(1 - speed)
48 
49 For slow down factors below 0.5, no data is copied, and an algorithm
50 similar to high speed factors is used.
51 */
52 
53 /* Uncomment this to use sine-wave based overlap add which in theory can improve
54    sound quality slightly, at the expense of lots of floating point math. */
55 /* #define SONIC_USE_SIN */
56 
57 #ifdef __cplusplus
58 extern "C" {
59 #endif
60 
61 #ifdef SONIC_INTERNAL
62 /* The following #define's are used to change the names of the routines defined
63  * here so that a new library (i.e. speedy) can reuse these names, and then call
64  * the original names.  We do this for two reasons: 1) we don't want to change
65  * the original API, and 2) we want to add a shim, using the original names and
66  * still call these routines.
67  *
68  * Original users of this API and the libsonic library need to do nothing.  The
69  * original behavior remains.
70  *
71  * A new user that adds additional functionality on top of this library (a shim)
72  * should #define SONIC_INTERNAL before including this file, undefine all these
73  * symbols and call the sonicIntXXX functions directly.
74  */
75 #define sonicCreateStream sonicIntCreateStream
76 #define sonicDestroyStream sonicIntDestroyStream
77 #define sonicWriteFloatToStream sonicIntWriteFloatToStream
78 #define sonicWriteShortToStream sonicIntWriteShortToStream
79 #define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream
80 #define sonicReadFloatFromStream sonicIntReadFloatFromStream
81 #define sonicReadShortFromStream sonicIntReadShortFromStream
82 #define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream
83 #define sonicFlushStream sonicIntFlushStream
84 #define sonicSamplesAvailable sonicIntSamplesAvailable
85 #define sonicGetSpeed sonicIntGetSpeed
86 #define sonicSetSpeed sonicIntSetSpeed
87 #define sonicGetPitch sonicIntGetPitch
88 #define sonicSetPitch sonicIntSetPitch
89 #define sonicGetRate sonicIntGetRate
90 #define sonicSetRate sonicIntSetRate
91 #define sonicGetVolume sonicIntGetVolume
92 #define sonicSetVolume sonicIntSetVolume
93 #define sonicGetQuality sonicIntGetQuality
94 #define sonicSetQuality sonicIntSetQuality
95 #define sonicGetSampleRate sonicIntGetSampleRate
96 #define sonicSetSampleRate sonicIntSetSampleRate
97 #define sonicGetNumChannels sonicIntGetNumChannels
98 #define sonicGetUserData sonicIntGetUserData
99 #define sonicSetUserData sonicIntSetUserData
100 #define sonicSetNumChannels sonicIntSetNumChannels
101 #define sonicChangeFloatSpeed sonicIntChangeFloatSpeed
102 #define sonicChangeShortSpeed sonicIntChangeShortSpeed
103 #define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup
104 #define sonicSetDurationFeedbackStrength sonicIntSetDurationFeedbackStrength
105 #define sonicComputeSpectrogram sonicIntComputeSpectrogram
106 #define sonicGetSpectrogram sonicIntGetSpectrogram
107 
108 #endif /* SONIC_INTERNAL */
109 
110 /* This specifies the range of voice pitches we try to match.
111    Note that if we go lower than 65, we could overflow in findPitchInRange */
112 #ifndef SONIC_MIN_PITCH
113 #define SONIC_MIN_PITCH 65
114 #endif  /* SONIC_MIN_PITCH */
115 #ifndef SONIC_MAX_PITCH
116 #define SONIC_MAX_PITCH 400
117 #endif  /* SONIC_MAX_PITCH */
118 
119 /* These are used to down-sample some inputs to improve speed */
120 #define SONIC_AMDF_FREQ 4000
121 
122 struct sonicStreamStruct;  /* Opaque here: the struct body is not defined in this header. */
123 typedef struct sonicStreamStruct* sonicStream;  /* Stream handle taken by the functions below. */
124 
125 /* For all of the following functions, numChannels is multiplied by numSamples
126    to determine the actual number of values read or returned. */
127 
128 /* Create a sonic stream for audio with the given sampleRate (presumably in Hz - confirm).
129   Return NULL only if we are out of memory and cannot allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
130 sonicStream sonicCreateStream(int sampleRate, int numChannels);
131 /* Destroy the sonic stream.  Per the sonicDestroySpectrogram notes below, a spectrogram is destroyed along with it. */
132 void sonicDestroyStream(sonicStream stream);
133 /* Attach user data to the stream; read it back with sonicGetUserData. */
134 void sonicSetUserData(sonicStream stream, void *userData);
135 /* Retrieve the user data attached to the stream with sonicSetUserData. */
136 void *sonicGetUserData(sonicStream stream);
137 /* Use this to write floating point data to be sped up or slowed down into the stream.
138    Values must be between -1 and 1; numSamples counts samples per channel.  Return 0
139    if memory realloc failed, otherwise 1. */
140 int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples);
141 /* Use this to write 16-bit data to be sped up or slowed down into the stream; numSamples
142    counts samples per channel.  Return 0 if memory realloc failed, otherwise 1. */
143 int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples);
144 /* Use this to write 8-bit unsigned data to be sped up or slowed down into the stream;
145    numSamples counts samples per channel.  Return 0 if memory realloc failed, otherwise 1. */
146 int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples,
147                                    int numSamples);
148 /* Use this to read floating point data out of the stream into samples (sample counts
149    are per channel).  Zero is returned when no data is available; that is not an error. */
150 int sonicReadFloatFromStream(sonicStream stream, float* samples,
151                              int maxSamples);
152 /* Use this to read 16-bit data out of the stream into samples (sample counts are per
153    channel).  Zero is returned when no data is available; that is not an error. */
154 int sonicReadShortFromStream(sonicStream stream, short* samples,
155                              int maxSamples);
156 /* Use this to read 8-bit unsigned data out of the stream into samples (sample counts
157    are per channel).  Zero is returned when no data is available; that is not an error. */
158 int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
159                                     int maxSamples);
160 /* Force the sonic stream to generate output using whatever data it currently has.  No
161    extra delay will be added to the output, but flushing in the middle of words could
162    introduce distortion.  NOTE(review): return value is undocumented; presumably 1 = ok, 0 = failure - confirm. */
163 int sonicFlushStream(sonicStream stream);
164 /* Return the number of samples in the output buffer (presumably counted per channel - confirm). */
165 int sonicSamplesAvailable(sonicStream stream);
166 /* Get the speed of the stream (the speedup factor described at the top of this file). */
167 float sonicGetSpeed(sonicStream stream);
168 /* Set the speed of the stream: factors above 1.0 speed up, factors below 1.0 slow down. */
169 void sonicSetSpeed(sonicStream stream, float speed);
170 /* Get the pitch of the stream (presumably a scaling factor where 1.0 is unchanged - confirm). */
171 float sonicGetPitch(sonicStream stream);
172 /* Set the pitch of the stream. */
173 void sonicSetPitch(sonicStream stream, float pitch);
174 /* Get the rate of the stream (presumably a playback-rate factor where 1.0 is unchanged - confirm). */
175 float sonicGetRate(sonicStream stream);
176 /* Set the rate of the stream. */
177 void sonicSetRate(sonicStream stream, float rate);
178 /* Get the volume, i.e. the scaling factor of the stream. */
179 float sonicGetVolume(sonicStream stream);
180 /* Set the volume, i.e. the scaling factor of the stream. */
181 void sonicSetVolume(sonicStream stream, float volume);
182 /* Chord pitch is DEPRECATED.  As far as I know, it was never used by anyone.  These
183    functions still exist only to avoid breaking existing code. */
184 /* Get the chord pitch setting (deprecated). */
185 int sonicGetChordPitch(sonicStream stream);
186 /* Set chord pitch mode on or off (deprecated).  Default is off.  See the documentation
187    page for a description of this feature. */
188 void sonicSetChordPitch(sonicStream stream, int useChordPitch);
189 /* Get the quality setting (see sonicSetQuality). */
190 int sonicGetQuality(sonicStream stream);
191 /* Set the "quality".  The default, 0, is virtually as good as 1, but very much
192  * faster. */
193 void sonicSetQuality(sonicStream stream, int quality);
194 /* Get the sample rate of the stream. */
195 int sonicGetSampleRate(sonicStream stream);
196 /* Set the sample rate of the stream.  Side effect: this will drop any samples that
197  * have not been read. */
198 void sonicSetSampleRate(sonicStream stream, int sampleRate);
199 /* Get the number of channels (1 for mono, 2 for stereo). */
200 int sonicGetNumChannels(sonicStream stream);
201 /* Set the number of channels.  Side effect: this will drop any samples that have
202  * not been read. */
203 void sonicSetNumChannels(sonicStream stream, int numChannels);
204 /* This is a non-stream oriented interface to just change the speed of a floating point
205    sound sample.  It works in-place on the sample array, so there must be at least
206    speed*numSamples available space in the array. Returns the new number of samples.
207    NOTE(review): slowing down (speed < 1) produces more samples, so the space needed presumably scales with 1/speed - confirm. */
208 int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
209                           float pitch, float rate, float volume,
210                           int useChordPitch, int sampleRate, int numChannels);
211 /* This is a non-stream oriented interface to just change the speed of a 16-bit sound
212    sample.  It works in-place on the sample array, so there must be at least
213    speed*numSamples available space in the array. Returns the new number of samples.
214    NOTE(review): slowing down (speed < 1) produces more samples, so the space needed presumably scales with 1/speed - confirm. */
215 int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
216                           float pitch, float rate, float volume,
217                           int useChordPitch, int sampleRate, int numChannels);
218 
219 #ifdef SONIC_SPECTROGRAM
220 /*
221 This code generates high quality spectrograms from sound samples, using
222 Time-Aliased-FFTs as described at:
223 
224     https://github.com/waywardgeek/spectrogram
225 
226 Basically, two adjacent pitch periods are overlap-added to create a sound
227 sample that accurately represents the speech sound at that moment in time.
228 This set of samples is converted to a spectral line using an FFT, and the result
229 is saved as a single spectral line at that moment in time.  The resulting
230 spectral lines vary in resolution (it is equal to the number of samples in the
231 pitch period), and the spacing of spectral lines also varies (proportional to
232 the number of samples in the pitch period).
233 
234 To generate a bitmap, linear interpolation is used to render the grayscale
235 value at any particular point in time and frequency.
236 */
237 
238 #define SONIC_MAX_SPECTRUM_FREQ 5000
239 
240 struct sonicSpectrogramStruct;  /* Opaque here: the struct body is not defined in this header. */
241 struct sonicBitmapStruct;  /* Forward declaration; the struct is defined further down. */
242 typedef struct sonicSpectrogramStruct* sonicSpectrogram;  /* Spectrogram handle. */
243 typedef struct sonicBitmapStruct* sonicBitmap;  /* Bitmap handle. */
244 
245 /* sonicBitmap objects represent spectrograms as grayscale bitmaps where each
246    pixel is from 0 (black) to 255 (white).  Bitmaps are rows*cols in size.
247    Rows are indexed top to bottom and columns are indexed left to right. */
248 struct sonicBitmapStruct {
249   unsigned char* data;  /* Pixel values, numRows*numCols of them; presumably row-major - confirm. */
250   int numRows;  /* Number of rows (indexed top to bottom). */
251   int numCols;  /* Number of columns (indexed left to right). */
252 };
253 /* NOTE(review): duplicate of the sonicBitmap typedef a few lines up; legal only since C11. */
254 typedef struct sonicBitmapStruct* sonicBitmap;
255 
256 /* Enable computation of a spectrogram on the fly. */
257 void sonicComputeSpectrogram(sonicStream stream);
258 
259 /* Get the spectrogram of the stream (computation is enabled with sonicComputeSpectrogram). */
260 sonicSpectrogram sonicGetSpectrogram(sonicStream stream);
261 
262 /* Create an empty spectrogram for the given sampleRate. Called automatically if
263    sonicComputeSpectrogram has been called. */
264 sonicSpectrogram sonicCreateSpectrogram(int sampleRate);
265 
266 /* Destroy the spectrogram.  This is called automatically when calling
267    sonicDestroyStream. */
268 void sonicDestroySpectrogram(sonicSpectrogram spectrogram);
269 
270 /* Convert the spectrogram to a numRows x numCols bitmap. Caller must destroy the bitmap with sonicDestroyBitmap when done. */
271 sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
272                                             int numRows, int numCols);
273 
274 /* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */
275 void sonicDestroyBitmap(sonicBitmap bitmap);
276 /* Write the bitmap to fileName, presumably as a PGM image (going by the name); return value is undocumented - confirm. */
277 int sonicWritePGM(sonicBitmap bitmap, char* fileName);
278 
279 /* Add two pitch periods worth of 16-bit samples to the spectrogram.  There must be
280    2*period samples.  Time should advance one pitch period for each call to
281    this function, so that consecutive calls overlap by one period. */
282 void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
283                                       short* samples, int numSamples,
284                                       int numChannels);
285 #endif  /* SONIC_SPECTROGRAM */
286 
287 #ifdef __cplusplus
288 }
289 #endif
290 
291 #endif  /* SONIC_H_ */
292