diff options
Diffstat (limited to 'sine.c')
| -rw-r--r-- | sine.c | 591 |
1 files changed, 286 insertions, 305 deletions
| @@ -27,64 +27,66 @@ | |||
| 27 | 27 | ||
| 28 | /*---------------------------------------------------------------------------*\ | 28 | /*---------------------------------------------------------------------------*\ |
| 29 | 29 | ||
| 30 | INCLUDES | 30 | INCLUDES |
| 31 | 31 | ||
| 32 | \*---------------------------------------------------------------------------*/ | 32 | \*---------------------------------------------------------------------------*/ |
| 33 | 33 | ||
| 34 | #include <stdlib.h> | 34 | #include "sine.h" |
| 35 | #include <stdio.h> | 35 | |
| 36 | #include <math.h> | 36 | #include <math.h> |
| 37 | #include <stdio.h> | ||
| 38 | #include <stdlib.h> | ||
| 37 | 39 | ||
| 38 | #include "defines.h" | 40 | #include "defines.h" |
| 39 | #include "sine.h" | ||
| 40 | #include "kiss_fft.h" | 41 | #include "kiss_fft.h" |
| 41 | 42 | ||
| 42 | #define HPF_BETA 0.125 | 43 | #define HPF_BETA 0.125 |
| 43 | 44 | ||
| 44 | /*---------------------------------------------------------------------------*\ | 45 | /*---------------------------------------------------------------------------*\ |
| 45 | 46 | ||
| 46 | HEADERS | 47 | HEADERS |
| 47 | 48 | ||
| 48 | \*---------------------------------------------------------------------------*/ | 49 | \*---------------------------------------------------------------------------*/ |
| 49 | 50 | ||
| 50 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, | 51 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, |
| 51 | float pstep); | 52 | float pstep); |
| 52 | 53 | ||
| 53 | /*---------------------------------------------------------------------------*\ | 54 | /*---------------------------------------------------------------------------*\ |
| 54 | 55 | ||
| 55 | FUNCTIONS | 56 | FUNCTIONS |
| 56 | 57 | ||
| 57 | \*---------------------------------------------------------------------------*/ | 58 | \*---------------------------------------------------------------------------*/ |
| 58 | 59 | ||
| 59 | C2CONST c2const_create(int Fs, float framelength_s) { | 60 | C2CONST c2const_create(int Fs, float framelength_s) { |
| 60 | C2CONST c2const; | 61 | C2CONST c2const; |
| 61 | 62 | ||
| 62 | assert((Fs == 8000) || (Fs = 16000)); | 63 | assert((Fs == 8000) || (Fs == 16000)); |
| 63 | c2const.Fs = Fs; | 64 | c2const.Fs = Fs; |
| 64 | c2const.n_samp = round(Fs*framelength_s); | 65 | c2const.n_samp = round(Fs * framelength_s); |
| 65 | c2const.max_amp = floor(Fs*P_MIN_S/2); | 66 | c2const.max_amp = floor(Fs * P_MAX_S / 2); |
| 66 | c2const.p_min = floor(Fs*P_MIN_S); | 67 | c2const.p_min = floor(Fs * P_MIN_S); |
| 67 | c2const.p_max = floor(Fs*P_MAX_S); | 68 | c2const.p_max = floor(Fs * P_MAX_S); |
| 68 | c2const.m_pitch = floor(Fs*M_PITCH_S); | 69 | c2const.m_pitch = floor(Fs * M_PITCH_S); |
| 69 | c2const.Wo_min = TWO_PI/c2const.p_max; | 70 | c2const.Wo_min = TWO_PI / c2const.p_max; |
| 70 | c2const.Wo_max = TWO_PI/c2const.p_min; | 71 | c2const.Wo_max = TWO_PI / c2const.p_min; |
| 71 | 72 | ||
| 72 | if (Fs == 8000) { | 73 | if (Fs == 8000) { |
| 73 | c2const.nw = 279; | 74 | c2const.nw = 279; |
| 74 | } else { | 75 | } else { |
| 75 | c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */ | 76 | c2const.nw = 511; /* actually a bit shorter in time but lets us maintain |
| 76 | } | 77 | constant FFT size */ |
| 78 | } | ||
| 77 | 79 | ||
| 78 | c2const.tw = Fs*TW_S; | 80 | c2const.tw = Fs * TW_S; |
| 79 | 81 | ||
| 80 | /* | 82 | /* |
| 81 | fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch); | 83 | fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch); |
| 82 | fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max); | 84 | fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max); |
| 83 | fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max); | 85 | fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max); |
| 84 | fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw); | 86 | fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw); |
| 85 | */ | 87 | */ |
| 86 | 88 | ||
| 87 | return c2const; | 89 | return c2const; |
| 88 | } | 90 | } |
| 89 | 91 | ||
| 90 | /*---------------------------------------------------------------------------*\ | 92 | /*---------------------------------------------------------------------------*\ |
| @@ -97,13 +99,13 @@ C2CONST c2const_create(int Fs, float framelength_s) { | |||
| 97 | 99 | ||
| 98 | \*---------------------------------------------------------------------------*/ | 100 | \*---------------------------------------------------------------------------*/ |
| 99 | 101 | ||
| 100 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], float W[]) | 102 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, |
| 101 | { | 103 | float w[], float W[]) { |
| 102 | float m; | 104 | float m; |
| 103 | COMP wshift[FFT_ENC]; | 105 | COMP wshift[FFT_ENC]; |
| 104 | int i,j; | 106 | int i, j; |
| 105 | int m_pitch = c2const->m_pitch; | 107 | int m_pitch = c2const->m_pitch; |
| 106 | int nw = c2const->nw; | 108 | int nw = c2const->nw; |
| 107 | 109 | ||
| 108 | /* | 110 | /* |
| 109 | Generate Hamming window centered on M-sample pitch analysis window | 111 | Generate Hamming window centered on M-sample pitch analysis window |
| @@ -117,20 +119,18 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
| 117 | */ | 119 | */ |
| 118 | 120 | ||
| 119 | m = 0.0; | 121 | m = 0.0; |
| 120 | for(i=0; i<m_pitch/2-nw/2; i++) | 122 | for (i = 0; i < m_pitch / 2 - nw / 2; i++) w[i] = 0.0; |
| 121 | w[i] = 0.0; | 123 | for (i = m_pitch / 2 - nw / 2, j = 0; i < m_pitch / 2 + nw / 2; i++, j++) { |
| 122 | for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++) { | 124 | w[i] = 0.5 - 0.5 * cosf(TWO_PI * j / (nw - 1)); |
| 123 | w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1)); | 125 | m += w[i] * w[i]; |
| 124 | m += w[i]*w[i]; | ||
| 125 | } | 126 | } |
| 126 | for(i=m_pitch/2+nw/2; i<m_pitch; i++) | 127 | for (i = m_pitch / 2 + nw / 2; i < m_pitch; i++) w[i] = 0.0; |
| 127 | w[i] = 0.0; | ||
| 128 | 128 | ||
| 129 | /* Normalise - makes freq domain amplitude estimation straight | 129 | /* Normalise - makes freq domain amplitude estimation straight |
| 130 | forward */ | 130 | forward */ |
| 131 | 131 | ||
| 132 | m = 1.0/sqrtf(m*FFT_ENC); | 132 | m = 1.0 / sqrtf(m * FFT_ENC); |
| 133 | for(i=0; i<m_pitch; i++) { | 133 | for (i = 0; i < m_pitch; i++) { |
| 134 | w[i] *= m; | 134 | w[i] *= m; |
| 135 | } | 135 | } |
| 136 | 136 | ||
| @@ -157,14 +157,13 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
| 157 | 157 | ||
| 158 | COMP temp[FFT_ENC]; | 158 | COMP temp[FFT_ENC]; |
| 159 | 159 | ||
| 160 | for(i=0; i<FFT_ENC; i++) { | 160 | for (i = 0; i < FFT_ENC; i++) { |
| 161 | wshift[i].real = 0.0; | 161 | wshift[i].real = 0.0; |
| 162 | wshift[i].imag = 0.0; | 162 | wshift[i].imag = 0.0; |
| 163 | } | 163 | } |
| 164 | for(i=0; i<nw/2; i++) | 164 | for (i = 0; i < nw / 2; i++) wshift[i].real = w[i + m_pitch / 2]; |
| 165 | wshift[i].real = w[i+m_pitch/2]; | 165 | for (i = FFT_ENC - nw / 2, j = m_pitch / 2 - nw / 2; i < FFT_ENC; i++, j++) |
| 166 | for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++) | 166 | wshift[i].real = w[j]; |
| 167 | wshift[i].real = w[j]; | ||
| 168 | 167 | ||
| 169 | codec2_fft(fft_fwd_cfg, wshift, temp); | 168 | codec2_fft(fft_fwd_cfg, wshift, temp); |
| 170 | 169 | ||
| @@ -191,12 +190,10 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
| 191 | 190 | ||
| 192 | */ | 191 | */ |
| 193 | 192 | ||
| 194 | 193 | for (i = 0; i < FFT_ENC / 2; i++) { | |
| 195 | for(i=0; i<FFT_ENC/2; i++) { | 194 | W[i] = temp[i + FFT_ENC / 2].real; |
| 196 | W[i] = temp[i + FFT_ENC / 2].real; | 195 | W[i + FFT_ENC / 2] = temp[i].real; |
| 197 | W[i + FFT_ENC / 2] = temp[i].real; | ||
| 198 | } | 196 | } |
| 199 | |||
| 200 | } | 197 | } |
| 201 | 198 | ||
| 202 | /*---------------------------------------------------------------------------*\ | 199 | /*---------------------------------------------------------------------------*\ |
| @@ -211,12 +208,11 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
| 211 | 208 | ||
| 212 | \*---------------------------------------------------------------------------*/ | 209 | \*---------------------------------------------------------------------------*/ |
| 213 | 210 | ||
| 214 | float hpf(float x, float states[]) | 211 | float hpf(float x, float states[]) { |
| 215 | { | 212 | states[0] = -HPF_BETA * states[0] + x - states[1]; |
| 216 | states[0] = -HPF_BETA*states[0] + x - states[1]; | 213 | states[1] = x; |
| 217 | states[1] = x; | ||
| 218 | 214 | ||
| 219 | return states[0]; | 215 | return states[0]; |
| 220 | } | 216 | } |
| 221 | 217 | ||
| 222 | /*---------------------------------------------------------------------------*\ | 218 | /*---------------------------------------------------------------------------*\ |
| @@ -230,41 +226,43 @@ float hpf(float x, float states[]) | |||
| 230 | \*---------------------------------------------------------------------------*/ | 226 | \*---------------------------------------------------------------------------*/ |
| 231 | 227 | ||
| 232 | // TODO: we can either go for a faster FFT using fftr and some stack usage | 228 | // TODO: we can either go for a faster FFT using fftr and some stack usage |
| 233 | // or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace | 229 | // or we can reduce stack usage to almost zero on STM32 by switching to |
| 230 | // fft_inplace | ||
| 234 | #if 1 | 231 | #if 1 |
| 235 | void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]) | 232 | void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], |
| 236 | { | 233 | float Sn[], float w[]) { |
| 237 | int i; | 234 | int i; |
| 238 | int m_pitch = c2const->m_pitch; | 235 | int m_pitch = c2const->m_pitch; |
| 239 | int nw = c2const->nw; | 236 | int nw = c2const->nw; |
| 240 | 237 | ||
| 241 | for(i=0; i<FFT_ENC; i++) { | 238 | for (i = 0; i < FFT_ENC; i++) { |
| 242 | Sw[i].real = 0.0; | 239 | Sw[i].real = 0.0; |
| 243 | Sw[i].imag = 0.0; | 240 | Sw[i].imag = 0.0; |
| 244 | } | 241 | } |
| 245 | 242 | ||
| 246 | /* Centre analysis window on time axis, we need to arrange input | 243 | /* Centre analysis window on time axis, we need to arrange input |
| 247 | to FFT this way to make FFT phases correct */ | 244 | to FFT this way to make FFT phases correct */ |
| 248 | 245 | ||
| 249 | /* move 2nd half to start of FFT input vector */ | 246 | /* move 2nd half to start of FFT input vector */ |
| 250 | 247 | ||
| 251 | for(i=0; i<nw/2; i++) | 248 | for (i = 0; i < nw / 2; i++) |
| 252 | Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2]; | 249 | Sw[i].real = Sn[i + m_pitch / 2] * w[i + m_pitch / 2]; |
| 253 | 250 | ||
| 254 | /* move 1st half to end of FFT input vector */ | 251 | /* move 1st half to end of FFT input vector */ |
| 255 | 252 | ||
| 256 | for(i=0; i<nw/2; i++) | 253 | for (i = 0; i < nw / 2; i++) |
| 257 | Sw[FFT_ENC-nw/2+i].real = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; | 254 | Sw[FFT_ENC - nw / 2 + i].real = |
| 255 | Sn[i + m_pitch / 2 - nw / 2] * w[i + m_pitch / 2 - nw / 2]; | ||
| 258 | 256 | ||
| 259 | codec2_fft_inplace(fft_fwd_cfg, Sw); | 257 | codec2_fft_inplace(fft_fwd_cfg, Sw); |
| 260 | } | 258 | } |
| 261 | #else | 259 | #else |
| 262 | void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) | 260 | void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], |
| 263 | { | 261 | float w[]) { |
| 264 | int i; | 262 | int i; |
| 265 | float sw[FFT_ENC]; | 263 | float sw[FFT_ENC]; |
| 266 | 264 | ||
| 267 | for(i=0; i<FFT_ENC; i++) { | 265 | for (i = 0; i < FFT_ENC; i++) { |
| 268 | sw[i] = 0.0; | 266 | sw[i] = 0.0; |
| 269 | } | 267 | } |
| 270 | 268 | ||
| @@ -273,19 +271,18 @@ void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) | |||
| 273 | 271 | ||
| 274 | /* move 2nd half to start of FFT input vector */ | 272 | /* move 2nd half to start of FFT input vector */ |
| 275 | 273 | ||
| 276 | for(i=0; i<nw/2; i++) | 274 | for (i = 0; i < nw / 2; i++) sw[i] = Sn[i + m_pitch / 2] * w[i + m_pitch / 2]; |
| 277 | sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2]; | ||
| 278 | 275 | ||
| 279 | /* move 1st half to end of FFT input vector */ | 276 | /* move 1st half to end of FFT input vector */ |
| 280 | 277 | ||
| 281 | for(i=0; i<nw/2; i++) | 278 | for (i = 0; i < nw / 2; i++) |
| 282 | sw[FFT_ENC-nw/2+i] = Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]; | 279 | sw[FFT_ENC - nw / 2 + i] = |
| 280 | Sn[i + m_pitch / 2 - nw / 2] * w[i + m_pitch / 2 - nw / 2]; | ||
| 283 | 281 | ||
| 284 | codec2_fftr(fftr_fwd_cfg, sw, Sw); | 282 | codec2_fftr(fftr_fwd_cfg, sw, Sw); |
| 285 | } | 283 | } |
| 286 | #endif | 284 | #endif |
| 287 | 285 | ||
| 288 | |||
| 289 | /*---------------------------------------------------------------------------*\ | 286 | /*---------------------------------------------------------------------------*\ |
| 290 | 287 | ||
| 291 | FUNCTION....: two_stage_pitch_refinement | 288 | FUNCTION....: two_stage_pitch_refinement |
| @@ -297,38 +294,35 @@ void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[]) | |||
| 297 | 294 | ||
| 298 | \*---------------------------------------------------------------------------*/ | 295 | \*---------------------------------------------------------------------------*/ |
| 299 | 296 | ||
| 300 | void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) | 297 | void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) { |
| 301 | { | 298 | float pmin, pmax, pstep; /* pitch refinement minimum, maximum and step */ |
| 302 | float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */ | ||
| 303 | 299 | ||
| 304 | /* Coarse refinement */ | 300 | /* Coarse refinement */ |
| 305 | 301 | ||
| 306 | pmax = TWO_PI/model->Wo + 5; | 302 | pmax = TWO_PI / model->Wo + 5; |
| 307 | pmin = TWO_PI/model->Wo - 5; | 303 | pmin = TWO_PI / model->Wo - 5; |
| 308 | pstep = 1.0; | 304 | pstep = 1.0; |
| 309 | hs_pitch_refinement(model,Sw,pmin,pmax,pstep); | 305 | hs_pitch_refinement(model, Sw, pmin, pmax, pstep); |
| 310 | 306 | ||
| 311 | /* Fine refinement */ | 307 | /* Fine refinement */ |
| 312 | 308 | ||
| 313 | pmax = TWO_PI/model->Wo + 1; | 309 | pmax = TWO_PI / model->Wo + 1; |
| 314 | pmin = TWO_PI/model->Wo - 1; | 310 | pmin = TWO_PI / model->Wo - 1; |
| 315 | pstep = 0.25; | 311 | pstep = 0.25; |
| 316 | hs_pitch_refinement(model,Sw,pmin,pmax,pstep); | 312 | hs_pitch_refinement(model, Sw, pmin, pmax, pstep); |
| 317 | 313 | ||
| 318 | /* Limit range */ | 314 | /* Limit range */ |
| 319 | 315 | ||
| 320 | if (model->Wo < TWO_PI/c2const->p_max) | 316 | if (model->Wo < TWO_PI / c2const->p_max) model->Wo = TWO_PI / c2const->p_max; |
| 321 | model->Wo = TWO_PI/c2const->p_max; | 317 | if (model->Wo > TWO_PI / c2const->p_min) model->Wo = TWO_PI / c2const->p_min; |
| 322 | if (model->Wo > TWO_PI/c2const->p_min) | ||
| 323 | model->Wo = TWO_PI/c2const->p_min; | ||
| 324 | 318 | ||
| 325 | model->L = floorf(PI/model->Wo); | 319 | model->L = floorf(PI / model->Wo); |
| 326 | 320 | ||
| 327 | /* trap occasional round off issues with floorf() */ | 321 | /* trap occasional round off issues with floorf() */ |
| 328 | if (model->Wo*model->L >= 0.95*PI) { | 322 | if (model->Wo * model->L >= 0.95 * PI) { |
| 329 | model->L--; | 323 | model->L--; |
| 330 | } | 324 | } |
| 331 | assert(model->Wo*model->L < PI); | 325 | assert(model->Wo * model->L < PI); |
| 332 | } | 326 | } |
| 333 | 327 | ||
| 334 | /*---------------------------------------------------------------------------*\ | 328 | /*---------------------------------------------------------------------------*\ |
| @@ -348,35 +342,39 @@ void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]) | |||
| 348 | 342 | ||
| 349 | \*---------------------------------------------------------------------------*/ | 343 | \*---------------------------------------------------------------------------*/ |
| 350 | 344 | ||
| 351 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep) | 345 | void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, |
| 352 | { | 346 | float pstep) { |
| 353 | int m; /* loop variable */ | 347 | int m; /* loop variable */ |
| 354 | int b; /* bin for current harmonic centre */ | 348 | int b; /* bin for current harmonic centre */ |
| 355 | float E; /* energy for current pitch*/ | 349 | float E; /* energy for current pitch*/ |
| 356 | float Wo; /* current "test" fundamental freq. */ | 350 | float Wo; /* current "test" fundamental freq. */ |
| 357 | float Wom; /* Wo that maximises E */ | 351 | float Wom; /* Wo that maximises E */ |
| 358 | float Em; /* mamimum energy */ | 352 | float Em; /* mamimum energy */ |
| 359 | float r, one_on_r; /* number of rads/bin */ | 353 | float r, one_on_r; /* number of rads/bin */ |
| 360 | float p; /* current pitch */ | 354 | float p; /* current pitch */ |
| 361 | 355 | ||
| 362 | /* Initialisation */ | 356 | /* Initialisation */ |
| 363 | 357 | ||
| 364 | model->L = PI/model->Wo; /* use initial pitch est. for L */ | 358 | model->L = PI / model->Wo; /* use initial pitch est. for L */ |
| 365 | Wom = model->Wo; | 359 | Wom = model->Wo; |
| 366 | Em = 0.0; | 360 | Em = 0.0; |
| 367 | r = TWO_PI/FFT_ENC; | 361 | r = TWO_PI / FFT_ENC; |
| 368 | one_on_r = 1.0/r; | 362 | one_on_r = 1.0 / r; |
| 369 | 363 | ||
| 370 | /* Determine harmonic sum for a range of Wo values */ | 364 | /* Determine harmonic sum for a range of Wo values */ |
| 371 | 365 | ||
| 372 | for(p=pmin; p<=pmax; p+=pstep) { | 366 | for (p = pmin; p <= pmax; p += pstep) { |
| 373 | E = 0.0; | 367 | E = 0.0; |
| 374 | Wo = TWO_PI/p; | 368 | Wo = TWO_PI / p; |
| 369 | |||
| 370 | float bFloat = Wo * one_on_r; | ||
| 371 | float currentBFloat = bFloat; | ||
| 375 | 372 | ||
| 376 | /* Sum harmonic magnitudes */ | 373 | /* Sum harmonic magnitudes */ |
| 377 | for(m=1; m<=model->L; m++) { | 374 | for (m = 1; m <= model->L; m++) { |
| 378 | b = (int)(m*Wo*one_on_r + 0.5); | 375 | b = (int)(currentBFloat + 0.5); |
| 379 | E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; | 376 | E += Sw[b].real * Sw[b].real + Sw[b].imag * Sw[b].imag; |
| 377 | currentBFloat += bFloat; | ||
| 380 | } | 378 | } |
| 381 | /* Compare to see if this is a maximum */ | 379 | /* Compare to see if this is a maximum */ |
| 382 | 380 | ||
| @@ -399,35 +397,35 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float | |||
| 399 | 397 | ||
| 400 | \*---------------------------------------------------------------------------*/ | 398 | \*---------------------------------------------------------------------------*/ |
| 401 | 399 | ||
| 402 | void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) | 400 | void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) { |
| 403 | { | 401 | int i, m; /* loop variables */ |
| 404 | int i,m; /* loop variables */ | 402 | int am, bm; /* bounds of current harmonic */ |
| 405 | int am,bm; /* bounds of current harmonic */ | 403 | float den; /* denominator of amplitude expression */ |
| 406 | float den; /* denominator of amplitude expression */ | ||
| 407 | 404 | ||
| 408 | float r = TWO_PI/FFT_ENC; | 405 | float r = TWO_PI / FFT_ENC; |
| 409 | float one_on_r = 1.0/r; | 406 | float one_on_r = 1.0 / r; |
| 410 | 407 | ||
| 411 | for(m=1; m<=model->L; m++) { | 408 | for (m = 1; m <= model->L; m++) { |
| 412 | /* Estimate ampltude of harmonic */ | 409 | /* Estimate ampltude of harmonic */ |
| 413 | 410 | ||
| 414 | den = 0.0; | 411 | den = 0.0; |
| 415 | am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); | 412 | am = (int)((m - 0.5) * model->Wo * one_on_r + 0.5); |
| 416 | bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); | 413 | bm = (int)((m + 0.5) * model->Wo * one_on_r + 0.5); |
| 417 | 414 | ||
| 418 | for(i=am; i<bm; i++) { | 415 | for (i = am; i < bm; i++) { |
| 419 | den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag; | 416 | den += Sw[i].real * Sw[i].real + Sw[i].imag * Sw[i].imag; |
| 420 | } | 417 | } |
| 421 | 418 | ||
| 422 | model->A[m] = sqrtf(den); | 419 | model->A[m] = sqrtf(den); |
| 423 | 420 | ||
| 424 | if (est_phase) { | 421 | if (est_phase) { |
| 425 | int b = (int)(m*model->Wo/r + 0.5); /* DFT bin of centre of current harmonic */ | 422 | int b = (int)(m * model->Wo / r + |
| 423 | 0.5); /* DFT bin of centre of current harmonic */ | ||
| 426 | 424 | ||
| 427 | /* Estimate phase of harmonic, this is expensive in CPU for | 425 | /* Estimate phase of harmonic, this is expensive in CPU for |
| 428 | embedded devicesso we make it an option */ | 426 | embedded devicesso we make it an option */ |
| 429 | 427 | ||
| 430 | model->phi[m] = atan2f(Sw[b].imag,Sw[b].real); | 428 | model->phi[m] = atan2f(Sw[b].imag, Sw[b].real); |
| 431 | } | 429 | } |
| 432 | } | 430 | } |
| 433 | } | 431 | } |
| @@ -443,119 +441,110 @@ void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) | |||
| 443 | 441 | ||
| 444 | \*---------------------------------------------------------------------------*/ | 442 | \*---------------------------------------------------------------------------*/ |
| 445 | 443 | ||
| 446 | float est_voicing_mbe( | 444 | float est_voicing_mbe(C2CONST *c2const, MODEL *model, COMP Sw[], float W[]) { |
| 447 | C2CONST *c2const, | 445 | int l, al, bl, m; /* loop variables */ |
| 448 | MODEL *model, | 446 | COMP Am; /* amplitude sample for this band */ |
| 449 | COMP Sw[], | 447 | int offset; /* centers Hw[] about current harmonic */ |
| 450 | float W[] | 448 | float den; /* denominator of Am expression */ |
| 451 | ) | 449 | float error; /* accumulated error between original and synthesised */ |
| 452 | { | 450 | float Wo; |
| 453 | int l,al,bl,m; /* loop variables */ | 451 | float sig, snr; |
| 454 | COMP Am; /* amplitude sample for this band */ | 452 | float elow, ehigh, eratio; |
| 455 | int offset; /* centers Hw[] about current harmonic */ | 453 | float sixty; |
| 456 | float den; /* denominator of Am expression */ | 454 | COMP Ew; |
| 457 | float error; /* accumulated error between original and synthesised */ | 455 | Ew.real = 0; |
| 458 | float Wo; | 456 | Ew.imag = 0; |
| 459 | float sig, snr; | 457 | |
| 460 | float elow, ehigh, eratio; | 458 | int l_1000hz = model->L * 1000.0 / (c2const->Fs / 2); |
| 461 | float sixty; | 459 | sig = 1E-4; |
| 462 | COMP Ew; | 460 | for (l = 1; l <= l_1000hz; l++) { |
| 463 | Ew.real = 0; | 461 | sig += model->A[l] * model->A[l]; |
| 464 | Ew.imag = 0; | 462 | } |
| 465 | |||
| 466 | int l_1000hz = model->L*1000.0/(c2const->Fs/2); | ||
| 467 | sig = 1E-4; | ||
| 468 | for(l=1; l<=l_1000hz; l++) { | ||
| 469 | sig += model->A[l]*model->A[l]; | ||
| 470 | } | ||
| 471 | 463 | ||
| 472 | Wo = model->Wo; | 464 | Wo = model->Wo; |
| 473 | error = 1E-4; | 465 | error = 1E-4; |
| 474 | 466 | ||
| 475 | /* Just test across the harmonics in the first 1000 Hz */ | 467 | /* Just test across the harmonics in the first 1000 Hz */ |
| 476 | 468 | ||
| 477 | for(l=1; l<=l_1000hz; l++) { | 469 | for (l = 1; l <= l_1000hz; l++) { |
| 478 | Am.real = 0.0; | 470 | Am.real = 0.0; |
| 479 | Am.imag = 0.0; | 471 | Am.imag = 0.0; |
| 480 | den = 0.0; | 472 | den = 0.0; |
| 481 | al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); | 473 | al = ceilf((l - 0.5) * Wo * FFT_ENC / TWO_PI); |
| 482 | bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); | 474 | bl = ceilf((l + 0.5) * Wo * FFT_ENC / TWO_PI); |
| 483 | 475 | ||
| 484 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ | 476 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ |
| 485 | 477 | ||
| 486 | offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; | 478 | offset = FFT_ENC / 2 - l * Wo * FFT_ENC / TWO_PI + 0.5; |
| 487 | for(m=al; m<bl; m++) { | 479 | for (m = al; m < bl; m++) { |
| 488 | Am.real += Sw[m].real*W[offset+m]; | 480 | Am.real += Sw[m].real * W[offset + m]; |
| 489 | Am.imag += Sw[m].imag*W[offset+m]; | 481 | Am.imag += Sw[m].imag * W[offset + m]; |
| 490 | den += W[offset+m]*W[offset+m]; | 482 | den += W[offset + m] * W[offset + m]; |
| 491 | } | 483 | } |
| 492 | 484 | ||
| 493 | Am.real = Am.real/den; | 485 | Am.real = Am.real / den; |
| 494 | Am.imag = Am.imag/den; | 486 | Am.imag = Am.imag / den; |
| 495 | 487 | ||
| 496 | /* Determine error between estimated harmonic and original */ | 488 | /* Determine error between estimated harmonic and original */ |
| 497 | 489 | ||
| 498 | for(m=al; m<bl; m++) { | 490 | for (m = al; m < bl; m++) { |
| 499 | Ew.real = Sw[m].real - Am.real*W[offset+m]; | 491 | Ew.real = Sw[m].real - Am.real * W[offset + m]; |
| 500 | Ew.imag = Sw[m].imag - Am.imag*W[offset+m]; | 492 | Ew.imag = Sw[m].imag - Am.imag * W[offset + m]; |
| 501 | error += Ew.real*Ew.real; | 493 | error += Ew.real * Ew.real; |
| 502 | error += Ew.imag*Ew.imag; | 494 | error += Ew.imag * Ew.imag; |
| 503 | } | ||
| 504 | } | 495 | } |
| 496 | } | ||
| 505 | 497 | ||
| 506 | snr = 10.0*log10f(sig/error); | 498 | snr = 10.0 * log10f(sig / error); |
| 507 | if (snr > V_THRESH) | 499 | if (snr > V_THRESH) |
| 508 | model->voiced = 1; | 500 | model->voiced = 1; |
| 509 | else | 501 | else |
| 510 | model->voiced = 0; | 502 | model->voiced = 0; |
| 511 | 503 | ||
| 512 | /* post processing, helps clean up some voicing errors ------------------*/ | 504 | /* post processing, helps clean up some voicing errors ------------------*/ |
| 513 | |||
| 514 | /* | ||
| 515 | Determine the ratio of low freqency to high frequency energy, | ||
| 516 | voiced speech tends to be dominated by low frequency energy, | ||
| 517 | unvoiced by high frequency. This measure can be used to | ||
| 518 | determine if we have made any gross errors. | ||
| 519 | */ | ||
| 520 | |||
| 521 | int l_2000hz = model->L*2000.0/(c2const->Fs/2); | ||
| 522 | int l_4000hz = model->L*4000.0/(c2const->Fs/2); | ||
| 523 | elow = ehigh = 1E-4; | ||
| 524 | for(l=1; l<=l_2000hz; l++) { | ||
| 525 | elow += model->A[l]*model->A[l]; | ||
| 526 | } | ||
| 527 | for(l=l_2000hz; l<=l_4000hz; l++) { | ||
| 528 | ehigh += model->A[l]*model->A[l]; | ||
| 529 | } | ||
| 530 | eratio = 10.0*log10f(elow/ehigh); | ||
| 531 | 505 | ||
| 532 | /* Look for Type 1 errors, strongly V speech that has been | 506 | /* |
| 533 | accidentally declared UV */ | 507 | Determine the ratio of low frequency to high frequency energy, |
| 508 | voiced speech tends to be dominated by low frequency energy, | ||
| 509 | unvoiced by high frequency. This measure can be used to | ||
| 510 | determine if we have made any gross errors. | ||
| 511 | */ | ||
| 512 | |||
| 513 | int l_2000hz = model->L * 2000.0 / (c2const->Fs / 2); | ||
| 514 | int l_4000hz = model->L * 4000.0 / (c2const->Fs / 2); | ||
| 515 | elow = ehigh = 1E-4; | ||
| 516 | for (l = 1; l <= l_2000hz; l++) { | ||
| 517 | elow += model->A[l] * model->A[l]; | ||
| 518 | } | ||
| 519 | for (l = l_2000hz; l <= l_4000hz; l++) { | ||
| 520 | ehigh += model->A[l] * model->A[l]; | ||
| 521 | } | ||
| 522 | eratio = 10.0 * log10f(elow / ehigh); | ||
| 534 | 523 | ||
| 535 | if (model->voiced == 0) | 524 | /* Look for Type 1 errors, strongly V speech that has been |
| 536 | if (eratio > 10.0) | 525 | accidentally declared UV */ |
| 537 | model->voiced = 1; | ||
| 538 | 526 | ||
| 539 | /* Look for Type 2 errors, strongly UV speech that has been | 527 | if (model->voiced == 0) |
| 540 | accidentally declared V */ | 528 | if (eratio > 10.0) model->voiced = 1; |
| 541 | 529 | ||
| 542 | if (model->voiced == 1) { | 530 | /* Look for Type 2 errors, strongly UV speech that has been |
| 543 | if (eratio < -10.0) | 531 | accidentally declared V */ |
| 544 | model->voiced = 0; | ||
| 545 | 532 | ||
| 546 | /* A common source of Type 2 errors is the pitch estimator | 533 | if (model->voiced == 1) { |
| 547 | gives a low (50Hz) estimate for UV speech, which gives a | 534 | if (eratio < -10.0) model->voiced = 0; |
| 548 | good match with noise due to the close harmoonic spacing. | ||
| 549 | These errors are much more common than people with 50Hz3 | ||
| 550 | pitch, so we have just a small eratio threshold. */ | ||
| 551 | 535 | ||
| 552 | sixty = 60.0*TWO_PI/c2const->Fs; | 536 | /* A common source of Type 2 errors is the pitch estimator |
| 553 | if ((eratio < -4.0) && (model->Wo <= sixty)) | 537 | gives a low (50Hz) estimate for UV speech, which gives a |
| 554 | model->voiced = 0; | 538 | good match with noise due to the close harmoonic spacing. |
| 555 | } | 539 | These errors are much more common than people with 50Hz3 |
| 556 | //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); | 540 | pitch, so we have just a small eratio threshold. */ |
| 557 | 541 | ||
| 558 | return snr; | 542 | sixty = 60.0 * TWO_PI / c2const->Fs; |
| 543 | if ((eratio < -4.0) && (model->Wo <= sixty)) model->voiced = 0; | ||
| 544 | } | ||
| 545 | // printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); | ||
| 546 | |||
| 547 | return snr; | ||
| 559 | } | 548 | } |
| 560 | 549 | ||
| 561 | /*---------------------------------------------------------------------------*\ | 550 | /*---------------------------------------------------------------------------*\ |
| @@ -564,32 +553,29 @@ float est_voicing_mbe( | |||
| 564 | AUTHOR......: David Rowe | 553 | AUTHOR......: David Rowe |
| 565 | DATE CREATED: 11/5/94 | 554 | DATE CREATED: 11/5/94 |
| 566 | 555 | ||
| 567 | Init function that generates the trapezoidal (Parzen) sythesis window. | 556 | Init function that generates the trapezoidal (Parzen) synthesis window. |
| 568 | 557 | ||
| 569 | \*---------------------------------------------------------------------------*/ | 558 | \*---------------------------------------------------------------------------*/ |
| 570 | 559 | ||
| 571 | void make_synthesis_window(C2CONST *c2const, float Pn[]) | 560 | void make_synthesis_window(C2CONST *c2const, float Pn[]) { |
| 572 | { | 561 | int i; |
| 573 | int i; | ||
| 574 | float win; | 562 | float win; |
| 575 | int n_samp = c2const->n_samp; | 563 | int n_samp = c2const->n_samp; |
| 576 | int tw = c2const->tw; | 564 | int tw = c2const->tw; |
| 577 | 565 | ||
| 578 | /* Generate Parzen window in time domain */ | 566 | /* Generate Parzen window in time domain */ |
| 579 | 567 | ||
| 580 | win = 0.0; | 568 | win = 0.0; |
| 581 | for(i=0; i<n_samp/2-tw; i++) | 569 | for (i = 0; i < n_samp / 2 - tw; i++) Pn[i] = 0.0; |
| 582 | Pn[i] = 0.0; | ||
| 583 | win = 0.0; | 570 | win = 0.0; |
| 584 | for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ ) | 571 | for (i = n_samp / 2 - tw; i < n_samp / 2 + tw; win += 1.0 / (2 * tw), i++) |
| 585 | Pn[i] = win; | 572 | Pn[i] = win; |
| 586 | for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++) | 573 | for (i = n_samp / 2 + tw; i < 3 * n_samp / 2 - tw; i++) Pn[i] = 1.0; |
| 587 | Pn[i] = 1.0; | ||
| 588 | win = 1.0; | 574 | win = 1.0; |
| 589 | for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++) | 575 | for (i = 3 * n_samp / 2 - tw; i < 3 * n_samp / 2 + tw; |
| 576 | win -= 1.0 / (2 * tw), i++) | ||
| 590 | Pn[i] = win; | 577 | Pn[i] = win; |
| 591 | for(i=3*n_samp/2+tw; i<2*n_samp; i++) | 578 | for (i = 3 * n_samp / 2 + tw; i < 2 * n_samp; i++) Pn[i] = 0.0; |
| 592 | Pn[i] = 0.0; | ||
| 593 | } | 579 | } |
| 594 | 580 | ||
| 595 | /*---------------------------------------------------------------------------*\ | 581 | /*---------------------------------------------------------------------------*\ |
| @@ -600,77 +586,72 @@ void make_synthesis_window(C2CONST *c2const, float Pn[]) | |||
| 600 | 586 | ||
| 601 | Synthesise a speech signal in the frequency domain from the | 587 | Synthesise a speech signal in the frequency domain from the |
| 602 | sinusodal model parameters. Uses overlap-add with a trapezoidal | 588 | sinusodal model parameters. Uses overlap-add with a trapezoidal |
| 603 | window to smoothly interpolate betwen frames. | 589 | window to smoothly interpolate between frames. |
| 604 | 590 | ||
| 605 | \*---------------------------------------------------------------------------*/ | 591 | \*---------------------------------------------------------------------------*/ |
| 606 | 592 | ||
| 607 | void synthesise( | 593 | void synthesise(int n_samp, codec2_fftr_cfg fftr_inv_cfg, |
| 608 | int n_samp, | 594 | float Sn_[], /* time domain synthesised signal */ |
| 609 | codec2_fftr_cfg fftr_inv_cfg, | 595 | MODEL *model, /* ptr to model parameters for this frame */ |
| 610 | float Sn_[], /* time domain synthesised signal */ | 596 | float Pn[], /* time domain Parzen window */ |
| 611 | MODEL *model, /* ptr to model parameters for this frame */ | 597 | int shift /* flag used to handle transition frames */ |
| 612 | float Pn[], /* time domain Parzen window */ | 598 | ) { |
| 613 | int shift /* flag used to handle transition frames */ | 599 | int i, l, j, b; /* loop variables */ |
| 614 | ) | 600 | COMP Sw_[FFT_DEC / 2 + 1]; /* DFT of synthesised signal */ |
| 615 | { | 601 | float sw_[FFT_DEC]; /* synthesised signal */ |
| 616 | int i,l,j,b; /* loop variables */ | 602 | |
| 617 | COMP Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */ | 603 | if (shift) { |
| 618 | float sw_[FFT_DEC]; /* synthesised signal */ | 604 | /* Update memories */ |
| 619 | 605 | for (i = 0; i < n_samp - 1; i++) { | |
| 620 | if (shift) { | 606 | Sn_[i] = Sn_[i + n_samp]; |
| 621 | /* Update memories */ | ||
| 622 | for(i=0; i<n_samp-1; i++) { | ||
| 623 | Sn_[i] = Sn_[i+n_samp]; | ||
| 624 | } | ||
| 625 | Sn_[n_samp-1] = 0.0; | ||
| 626 | } | 607 | } |
| 608 | Sn_[n_samp - 1] = 0.0; | ||
| 609 | } | ||
| 627 | 610 | ||
| 628 | for(i=0; i<FFT_DEC/2+1; i++) { | 611 | for (i = 0; i < FFT_DEC / 2 + 1; i++) { |
| 629 | Sw_[i].real = 0.0; | 612 | Sw_[i].real = 0.0; |
| 630 | Sw_[i].imag = 0.0; | 613 | Sw_[i].imag = 0.0; |
| 631 | } | 614 | } |
| 632 | 615 | ||
| 633 | /* Now set up frequency domain synthesised speech */ | 616 | /* Now set up frequency domain synthesised speech */ |
| 634 | 617 | ||
| 635 | for(l=1; l<=model->L; l++) { | 618 | for (l = 1; l <= model->L; l++) { |
| 636 | b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5); | 619 | b = (int)(l * model->Wo * FFT_DEC / TWO_PI + 0.5); |
| 637 | if (b > ((FFT_DEC/2)-1)) { | 620 | if (b > ((FFT_DEC / 2) - 1)) { |
| 638 | b = (FFT_DEC/2)-1; | 621 | b = (FFT_DEC / 2) - 1; |
| 639 | } | ||
| 640 | Sw_[b].real = model->A[l]*cosf(model->phi[l]); | ||
| 641 | Sw_[b].imag = model->A[l]*sinf(model->phi[l]); | ||
| 642 | } | 622 | } |
| 623 | Sw_[b].real = model->A[l] * cosf(model->phi[l]); | ||
| 624 | Sw_[b].imag = model->A[l] * sinf(model->phi[l]); | ||
| 625 | } | ||
| 643 | 626 | ||
| 644 | /* Perform inverse DFT */ | 627 | /* Perform inverse DFT */ |
| 645 | 628 | ||
| 646 | codec2_fftri(fftr_inv_cfg, Sw_,sw_); | 629 | codec2_fftri(fftr_inv_cfg, Sw_, sw_); |
| 647 | 630 | ||
| 648 | /* Overlap add to previous samples */ | 631 | /* Overlap add to previous samples */ |
| 649 | 632 | ||
| 650 | #ifdef USE_KISS_FFT | 633 | #ifdef USE_KISS_FFT |
| 651 | #define FFTI_FACTOR ((float)1.0) | 634 | #define FFTI_FACTOR ((float)1.0) |
| 652 | #else | 635 | #else |
| 653 | #define FFTI_FACTOR ((float32_t)FFT_DEC) | 636 | #define FFTI_FACTOR ((float32_t)FFT_DEC) |
| 654 | #endif | 637 | #endif |
| 655 | 638 | ||
| 656 | for(i=0; i<n_samp-1; i++) { | 639 | for (i = 0; i < n_samp - 1; i++) { |
| 657 | Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i] * FFTI_FACTOR; | 640 | Sn_[i] += sw_[FFT_DEC - n_samp + 1 + i] * Pn[i] * FFTI_FACTOR; |
| 658 | } | 641 | } |
| 659 | 642 | ||
| 660 | if (shift) | 643 | if (shift) |
| 661 | for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) | 644 | for (i = n_samp - 1, j = 0; i < 2 * n_samp; i++, j++) |
| 662 | Sn_[i] = sw_[j]*Pn[i] * FFTI_FACTOR; | 645 | Sn_[i] = sw_[j] * Pn[i] * FFTI_FACTOR; |
| 663 | else | 646 | else |
| 664 | for(i=n_samp-1,j=0; i<2*n_samp; i++,j++) | 647 | for (i = n_samp - 1, j = 0; i < 2 * n_samp; i++, j++) |
| 665 | Sn_[i] += sw_[j]*Pn[i] * FFTI_FACTOR; | 648 | Sn_[i] += sw_[j] * Pn[i] * FFTI_FACTOR; |
| 666 | } | 649 | } |
| 667 | 650 | ||
| 668 | |||
| 669 | /* todo: this should probably be in some states rather than a static */ | 651 | /* todo: this should probably be in some states rather than a static */ |
| 670 | static unsigned long next = 1; | 652 | static unsigned long next = 1; |
| 671 | 653 | ||
| 672 | int codec2_rand(void) { | 654 | int codec2_rand(void) { |
| 673 | next = next * 1103515245 + 12345; | 655 | next = next * 1103515245 + 12345; |
| 674 | return((unsigned)(next/65536) % 32768); | 656 | return ((unsigned)(next / 65536) % 32768); |
| 675 | } | 657 | } |
| 676 | |||
