| field | value | date |
|---|---|---|
| author | erdgeist <erdgeist@erdgeist.org> | 2025-08-15 12:42:40 +0200 |
| committer | erdgeist <erdgeist@erdgeist.org> | 2025-08-15 12:42:40 +0200 |
| commit | 30325d24d107dbf133da39f7c96d1510fd1c9449 | |
| tree | 932baa5b2a4475821f16dccf9e3e05011daa6d92 /codec2.c | |
| parent | 9022d768021bbe15c7815cc6f8b64218b46f0e10 | |
Bump to codec2 version 1.2.0 (branch: erdgeist-bump-to-1.2.0)
Diffstat (limited to 'codec2.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | codec2.c | 3331 |
1 file changed, 1259 insertions, 2072 deletions
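
Before the diff itself: the update reworks the public entry points shown below (`codec2_create()` now returns NULL for any mode disabled at compile time, the 700/700B/450/450PWB paths are gone, and a `codec2_bytes_per_frame()` helper is added for sizing the packed bit buffer). A minimal round-trip sketch against that API is shown here; the headers/include path and the file name `input.raw` (8 kHz, 16-bit, mono, little-endian) are assumptions for illustration, not part of the diff.

```c
/* Minimal encode/decode round trip using the codec2 API as it looks after
   this update. Assumptions: post-1.2.0 headers on the include path, and a
   hypothetical raw 8 kHz, 16-bit mono speech file "input.raw". */
#include <stdio.h>
#include <stdlib.h>

#include "codec2.h"

int main(void) {
  /* codec2_create() returns NULL if the requested mode was disabled at
     compile time (see the mode check at the top of the diff). */
  struct CODEC2 *c2 = codec2_create(CODEC2_MODE_3200);
  if (c2 == NULL) return 1;

  int nsam = codec2_samples_per_frame(c2);  /* 160 samples (20 ms) at 3200 */
  int nbytes = codec2_bytes_per_frame(c2);  /* new helper: (bits + 7) / 8   */

  short *speech = malloc(sizeof(short) * nsam);
  unsigned char *bits = malloc(nbytes);
  FILE *fin = fopen("input.raw", "rb");
  FILE *fout = fopen("output.raw", "wb");
  if (!speech || !bits || !fin || !fout) return 1;

  /* Encode each 20 ms frame to a packed byte buffer, then decode it back. */
  while (fread(speech, sizeof(short), nsam, fin) == (size_t)nsam) {
    codec2_encode(c2, bits, speech);
    codec2_decode(c2, speech, bits);
    fwrite(speech, sizeof(short), nsam, fout);
  }

  fclose(fout);
  fclose(fin);
  free(bits);
  free(speech);
  codec2_destroy(c2);
  return 0;
}
```

Note that the modes removed by this diff (700, 700B, 450, 450PWB) now fail the mode check in `codec2_create()` and yield NULL, so callers that previously requested them must handle that return value.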
| @@ -26,33 +26,31 @@ | |||
| 26 | along with this program; if not, see <http://www.gnu.org/licenses/>. | 26 | along with this program; if not, see <http://www.gnu.org/licenses/>. |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | #include "codec2.h" | ||
| 30 | |||
| 29 | #include <assert.h> | 31 | #include <assert.h> |
| 32 | #include <math.h> | ||
| 33 | #include <stdbool.h> | ||
| 30 | #include <stdio.h> | 34 | #include <stdio.h> |
| 31 | #include <stdlib.h> | 35 | #include <stdlib.h> |
| 32 | #include <stdbool.h> | ||
| 33 | #include <string.h> | 36 | #include <string.h> |
| 34 | #include <math.h> | ||
| 35 | 37 | ||
| 36 | #include "defines.h" | 38 | #include "bpf.h" |
| 39 | #include "bpfb.h" | ||
| 37 | #include "codec2_fft.h" | 40 | #include "codec2_fft.h" |
| 38 | #include "sine.h" | 41 | #include "codec2_internal.h" |
| 39 | #include "nlp.h" | 42 | #include "debug_alloc.h" |
| 43 | #include "defines.h" | ||
| 40 | #include "dump.h" | 44 | #include "dump.h" |
| 41 | #include "lpc.h" | ||
| 42 | #include "quantise.h" | ||
| 43 | #include "phase.h" | ||
| 44 | #include "interp.h" | 45 | #include "interp.h" |
| 45 | #include "postfilter.h" | 46 | #include "lpc.h" |
| 46 | #include "codec2.h" | ||
| 47 | #include "lsp.h" | 47 | #include "lsp.h" |
| 48 | #include "newamp2.h" | ||
| 49 | #include "codec2_internal.h" | ||
| 50 | #include "machdep.h" | 48 | #include "machdep.h" |
| 51 | #include "bpf.h" | 49 | #include "nlp.h" |
| 52 | #include "bpfb.h" | 50 | #include "phase.h" |
| 53 | #include "c2wideband.h" | 51 | #include "postfilter.h" |
| 54 | 52 | #include "quantise.h" | |
| 55 | #include "debug_alloc.h" | 53 | #include "sine.h" |
| 56 | 54 | ||
| 57 | /*---------------------------------------------------------------------------* \ | 55 | /*---------------------------------------------------------------------------* \ |
| 58 | 56 | ||
| @@ -62,32 +60,30 @@ | |||
| 62 | 60 | ||
| 63 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]); | 61 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]); |
| 64 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, | 62 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, |
| 65 | COMP Aw[], float gain); | 63 | COMP Aw[], float gain); |
| 66 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 64 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
| 67 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 65 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], |
| 68 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 66 | const unsigned char *bits); |
| 69 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 67 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
| 70 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 68 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], |
| 71 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 69 | const unsigned char *bits); |
| 72 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 70 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
| 73 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 71 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], |
| 74 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 72 | const unsigned char *bits); |
| 75 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est); | 73 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
| 76 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 74 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], |
| 77 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 75 | const unsigned char *bits); |
| 78 | void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 76 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
| 79 | void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 77 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], |
| 80 | void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 78 | const unsigned char *bits, float ber_est); |
| 81 | void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 79 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
| 82 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 80 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], |
| 83 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 81 | const unsigned char *bits); |
| 84 | void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]); | 82 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char *bits, short speech[]); |
| 85 | void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 83 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], |
| 86 | void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits); | 84 | const unsigned char *bits); |
| 87 | static void ear_protection(float in_out[], int n); | 85 | static void ear_protection(float in_out[], int n); |
| 88 | 86 | ||
| 89 | |||
| 90 | |||
| 91 | /*---------------------------------------------------------------------------*\ | 87 | /*---------------------------------------------------------------------------*\ |
| 92 | 88 | ||
| 93 | FUNCTIONS | 89 | FUNCTIONS |
| @@ -108,247 +104,170 @@ static void ear_protection(float in_out[], int n); | |||
| 108 | 104 | ||
| 109 | \*---------------------------------------------------------------------------*/ | 105 | \*---------------------------------------------------------------------------*/ |
| 110 | 106 | ||
| 111 | 107 | struct CODEC2 *codec2_create(int mode) { | |
| 112 | //Don't create CODEC2_MODE_450PWB for Encoding as it has undefined behavior ! | 108 | struct CODEC2 *c2; |
| 113 | struct CODEC2 * codec2_create(int mode) | 109 | int i, l; |
| 114 | { | 110 | |
| 115 | struct CODEC2 *c2; | 111 | // ALL POSSIBLE MODES MUST BE CHECKED HERE! |
| 116 | int i,l; | 112 | // we test if the desired mode is enabled at compile time |
| 117 | 113 | // and return NULL if not | |
| 118 | // ALL POSSIBLE MODES MUST BE CHECKED HERE! | 114 | |
| 119 | // we test if the desired mode is enabled at compile time | 115 | if (false == (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, mode) || |
| 120 | // and return NULL if not | 116 | CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, mode) || |
| 121 | 117 | CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, mode) || | |
| 122 | if (false == ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, mode) | 118 | CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) || |
| 123 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, mode) | 119 | CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) || |
| 124 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, mode) | 120 | CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) || |
| 125 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) | 121 | CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode))) { |
| 126 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) | 122 | return NULL; |
| 127 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) | 123 | } |
| 128 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700, mode) | 124 | |
| 129 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode) | 125 | c2 = (struct CODEC2 *)MALLOC(sizeof(struct CODEC2)); |
| 130 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode) | 126 | if (c2 == NULL) return NULL; |
| 131 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode) | 127 | |
| 132 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) | 128 | c2->mode = mode; |
| 133 | ) ) | 129 | |
| 134 | { | 130 | /* store constants in a few places for convenience */ |
| 135 | return NULL; | 131 | |
| 136 | } | 132 | c2->c2const = c2const_create(8000, N_S); |
| 137 | 133 | c2->Fs = c2->c2const.Fs; | |
| 138 | c2 = (struct CODEC2*)MALLOC(sizeof(struct CODEC2)); | 134 | int n_samp = c2->n_samp = c2->c2const.n_samp; |
| 139 | if (c2 == NULL) | 135 | int m_pitch = c2->m_pitch = c2->c2const.m_pitch; |
| 140 | return NULL; | 136 | |
| 141 | 137 | c2->Pn = (float *)MALLOC(2 * n_samp * sizeof(float)); | |
| 142 | c2->mode = mode; | 138 | if (c2->Pn == NULL) { |
| 143 | 139 | return NULL; | |
| 144 | /* store constants in a few places for convenience */ | 140 | } |
| 145 | 141 | c2->Sn_ = (float *)MALLOC(2 * n_samp * sizeof(float)); | |
| 146 | if( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) == 0){ | 142 | if (c2->Sn_ == NULL) { |
| 147 | c2->c2const = c2const_create(8000, N_S); | 143 | FREE(c2->Pn); |
| 148 | }else{ | 144 | return NULL; |
| 149 | c2->c2const = c2const_create(16000, N_S); | 145 | } |
| 150 | } | 146 | c2->w = (float *)MALLOC(m_pitch * sizeof(float)); |
| 151 | c2->Fs = c2->c2const.Fs; | 147 | if (c2->w == NULL) { |
| 152 | int n_samp = c2->n_samp = c2->c2const.n_samp; | 148 | FREE(c2->Pn); |
| 153 | int m_pitch = c2->m_pitch = c2->c2const.m_pitch; | 149 | FREE(c2->Sn_); |
| 154 | 150 | return NULL; | |
| 155 | c2->Pn = (float*)MALLOC(2*n_samp*sizeof(float)); | 151 | } |
| 156 | if (c2->Pn == NULL) { | 152 | c2->Sn = (float *)MALLOC(m_pitch * sizeof(float)); |
| 157 | return NULL; | 153 | if (c2->Sn == NULL) { |
| 158 | } | 154 | FREE(c2->Pn); |
| 159 | c2->Sn_ = (float*)MALLOC(2*n_samp*sizeof(float)); | 155 | FREE(c2->Sn_); |
| 160 | if (c2->Sn_ == NULL) { | 156 | FREE(c2->w); |
| 161 | FREE(c2->Pn); | 157 | return NULL; |
| 162 | return NULL; | 158 | } |
| 163 | } | 159 | |
| 164 | c2->w = (float*)MALLOC(m_pitch*sizeof(float)); | 160 | for (i = 0; i < m_pitch; i++) c2->Sn[i] = 1.0; |
| 165 | if (c2->w == NULL) { | 161 | c2->hpf_states[0] = c2->hpf_states[1] = 0.0; |
| 166 | FREE(c2->Pn); | 162 | for (i = 0; i < 2 * n_samp; i++) c2->Sn_[i] = 0; |
| 167 | FREE(c2->Sn_); | 163 | c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL); |
| 168 | return NULL; | 164 | c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL); |
| 169 | } | 165 | make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w, c2->W); |
| 170 | c2->Sn = (float*)MALLOC(m_pitch*sizeof(float)); | 166 | make_synthesis_window(&c2->c2const, c2->Pn); |
| 171 | if (c2->Sn == NULL) { | 167 | c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL); |
| 172 | FREE(c2->Pn); | 168 | c2->prev_f0_enc = 1 / P_MAX_S; |
| 173 | FREE(c2->Sn_); | 169 | c2->bg_est = 0.0; |
| 174 | FREE(c2->w); | 170 | c2->ex_phase = 0.0; |
| 175 | return NULL; | 171 | |
| 176 | } | 172 | for (l = 1; l <= MAX_AMP; l++) c2->prev_model_dec.A[l] = 0.0; |
| 177 | 173 | c2->prev_model_dec.Wo = TWO_PI / c2->c2const.p_max; | |
| 178 | for(i=0; i<m_pitch; i++) | 174 | c2->prev_model_dec.L = PI / c2->prev_model_dec.Wo; |
| 179 | c2->Sn[i] = 1.0; | 175 | c2->prev_model_dec.voiced = 0; |
| 180 | c2->hpf_states[0] = c2->hpf_states[1] = 0.0; | 176 | |
| 181 | for(i=0; i<2*n_samp; i++) | 177 | for (i = 0; i < LPC_ORD; i++) { |
| 182 | c2->Sn_[i] = 0; | 178 | c2->prev_lsps_dec[i] = i * PI / (LPC_ORD + 1); |
| 183 | c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL); | 179 | } |
| 184 | c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL); | 180 | c2->prev_e_dec = 1; |
| 185 | make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w,c2->W); | 181 | |
| 186 | make_synthesis_window(&c2->c2const, c2->Pn); | 182 | c2->nlp = nlp_create(&c2->c2const); |
| 187 | c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL); | 183 | if (c2->nlp == NULL) { |
| 188 | quantise_init(); | 184 | return NULL; |
| 189 | c2->prev_f0_enc = 1/P_MAX_S; | 185 | } |
| 190 | c2->bg_est = 0.0; | 186 | |
| 191 | c2->ex_phase = 0.0; | 187 | c2->lpc_pf = 1; |
| 192 | 188 | c2->bass_boost = 1; | |
| 193 | for(l=1; l<=MAX_AMP; l++) | 189 | c2->beta = LPCPF_BETA; |
| 194 | c2->prev_model_dec.A[l] = 0.0; | 190 | c2->gamma = LPCPF_GAMMA; |
| 195 | c2->prev_model_dec.Wo = TWO_PI/c2->c2const.p_max; | 191 | |
| 196 | c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo; | 192 | c2->xq_enc[0] = c2->xq_enc[1] = 0.0; |
| 197 | c2->prev_model_dec.voiced = 0; | 193 | c2->xq_dec[0] = c2->xq_dec[1] = 0.0; |
| 198 | 194 | ||
| 199 | for(i=0; i<LPC_ORD; i++) { | 195 | c2->smoothing = 0; |
| 200 | c2->prev_lsps_dec[i] = i*PI/(LPC_ORD+1); | 196 | c2->se = 0.0; |
| 201 | } | 197 | c2->nse = 0; |
| 202 | c2->prev_e_dec = 1; | 198 | c2->user_rate_K_vec_no_mean_ = NULL; |
| 203 | 199 | c2->post_filter_en = true; | |
| 204 | c2->nlp = nlp_create(&c2->c2const); | 200 | |
| 205 | if (c2->nlp == NULL) { | 201 | c2->bpf_buf = (float *)MALLOC(sizeof(float) * (BPF_N + 4 * c2->n_samp)); |
| 206 | return NULL; | 202 | assert(c2->bpf_buf != NULL); |
| 207 | } | 203 | for (i = 0; i < BPF_N + 4 * c2->n_samp; i++) c2->bpf_buf[i] = 0.0; |
| 208 | 204 | ||
| 209 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode)) | 205 | c2->softdec = NULL; |
| 210 | c2->gray = 0; // natural binary better for trellis decoding (hopefully added later) | 206 | c2->gray = 1; |
| 211 | else | 207 | |
| 212 | c2->gray = 1; | 208 | /* newamp1 initialisation */ |
| 213 | 209 | ||
| 214 | c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA; | 210 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { |
| 215 | 211 | mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), | |
| 216 | c2->xq_enc[0] = c2->xq_enc[1] = 0.0; | 212 | ftomel(3700.0)); |
| 217 | c2->xq_dec[0] = c2->xq_dec[1] = 0.0; | 213 | int k; |
| 218 | 214 | for (k = 0; k < NEWAMP1_K; k++) { | |
| 219 | c2->smoothing = 0; | 215 | c2->prev_rate_K_vec_[k] = 0.0; |
| 220 | c2->se = 0.0; c2->nse = 0; | 216 | c2->eq[k] = 0.0; |
| 221 | c2->user_rate_K_vec_no_mean_ = NULL; | 217 | } |
| 222 | c2->post_filter_en = 1; | 218 | c2->eq_en = false; |
| 223 | 219 | c2->Wo_left = 0.0; | |
| 224 | c2->bpf_buf = (float*)MALLOC(sizeof(float)*(BPF_N+4*c2->n_samp)); | 220 | c2->voicing_left = 0; |
| 225 | assert(c2->bpf_buf != NULL); | 221 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); |
| 226 | for(i=0; i<BPF_N+4*c2->n_samp; i++) | 222 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); |
| 227 | c2->bpf_buf[i] = 0.0; | 223 | } |
| 228 | 224 | ||
| 229 | c2->softdec = NULL; | 225 | c2->fmlfeat = NULL; |
| 230 | 226 | c2->fmlmodel = NULL; | |
| 231 | /* newamp1 initialisation */ | 227 | |
| 232 | 228 | // make sure that one of the two decode function pointers is empty | |
| 233 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | 229 | // for the encode function pointer this is not required since we always set it |
| 234 | mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), ftomel(3700.0) ); | 230 | // to a meaningful value |
| 235 | int k; | 231 | |
| 236 | for(k=0; k<NEWAMP1_K; k++) { | 232 | c2->decode = NULL; |
| 237 | c2->prev_rate_K_vec_[k] = 0.0; | 233 | c2->decode_ber = NULL; |
| 238 | c2->eq[k] = 0.0; | 234 | |
| 239 | } | 235 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) { |
| 240 | c2->eq_en = 0; | 236 | c2->encode = codec2_encode_3200; |
| 241 | c2->Wo_left = 0.0; | 237 | c2->decode = codec2_decode_3200; |
| 242 | c2->voicing_left = 0;; | 238 | } |
| 243 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); | 239 | |
| 244 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); | 240 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) { |
| 245 | } | 241 | c2->encode = codec2_encode_2400; |
| 246 | 242 | c2->decode = codec2_decode_2400; | |
| 247 | /* newamp2 initialisation */ | 243 | } |
| 248 | 244 | ||
| 249 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { | 245 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) { |
| 250 | n2_mel_sample_freqs_kHz(c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K); | 246 | c2->encode = codec2_encode_1600; |
| 251 | int k; | 247 | c2->decode = codec2_decode_1600; |
| 252 | for(k=0; k<NEWAMP2_K; k++) { | 248 | } |
| 253 | c2->n2_prev_rate_K_vec_[k] = 0.0; | 249 | |
| 254 | } | 250 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) { |
| 255 | c2->Wo_left = 0.0; | 251 | c2->encode = codec2_encode_1400; |
| 256 | c2->voicing_left = 0;; | 252 | c2->decode = codec2_decode_1400; |
| 257 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); | 253 | } |
| 258 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); | 254 | |
| 259 | } | 255 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) { |
| 260 | /* newamp2 PWB initialisation */ | 256 | c2->encode = codec2_encode_1300; |
| 261 | 257 | c2->decode_ber = codec2_decode_1300; | |
| 262 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | 258 | } |
| 263 | n2_mel_sample_freqs_kHz(c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K); | 259 | |
| 264 | int k; | 260 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) { |
| 265 | for(k=0; k<NEWAMP2_16K_K; k++) { | 261 | c2->encode = codec2_encode_1200; |
| 266 | c2->n2_pwb_prev_rate_K_vec_[k] = 0.0; | 262 | c2->decode = codec2_decode_1200; |
| 267 | } | 263 | } |
| 268 | c2->Wo_left = 0.0; | 264 | |
| 269 | c2->voicing_left = 0;; | 265 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { |
| 270 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); | 266 | c2->encode = codec2_encode_700c; |
| 271 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); | 267 | c2->decode = codec2_decode_700c; |
| 272 | } | 268 | } |
| 273 | 269 | ||
| 274 | c2->fmlfeat = NULL; | 270 | return c2; |
| 275 | |||
| 276 | // make sure that one of the two decode function pointers is empty | ||
| 277 | // for the encode function pointer this is not required since we always set it | ||
| 278 | // to a meaningful value | ||
| 279 | |||
| 280 | c2->decode = NULL; | ||
| 281 | c2->decode_ber = NULL; | ||
| 282 | |||
| 283 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | ||
| 284 | { | ||
| 285 | c2->encode = codec2_encode_3200; | ||
| 286 | c2->decode = codec2_decode_3200; | ||
| 287 | } | ||
| 288 | |||
| 289 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | ||
| 290 | { | ||
| 291 | c2->encode = codec2_encode_2400; | ||
| 292 | c2->decode = codec2_decode_2400; | ||
| 293 | } | ||
| 294 | |||
| 295 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | ||
| 296 | { | ||
| 297 | c2->encode = codec2_encode_1600; | ||
| 298 | c2->decode = codec2_decode_1600; | ||
| 299 | } | ||
| 300 | |||
| 301 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | ||
| 302 | { | ||
| 303 | c2->encode = codec2_encode_1400; | ||
| 304 | c2->decode = codec2_decode_1400; | ||
| 305 | } | ||
| 306 | |||
| 307 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | ||
| 308 | { | ||
| 309 | c2->encode = codec2_encode_1300; | ||
| 310 | c2->decode_ber = codec2_decode_1300; | ||
| 311 | } | ||
| 312 | |||
| 313 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
| 314 | { | ||
| 315 | c2->encode = codec2_encode_1200; | ||
| 316 | c2->decode = codec2_decode_1200; | ||
| 317 | } | ||
| 318 | |||
| 319 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
| 320 | { | ||
| 321 | c2->encode = codec2_encode_700; | ||
| 322 | c2->decode = codec2_decode_700; | ||
| 323 | } | ||
| 324 | |||
| 325 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
| 326 | { | ||
| 327 | c2->encode = codec2_encode_700b; | ||
| 328 | c2->decode = codec2_decode_700b; | ||
| 329 | } | ||
| 330 | |||
| 331 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
| 332 | { | ||
| 333 | c2->encode = codec2_encode_700c; | ||
| 334 | c2->decode = codec2_decode_700c; | ||
| 335 | } | ||
| 336 | |||
| 337 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
| 338 | { | ||
| 339 | c2->encode = codec2_encode_450; | ||
| 340 | c2->decode = codec2_decode_450; | ||
| 341 | } | ||
| 342 | |||
| 343 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
| 344 | { | ||
| 345 | //Encode PWB doesnt make sense | ||
| 346 | c2->encode = codec2_encode_450; | ||
| 347 | c2->decode = codec2_decode_450pwb; | ||
| 348 | } | ||
| 349 | |||
| 350 | |||
| 351 | return c2; | ||
| 352 | } | 271 | } |
| 353 | 272 | ||
| 354 | /*---------------------------------------------------------------------------*\ | 273 | /*---------------------------------------------------------------------------*\ |
| @@ -361,31 +280,22 @@ struct CODEC2 * codec2_create(int mode) | |||
| 361 | 280 | ||
| 362 | \*---------------------------------------------------------------------------*/ | 281 | \*---------------------------------------------------------------------------*/ |
| 363 | 282 | ||
| 364 | void codec2_destroy(struct CODEC2 *c2) | 283 | void codec2_destroy(struct CODEC2 *c2) { |
| 365 | { | 284 | assert(c2 != NULL); |
| 366 | assert(c2 != NULL); | 285 | FREE(c2->bpf_buf); |
| 367 | FREE(c2->bpf_buf); | 286 | nlp_destroy(c2->nlp); |
| 368 | nlp_destroy(c2->nlp); | 287 | codec2_fft_free(c2->fft_fwd_cfg); |
| 369 | codec2_fft_free(c2->fft_fwd_cfg); | 288 | codec2_fftr_free(c2->fftr_fwd_cfg); |
| 370 | codec2_fftr_free(c2->fftr_fwd_cfg); | 289 | codec2_fftr_free(c2->fftr_inv_cfg); |
| 371 | codec2_fftr_free(c2->fftr_inv_cfg); | 290 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { |
| 372 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | 291 | codec2_fft_free(c2->phase_fft_fwd_cfg); |
| 373 | codec2_fft_free(c2->phase_fft_fwd_cfg); | 292 | codec2_fft_free(c2->phase_fft_inv_cfg); |
| 374 | codec2_fft_free(c2->phase_fft_inv_cfg); | 293 | } |
| 375 | } | 294 | FREE(c2->Pn); |
| 376 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { | 295 | FREE(c2->Sn); |
| 377 | codec2_fft_free(c2->phase_fft_fwd_cfg); | 296 | FREE(c2->w); |
| 378 | codec2_fft_free(c2->phase_fft_inv_cfg); | 297 | FREE(c2->Sn_); |
| 379 | } | 298 | FREE(c2); |
| 380 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | ||
| 381 | codec2_fft_free(c2->phase_fft_fwd_cfg); | ||
| 382 | codec2_fft_free(c2->phase_fft_inv_cfg); | ||
| 383 | } | ||
| 384 | FREE(c2->Pn); | ||
| 385 | FREE(c2->Sn); | ||
| 386 | FREE(c2->w); | ||
| 387 | FREE(c2->Sn_); | ||
| 388 | FREE(c2); | ||
| 389 | } | 299 | } |
| 390 | 300 | ||
| 391 | /*---------------------------------------------------------------------------*\ | 301 | /*---------------------------------------------------------------------------*\ |
| @@ -399,32 +309,31 @@ void codec2_destroy(struct CODEC2 *c2) | |||
| 399 | \*---------------------------------------------------------------------------*/ | 309 | \*---------------------------------------------------------------------------*/ |
| 400 | 310 | ||
| 401 | int codec2_bits_per_frame(struct CODEC2 *c2) { | 311 | int codec2_bits_per_frame(struct CODEC2 *c2) { |
| 402 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | 312 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) return 64; |
| 403 | return 64; | 313 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) return 48; |
| 404 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | 314 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) return 64; |
| 405 | return 48; | 315 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) return 56; |
| 406 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | 316 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 52; |
| 407 | return 64; | 317 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 48; |
| 408 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | 318 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 28; |
| 409 | return 56; | 319 | |
| 410 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | 320 | return 0; /* shouldn't get here */ |
| 411 | return 52; | ||
| 412 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
| 413 | return 48; | ||
| 414 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
| 415 | return 28; | ||
| 416 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
| 417 | return 28; | ||
| 418 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
| 419 | return 28; | ||
| 420 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
| 421 | return 18; | ||
| 422 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
| 423 | return 18; | ||
| 424 | |||
| 425 | return 0; /* shouldn't get here */ | ||
| 426 | } | 321 | } |
| 427 | 322 | ||
| 323 | /*---------------------------------------------------------------------------*\ | ||
| 324 | |||
| 325 | FUNCTION....: codec2_bytes_per_frame | ||
| 326 | DATE CREATED: April 2021 | ||
| 327 | |||
| 328 | Returns the number of bytes per frame. Useful for allocated storage for | ||
| 329 | codec2_encode()/codec2_decode(). Note the number of bits may not be a | ||
| 330 | multiple of 8, therefore some bits in the last byte may be unused. | ||
| 331 | |||
| 332 | \*---------------------------------------------------------------------------*/ | ||
| 333 | |||
| 334 | int codec2_bytes_per_frame(struct CODEC2 *c2) { | ||
| 335 | return (codec2_bits_per_frame(c2) + 7) / 8; | ||
| 336 | } | ||
| 428 | 337 | ||
| 429 | /*---------------------------------------------------------------------------*\ | 338 | /*---------------------------------------------------------------------------*\ |
| 430 | 339 | ||
| @@ -437,60 +346,61 @@ int codec2_bits_per_frame(struct CODEC2 *c2) { | |||
| 437 | \*---------------------------------------------------------------------------*/ | 346 | \*---------------------------------------------------------------------------*/ |
| 438 | 347 | ||
| 439 | int codec2_samples_per_frame(struct CODEC2 *c2) { | 348 | int codec2_samples_per_frame(struct CODEC2 *c2) { |
| 440 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) | 349 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) return 160; |
| 441 | return 160; | 350 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) return 160; |
| 442 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) | 351 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) return 320; |
| 443 | return 160; | 352 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) return 320; |
| 444 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) | 353 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 320; |
| 445 | return 320; | 354 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 320; |
| 446 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) | 355 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 320; |
| 447 | return 320; | 356 | return 0; /* shouldn't get here */ |
| 448 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) | ||
| 449 | return 320; | ||
| 450 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) | ||
| 451 | return 320; | ||
| 452 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | ||
| 453 | return 320; | ||
| 454 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) | ||
| 455 | return 320; | ||
| 456 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | ||
| 457 | return 320; | ||
| 458 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | ||
| 459 | return 320; | ||
| 460 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | ||
| 461 | return 640; | ||
| 462 | return 0; /* shouldnt get here */ | ||
| 463 | } | 357 | } |
| 464 | 358 | ||
| 465 | void codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[]) | 359 | /*---------------------------------------------------------------------------*\ |
| 466 | { | 360 | |
| 467 | assert(c2 != NULL); | 361 | FUNCTION....: codec2_encode |
| 468 | assert(c2->encode != NULL); | 362 | AUTHOR......: David Rowe |
| 363 | DATE CREATED: Nov 14 2011 | ||
| 364 | |||
| 365 | Take an input buffer of speech samples, and compress them to a packed buffer | ||
| 366 | of bytes. | ||
| 469 | 367 | ||
| 470 | c2->encode(c2, bits, speech); | 368 | \*---------------------------------------------------------------------------*/ |
| 471 | 369 | ||
| 472 | } | 370 | void codec2_encode(struct CODEC2 *c2, unsigned char *bytes, short speech[]) { |
| 371 | assert(c2 != NULL); | ||
| 372 | assert(c2->encode != NULL); | ||
| 473 | 373 | ||
| 474 | void codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits) | 374 | c2->encode(c2, bytes, speech); |
| 475 | { | ||
| 476 | codec2_decode_ber(c2, speech, bits, 0.0); | ||
| 477 | } | 375 | } |
| 478 | 376 | ||
| 479 | void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *bits, float ber_est) | 377 | /*---------------------------------------------------------------------------*\ |
| 480 | { | ||
| 481 | assert(c2 != NULL); | ||
| 482 | assert(c2->decode != NULL || c2->decode_ber != NULL); | ||
| 483 | 378 | ||
| 484 | if (c2->decode != NULL) | 379 | FUNCTION....: codec2_decode |
| 485 | { | 380 | AUTHOR......: David Rowe |
| 486 | c2->decode(c2, speech, bits); | 381 | DATE CREATED: Nov 14 2011 |
| 487 | } | 382 | |
| 488 | else | 383 | Take an input packed buffer of bytes, and decode them to a buffer of speech |
| 489 | { | 384 | samples. |
| 490 | c2->decode_ber(c2, speech, bits, ber_est); | 385 | |
| 491 | } | 386 | \*---------------------------------------------------------------------------*/ |
| 387 | |||
| 388 | void codec2_decode(struct CODEC2 *c2, short speech[], | ||
| 389 | const unsigned char *bytes) { | ||
| 390 | codec2_decode_ber(c2, speech, bytes, 0.0); | ||
| 492 | } | 391 | } |
| 493 | 392 | ||
| 393 | void codec2_decode_ber(struct CODEC2 *c2, short speech[], | ||
| 394 | const unsigned char *bits, float ber_est) { | ||
| 395 | assert(c2 != NULL); | ||
| 396 | assert(c2->decode != NULL || c2->decode_ber != NULL); | ||
| 397 | |||
| 398 | if (c2->decode != NULL) { | ||
| 399 | c2->decode(c2, speech, bits); | ||
| 400 | } else { | ||
| 401 | c2->decode_ber(c2, speech, bits, ber_est); | ||
| 402 | } | ||
| 403 | } | ||
| 494 | 404 | ||
| 495 | /*---------------------------------------------------------------------------*\ | 405 | /*---------------------------------------------------------------------------*\ |
| 496 | 406 | ||
| @@ -503,60 +413,60 @@ void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *b | |||
| 503 | The codec2 algorithm actually operates internally on 10ms (80 | 413 | The codec2 algorithm actually operates internally on 10ms (80 |
| 504 | sample) frames, so we run the encoding algorithm twice. On the | 414 | sample) frames, so we run the encoding algorithm twice. On the |
| 505 | first frame we just send the voicing bits. On the second frame we | 415 | first frame we just send the voicing bits. On the second frame we |
| 506 | send all model parameters. Compared to 2400 we use a larger number | 416 | send all model parameters. Compared to 2400 we encode the LSP |
| 507 | of bits for the LSPs and non-VQ pitch and energy. | 417 | differences, a larger number of bits for the LSP(d)s and scalar |
| 418 | (non-VQ) quantisation for pitch and energy. | ||
| 508 | 419 | ||
| 509 | The bit allocation is: | 420 | The bit allocation is: |
| 510 | 421 | ||
| 511 | Parameter bits/frame | 422 | Parameter bits/frame |
| 512 | -------------------------------------- | 423 | ------------------------------------------------------ |
| 513 | Harmonic magnitudes (LSPs) 50 | 424 | Harmonic magnitudes (LSP differerences) 50 |
| 514 | Pitch (Wo) 7 | 425 | Pitch (Wo) 7 |
| 515 | Energy 5 | 426 | Energy 5 |
| 516 | Voicing (10ms update) 2 | 427 | Voicing (10ms update) 2 |
| 517 | TOTAL 64 | 428 | TOTAL 64 |
| 518 | 429 | ||
| 519 | \*---------------------------------------------------------------------------*/ | 430 | \*---------------------------------------------------------------------------*/ |
| 520 | 431 | ||
| 521 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 432 | void codec2_encode_3200(struct CODEC2 *c2, unsigned char *bits, |
| 522 | { | 433 | short speech[]) { |
| 523 | MODEL model; | 434 | MODEL model; |
| 524 | float ak[LPC_ORD+1]; | 435 | float ak[LPC_ORD + 1]; |
| 525 | float lsps[LPC_ORD]; | 436 | float lsps[LPC_ORD]; |
| 526 | float e; | 437 | float e; |
| 527 | int Wo_index, e_index; | 438 | int Wo_index, e_index; |
| 528 | int lspd_indexes[LPC_ORD]; | 439 | int lspd_indexes[LPC_ORD]; |
| 529 | int i; | 440 | int i; |
| 530 | unsigned int nbit = 0; | 441 | unsigned int nbit = 0; |
| 531 | 442 | ||
| 532 | assert(c2 != NULL); | 443 | assert(c2 != NULL); |
| 533 | 444 | ||
| 534 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 445 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
| 535 | 446 | ||
| 536 | /* first 10ms analysis frame - we just want voicing */ | 447 | /* first 10ms analysis frame - we just want voicing */ |
| 537 | 448 | ||
| 538 | analyse_one_frame(c2, &model, speech); | 449 | analyse_one_frame(c2, &model, speech); |
| 539 | pack(bits, &nbit, model.voiced, 1); | 450 | pack(bits, &nbit, model.voiced, 1); |
| 540 | 451 | ||
| 541 | /* second 10ms analysis frame */ | 452 | /* second 10ms analysis frame */ |
| 542 | 453 | ||
| 543 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 454 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
| 544 | pack(bits, &nbit, model.voiced, 1); | 455 | pack(bits, &nbit, model.voiced, 1); |
| 545 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | 456 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); |
| 546 | pack(bits, &nbit, Wo_index, WO_BITS); | 457 | pack(bits, &nbit, Wo_index, WO_BITS); |
| 547 | 458 | ||
| 548 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 459 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 549 | e_index = encode_energy(e, E_BITS); | 460 | e_index = encode_energy(e, E_BITS); |
| 550 | pack(bits, &nbit, e_index, E_BITS); | 461 | pack(bits, &nbit, e_index, E_BITS); |
| 551 | 462 | ||
| 552 | encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD); | 463 | encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD); |
| 553 | for(i=0; i<LSPD_SCALAR_INDEXES; i++) { | 464 | for (i = 0; i < LSPD_SCALAR_INDEXES; i++) { |
| 554 | pack(bits, &nbit, lspd_indexes[i], lspd_bits(i)); | 465 | pack(bits, &nbit, lspd_indexes[i], lspd_bits(i)); |
| 555 | } | 466 | } |
| 556 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 467 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
| 557 | } | 468 | } |
| 558 | 469 | ||
| 559 | |||
| 560 | /*---------------------------------------------------------------------------*\ | 470 | /*---------------------------------------------------------------------------*\ |
| 561 | 471 | ||
| 562 | FUNCTION....: codec2_decode_3200 | 472 | FUNCTION....: codec2_decode_3200 |
| @@ -567,77 +477,75 @@ void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
| 567 | 477 | ||
| 568 | \*---------------------------------------------------------------------------*/ | 478 | \*---------------------------------------------------------------------------*/ |
| 569 | 479 | ||
| 570 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 480 | void codec2_decode_3200(struct CODEC2 *c2, short speech[], |
| 571 | { | 481 | const unsigned char *bits) { |
| 572 | MODEL model[2]; | 482 | MODEL model[2]; |
| 573 | int lspd_indexes[LPC_ORD]; | 483 | int lspd_indexes[LPC_ORD]; |
| 574 | float lsps[2][LPC_ORD]; | 484 | float lsps[2][LPC_ORD]; |
| 575 | int Wo_index, e_index; | 485 | int Wo_index, e_index; |
| 576 | float e[2]; | 486 | float e[2]; |
| 577 | float snr; | 487 | float snr; |
| 578 | float ak[2][LPC_ORD+1]; | 488 | float ak[2][LPC_ORD + 1]; |
| 579 | int i,j; | 489 | int i, j; |
| 580 | unsigned int nbit = 0; | 490 | unsigned int nbit = 0; |
| 581 | COMP Aw[FFT_ENC]; | 491 | COMP Aw[FFT_ENC]; |
| 582 | 492 | ||
| 583 | assert(c2 != NULL); | 493 | assert(c2 != NULL); |
| 584 | 494 | ||
| 585 | /* only need to zero these out due to (unused) snr calculation */ | 495 | /* only need to zero these out due to (unused) snr calculation */ |
| 586 | 496 | ||
| 587 | for(i=0; i<2; i++) | 497 | for (i = 0; i < 2; i++) |
| 588 | for(j=1; j<=MAX_AMP; j++) | 498 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
| 589 | model[i].A[j] = 0.0; | ||
| 590 | 499 | ||
| 591 | /* unpack bits from channel ------------------------------------*/ | 500 | /* unpack bits from channel ------------------------------------*/ |
| 592 | 501 | ||
| 593 | /* this will partially fill the model params for the 2 x 10ms | 502 | /* this will partially fill the model params for the 2 x 10ms |
| 594 | frames */ | 503 | frames */ |
| 595 | 504 | ||
| 596 | model[0].voiced = unpack(bits, &nbit, 1); | 505 | model[0].voiced = unpack(bits, &nbit, 1); |
| 597 | model[1].voiced = unpack(bits, &nbit, 1); | 506 | model[1].voiced = unpack(bits, &nbit, 1); |
| 598 | 507 | ||
| 599 | Wo_index = unpack(bits, &nbit, WO_BITS); | 508 | Wo_index = unpack(bits, &nbit, WO_BITS); |
| 600 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | 509 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); |
| 601 | model[1].L = PI/model[1].Wo; | 510 | model[1].L = PI / model[1].Wo; |
| 602 | 511 | ||
| 603 | e_index = unpack(bits, &nbit, E_BITS); | 512 | e_index = unpack(bits, &nbit, E_BITS); |
| 604 | e[1] = decode_energy(e_index, E_BITS); | 513 | e[1] = decode_energy(e_index, E_BITS); |
| 605 | 514 | ||
| 606 | for(i=0; i<LSPD_SCALAR_INDEXES; i++) { | 515 | for (i = 0; i < LSPD_SCALAR_INDEXES; i++) { |
| 607 | lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i)); | 516 | lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i)); |
| 608 | } | 517 | } |
| 609 | decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD); | 518 | decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD); |
| 610 | 519 | ||
| 611 | /* interpolate ------------------------------------------------*/ | 520 | /* interpolate ------------------------------------------------*/ |
| 612 | 521 | ||
| 613 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 522 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 |
| 614 | 10ms frame between 20ms samples */ | 523 | 10ms frame between 20ms samples */ |
| 615 | 524 | ||
| 616 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 525 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); |
| 617 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 526 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
| 618 | 527 | ||
| 619 | /* LSPs are sampled every 20ms so we interpolate the frame in | 528 | /* LSPs are sampled every 20ms so we interpolate the frame in |
| 620 | between, then recover spectral amplitudes */ | 529 | between, then recover spectral amplitudes */ |
| 621 | 530 | ||
| 622 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); | 531 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, |
| 532 | LPC_ORD); | ||
| 623 | 533 | ||
| 624 | for(i=0; i<2; i++) { | 534 | for (i = 0; i < 2; i++) { |
| 625 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 535 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
| 626 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 536 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
| 627 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 537 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
| 628 | apply_lpc_correction(&model[i]); | 538 | apply_lpc_correction(&model[i]); |
| 629 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 539 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); |
| 630 | } | 540 | } |
| 631 | 541 | ||
| 632 | /* update memories for next frame ----------------------------*/ | 542 | /* update memories for next frame ----------------------------*/ |
| 633 | 543 | ||
| 634 | c2->prev_model_dec = model[1]; | 544 | c2->prev_model_dec = model[1]; |
| 635 | c2->prev_e_dec = e[1]; | 545 | c2->prev_e_dec = e[1]; |
| 636 | for(i=0; i<LPC_ORD; i++) | 546 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[1][i]; |
| 637 | c2->prev_lsps_dec[i] = lsps[1][i]; | ||
| 638 | } | 547 | } |
| 639 | 548 | ||
| 640 | |||
| 641 | /*---------------------------------------------------------------------------*\ | 549 | /*---------------------------------------------------------------------------*\ |
| 642 | 550 | ||
| 643 | FUNCTION....: codec2_encode_2400 | 551 | FUNCTION....: codec2_encode_2400 |
| @@ -663,46 +571,45 @@ void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * | |||
| 663 | 571 | ||
| 664 | \*---------------------------------------------------------------------------*/ | 572 | \*---------------------------------------------------------------------------*/ |
| 665 | 573 | ||
| 666 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 574 | void codec2_encode_2400(struct CODEC2 *c2, unsigned char *bits, |
| 667 | { | 575 | short speech[]) { |
| 668 | MODEL model; | 576 | MODEL model; |
| 669 | float ak[LPC_ORD+1]; | 577 | float ak[LPC_ORD + 1]; |
| 670 | float lsps[LPC_ORD]; | 578 | float lsps[LPC_ORD]; |
| 671 | float e; | 579 | float e; |
| 672 | int WoE_index; | 580 | int WoE_index; |
| 673 | int lsp_indexes[LPC_ORD]; | 581 | int lsp_indexes[LPC_ORD]; |
| 674 | int i; | 582 | int i; |
| 675 | int spare = 0; | 583 | int spare = 0; |
| 676 | unsigned int nbit = 0; | 584 | unsigned int nbit = 0; |
| 677 | 585 | ||
| 678 | assert(c2 != NULL); | 586 | assert(c2 != NULL); |
| 679 | 587 | ||
| 680 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 588 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
| 681 | 589 | ||
| 682 | /* first 10ms analysis frame - we just want voicing */ | 590 | /* first 10ms analysis frame - we just want voicing */ |
| 683 | 591 | ||
| 684 | analyse_one_frame(c2, &model, speech); | 592 | analyse_one_frame(c2, &model, speech); |
| 685 | pack(bits, &nbit, model.voiced, 1); | 593 | pack(bits, &nbit, model.voiced, 1); |
| 686 | 594 | ||
| 687 | /* second 10ms analysis frame */ | 595 | /* second 10ms analysis frame */ |
| 688 | 596 | ||
| 689 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 597 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
| 690 | pack(bits, &nbit, model.voiced, 1); | 598 | pack(bits, &nbit, model.voiced, 1); |
| 691 | 599 | ||
| 692 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 600 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 693 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 601 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
| 694 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 602 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
| 695 | 603 | ||
| 696 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | 604 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); |
| 697 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 605 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
| 698 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | 606 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); |
| 699 | } | 607 | } |
| 700 | pack(bits, &nbit, spare, 2); | 608 | pack(bits, &nbit, spare, 2); |
| 701 | 609 | ||
| 702 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 610 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
| 703 | } | 611 | } |
| 704 | 612 | ||
| 705 | |||
| 706 | /*---------------------------------------------------------------------------*\ | 613 | /*---------------------------------------------------------------------------*\ |
| 707 | 614 | ||
| 708 | FUNCTION....: codec2_decode_2400 | 615 | FUNCTION....: codec2_decode_2400 |
| @@ -713,86 +620,84 @@ void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
| 713 | 620 | ||
| 714 | \*---------------------------------------------------------------------------*/ | 621 | \*---------------------------------------------------------------------------*/ |
| 715 | 622 | ||
| 716 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 623 | void codec2_decode_2400(struct CODEC2 *c2, short speech[], |
| 717 | { | 624 | const unsigned char *bits) { |
| 718 | MODEL model[2]; | 625 | MODEL model[2]; |
| 719 | int lsp_indexes[LPC_ORD]; | 626 | int lsp_indexes[LPC_ORD]; |
| 720 | float lsps[2][LPC_ORD]; | 627 | float lsps[2][LPC_ORD]; |
| 721 | int WoE_index; | 628 | int WoE_index; |
| 722 | float e[2]; | 629 | float e[2]; |
| 723 | float snr; | 630 | float snr; |
| 724 | float ak[2][LPC_ORD+1]; | 631 | float ak[2][LPC_ORD + 1]; |
| 725 | int i,j; | 632 | int i, j; |
| 726 | unsigned int nbit = 0; | 633 | unsigned int nbit = 0; |
| 727 | COMP Aw[FFT_ENC]; | 634 | COMP Aw[FFT_ENC]; |
| 728 | 635 | ||
| 729 | assert(c2 != NULL); | 636 | assert(c2 != NULL); |
| 730 | 637 | ||
| 731 | /* only need to zero these out due to (unused) snr calculation */ | 638 | /* only need to zero these out due to (unused) snr calculation */ |
| 732 | 639 | ||
| 733 | for(i=0; i<2; i++) | 640 | for (i = 0; i < 2; i++) |
| 734 | for(j=1; j<=MAX_AMP; j++) | 641 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
| 735 | model[i].A[j] = 0.0; | ||
| 736 | 642 | ||
| 737 | /* unpack bits from channel ------------------------------------*/ | 643 | /* unpack bits from channel ------------------------------------*/ |
| 738 | 644 | ||
| 739 | /* this will partially fill the model params for the 2 x 10ms | 645 | /* this will partially fill the model params for the 2 x 10ms |
| 740 | frames */ | 646 | frames */ |
| 741 | 647 | ||
| 742 | model[0].voiced = unpack(bits, &nbit, 1); | 648 | model[0].voiced = unpack(bits, &nbit, 1); |
| 743 | 649 | ||
| 744 | model[1].voiced = unpack(bits, &nbit, 1); | 650 | model[1].voiced = unpack(bits, &nbit, 1); |
| 745 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 651 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
| 746 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | 652 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); |
| 747 | 653 | ||
| 748 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 654 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
| 749 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | 655 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); |
| 750 | } | 656 | } |
| 751 | decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD); | 657 | decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD); |
| 752 | check_lsp_order(&lsps[1][0], LPC_ORD); | 658 | check_lsp_order(&lsps[1][0], LPC_ORD); |
| 753 | bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0); | 659 | bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0); |
| 754 | 660 | ||
| 755 | /* interpolate ------------------------------------------------*/ | 661 | /* interpolate ------------------------------------------------*/ |
| 756 | 662 | ||
| 757 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 663 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 |
| 758 | 10ms frame between 20ms samples */ | 664 | 10ms frame between 20ms samples */ |
| 759 | 665 | ||
| 760 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 666 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); |
| 761 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 667 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
| 762 | 668 | ||
| 763 | /* LSPs are sampled every 20ms so we interpolate the frame in | 669 | /* LSPs are sampled every 20ms so we interpolate the frame in |
| 764 | between, then recover spectral amplitudes */ | 670 | between, then recover spectral amplitudes */ |
| 765 | 671 | ||
| 766 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); | 672 | interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, |
| 767 | for(i=0; i<2; i++) { | 673 | LPC_ORD); |
| 768 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 674 | for (i = 0; i < 2; i++) { |
| 769 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 675 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
| 770 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 676 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
| 771 | apply_lpc_correction(&model[i]); | 677 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
| 772 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 678 | apply_lpc_correction(&model[i]); |
| 773 | 679 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); | |
| 774 | /* dump parameters for deep learning experiments */ | 680 | |
| 775 | 681 | /* dump parameters for deep learning experiments */ | |
| 776 | if (c2->fmlfeat != NULL) { | 682 | |
| 777 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ | 683 | if (c2->fmlfeat != NULL) { |
| 778 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); | 684 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ |
| 779 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); | 685 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); |
| 780 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); | 686 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); |
| 781 | float voiced_float = model[i].voiced; | 687 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); |
| 782 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); | 688 | float voiced_float = model[i].voiced; |
| 783 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); | 689 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); |
| 784 | } | 690 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); |
| 785 | } | 691 | } |
| 692 | } | ||
| 786 | 693 | ||
| 787 | /* update memories for next frame ----------------------------*/ | 694 | /* update memories for next frame ----------------------------*/ |
| 788 | 695 | ||
| 789 | c2->prev_model_dec = model[1]; | 696 | c2->prev_model_dec = model[1]; |
| 790 | c2->prev_e_dec = e[1]; | 697 | c2->prev_e_dec = e[1]; |
| 791 | for(i=0; i<LPC_ORD; i++) | 698 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[1][i]; |
| 792 | c2->prev_lsps_dec[i] = lsps[1][i]; | ||
| 793 | } | 699 | } |
| 794 | 700 | ||
| 795 | |||
| 796 | /*---------------------------------------------------------------------------*\ | 701 | /*---------------------------------------------------------------------------*\ |
| 797 | 702 | ||
| 798 | FUNCTION....: codec2_encode_1600 | 703 | FUNCTION....: codec2_encode_1600 |
| @@ -821,65 +726,64 @@ void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * | |||
| 821 | 726 | ||
| 822 | \*---------------------------------------------------------------------------*/ | 727 | \*---------------------------------------------------------------------------*/ |
| 823 | 728 | ||
| 824 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 729 | void codec2_encode_1600(struct CODEC2 *c2, unsigned char *bits, |
| 825 | { | 730 | short speech[]) { |
| 826 | MODEL model; | 731 | MODEL model; |
| 827 | float lsps[LPC_ORD]; | 732 | float lsps[LPC_ORD]; |
| 828 | float ak[LPC_ORD+1]; | 733 | float ak[LPC_ORD + 1]; |
| 829 | float e; | 734 | float e; |
| 830 | int lsp_indexes[LPC_ORD]; | 735 | int lsp_indexes[LPC_ORD]; |
| 831 | int Wo_index, e_index; | 736 | int Wo_index, e_index; |
| 832 | int i; | 737 | int i; |
| 833 | unsigned int nbit = 0; | 738 | unsigned int nbit = 0; |
| 834 | 739 | ||
| 835 | assert(c2 != NULL); | 740 | assert(c2 != NULL); |
| 836 | 741 | ||
| 837 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 742 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
| 838 | 743 | ||
| 839 | /* frame 1: - voicing ---------------------------------------------*/ | 744 | /* frame 1: - voicing ---------------------------------------------*/ |
| 840 | 745 | ||
| 841 | analyse_one_frame(c2, &model, speech); | 746 | analyse_one_frame(c2, &model, speech); |
| 842 | pack(bits, &nbit, model.voiced, 1); | 747 | pack(bits, &nbit, model.voiced, 1); |
| 843 | 748 | ||
| 844 | /* frame 2: - voicing, scalar Wo & E -------------------------------*/ | 749 | /* frame 2: - voicing, scalar Wo & E -------------------------------*/ |
| 845 | 750 | ||
| 846 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 751 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
| 847 | pack(bits, &nbit, model.voiced, 1); | 752 | pack(bits, &nbit, model.voiced, 1); |
| 848 | 753 | ||
| 849 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | 754 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); |
| 850 | pack(bits, &nbit, Wo_index, WO_BITS); | 755 | pack(bits, &nbit, Wo_index, WO_BITS); |
| 851 | 756 | ||
| 852 | /* need to run this just to get LPC energy */ | 757 | /* need to run this just to get LPC energy */ |
| 853 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 758 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 854 | e_index = encode_energy(e, E_BITS); | 759 | e_index = encode_energy(e, E_BITS); |
| 855 | pack(bits, &nbit, e_index, E_BITS); | 760 | pack(bits, &nbit, e_index, E_BITS); |
| 856 | 761 | ||
| 857 | /* frame 3: - voicing ---------------------------------------------*/ | 762 | /* frame 3: - voicing ---------------------------------------------*/ |
| 858 | 763 | ||
| 859 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | 764 | analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]); |
| 860 | pack(bits, &nbit, model.voiced, 1); | 765 | pack(bits, &nbit, model.voiced, 1); |
| 861 | 766 | ||
| 862 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ | 767 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ |
| 863 | 768 | ||
| 864 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | 769 | analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]); |
| 865 | pack(bits, &nbit, model.voiced, 1); | 770 | pack(bits, &nbit, model.voiced, 1); |
| 866 | 771 | ||
| 867 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | 772 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); |
| 868 | pack(bits, &nbit, Wo_index, WO_BITS); | 773 | pack(bits, &nbit, Wo_index, WO_BITS); |
| 869 | 774 | ||
| 870 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 775 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 871 | e_index = encode_energy(e, E_BITS); | 776 | e_index = encode_energy(e, E_BITS); |
| 872 | pack(bits, &nbit, e_index, E_BITS); | 777 | pack(bits, &nbit, e_index, E_BITS); |
| 873 | 778 | ||
| 874 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | 779 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); |
| 875 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 780 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
| 876 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | 781 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); |
| 877 | } | 782 | } |
| 878 | 783 | ||
| 879 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 784 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
| 880 | } | 785 | } |
| 881 | 786 | ||
| 882 | |||
| 883 | /*---------------------------------------------------------------------------*\ | 787 | /*---------------------------------------------------------------------------*\ |
| 884 | 788 | ||
| 885 | FUNCTION....: codec2_decode_1600 | 789 | FUNCTION....: codec2_decode_1600 |
| @@ -890,91 +794,89 @@ void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
| 890 | 794 | ||
| 891 | \*---------------------------------------------------------------------------*/ | 795 | \*---------------------------------------------------------------------------*/ |
| 892 | 796 | ||
| 893 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 797 | void codec2_decode_1600(struct CODEC2 *c2, short speech[], |
| 894 | { | 798 | const unsigned char *bits) { |
| 895 | MODEL model[4]; | 799 | MODEL model[4]; |
| 896 | int lsp_indexes[LPC_ORD]; | 800 | int lsp_indexes[LPC_ORD]; |
| 897 | float lsps[4][LPC_ORD]; | 801 | float lsps[4][LPC_ORD]; |
| 898 | int Wo_index, e_index; | 802 | int Wo_index, e_index; |
| 899 | float e[4]; | 803 | float e[4]; |
| 900 | float snr; | 804 | float snr; |
| 901 | float ak[4][LPC_ORD+1]; | 805 | float ak[4][LPC_ORD + 1]; |
| 902 | int i,j; | 806 | int i, j; |
| 903 | unsigned int nbit = 0; | 807 | unsigned int nbit = 0; |
| 904 | float weight; | 808 | float weight; |
| 905 | COMP Aw[FFT_ENC]; | 809 | COMP Aw[FFT_ENC]; |
| 906 | 810 | ||
| 907 | assert(c2 != NULL); | 811 | assert(c2 != NULL); |
| 908 | 812 | ||
| 909 | /* only need to zero these out due to (unused) snr calculation */ | 813 | /* only need to zero these out due to (unused) snr calculation */ |
| 910 | 814 | ||
| 911 | for(i=0; i<4; i++) | 815 | for (i = 0; i < 4; i++) |
| 912 | for(j=1; j<=MAX_AMP; j++) | 816 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
| 913 | model[i].A[j] = 0.0; | 817 | |
| 914 | 818 | /* unpack bits from channel ------------------------------------*/ | |
| 915 | /* unpack bits from channel ------------------------------------*/ | 819 | |
| 916 | 820 | /* this will partially fill the model params for the 4 x 10ms | |
| 917 | /* this will partially fill the model params for the 4 x 10ms | 821 | frames */ |
| 918 | frames */ | 822 | |
| 919 | 823 | model[0].voiced = unpack(bits, &nbit, 1); | |
| 920 | model[0].voiced = unpack(bits, &nbit, 1); | 824 | |
| 921 | 825 | model[1].voiced = unpack(bits, &nbit, 1); | |
| 922 | model[1].voiced = unpack(bits, &nbit, 1); | 826 | Wo_index = unpack(bits, &nbit, WO_BITS); |
| 923 | Wo_index = unpack(bits, &nbit, WO_BITS); | 827 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); |
| 924 | model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | 828 | model[1].L = PI / model[1].Wo; |
| 925 | model[1].L = PI/model[1].Wo; | 829 | |
| 926 | 830 | e_index = unpack(bits, &nbit, E_BITS); | |
| 927 | e_index = unpack(bits, &nbit, E_BITS); | 831 | e[1] = decode_energy(e_index, E_BITS); |
| 928 | e[1] = decode_energy(e_index, E_BITS); | 832 | |
| 929 | 833 | model[2].voiced = unpack(bits, &nbit, 1); | |
| 930 | model[2].voiced = unpack(bits, &nbit, 1); | 834 | |
| 931 | 835 | model[3].voiced = unpack(bits, &nbit, 1); | |
| 932 | model[3].voiced = unpack(bits, &nbit, 1); | 836 | Wo_index = unpack(bits, &nbit, WO_BITS); |
| 933 | Wo_index = unpack(bits, &nbit, WO_BITS); | 837 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); |
| 934 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | 838 | model[3].L = PI / model[3].Wo; |
| 935 | model[3].L = PI/model[3].Wo; | 839 | |
| 936 | 840 | e_index = unpack(bits, &nbit, E_BITS); | |
| 937 | e_index = unpack(bits, &nbit, E_BITS); | 841 | e[3] = decode_energy(e_index, E_BITS); |
| 938 | e[3] = decode_energy(e_index, E_BITS); | 842 | |
| 939 | 843 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { | |
| 940 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 844 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); |
| 941 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | 845 | } |
| 942 | } | 846 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); |
| 943 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | 847 | check_lsp_order(&lsps[3][0], LPC_ORD); |
| 944 | check_lsp_order(&lsps[3][0], LPC_ORD); | 848 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); |
| 945 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | 849 | |
| 946 | 850 | /* interpolate ------------------------------------------------*/ | |
| 947 | /* interpolate ------------------------------------------------*/ | 851 | |
| 948 | 852 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | |
| 949 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 853 | 10ms frame between 20ms samples */ |
| 950 | 10ms frame between 20ms samples */ | 854 | |
| 951 | 855 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | |
| 952 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 856 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
| 953 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 857 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); |
| 954 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | 858 | e[2] = interp_energy(e[1], e[3]); |
| 955 | e[2] = interp_energy(e[1], e[3]); | 859 | |
| 956 | 860 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | |
| 957 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | 861 | between, then recover spectral amplitudes */ |
| 958 | between, then recover spectral amplitudes */ | 862 | |
| 959 | 863 | for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) { | |
| 960 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | 864 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, |
| 961 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | 865 | LPC_ORD); |
| 962 | } | 866 | } |
| 963 | for(i=0; i<4; i++) { | 867 | for (i = 0; i < 4; i++) { |
| 964 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 868 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
| 965 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 869 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
| 966 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 870 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
| 967 | apply_lpc_correction(&model[i]); | 871 | apply_lpc_correction(&model[i]); |
| 968 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 872 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); |
| 969 | } | 873 | } |
| 970 | 874 | ||
| 971 | /* update memories for next frame ----------------------------*/ | 875 | /* update memories for next frame ----------------------------*/ |
| 972 | 876 | ||
| 973 | c2->prev_model_dec = model[3]; | 877 | c2->prev_model_dec = model[3]; |
| 974 | c2->prev_e_dec = e[3]; | 878 | c2->prev_e_dec = e[3]; |
| 975 | for(i=0; i<LPC_ORD; i++) | 879 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i]; |
| 976 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
| 977 | |||
| 978 | } | 880 | } |
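The decode-side comments above describe the interpolation pattern shared by the 1600/1400/1200 decoders: Wo and energy arrive every 20 ms, LSPs every 40 ms, and the in-between 10 ms frames are filled by blending the previous decoded values with the newly received ones at weights 0.25, 0.5 and 0.75. A minimal sketch of the LSP part, assuming plain linear interpolation (the interp_Wo/interp_energy helpers may work in a warped or log domain, so this is illustrative only):

    /* Sketch: fill frames 0..2 by blending the previous frame's LSPs with
       the newly decoded frame-3 LSPs at weights 0.25, 0.5, 0.75. LPC_ORD
       is 10 here (see the "10 LSPs" comment in the 1300 decoder below). */
    static void interp_lsps_sketch(float out[3][10], const float prev[10],
                                   const float next[10]) {
      for (int i = 0; i < 3; i++) {
        float weight = 0.25f * (i + 1);
        for (int k = 0; k < 10; k++)
          out[i][k] = (1.0f - weight) * prev[k] + weight * next[k];
      }
    }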
| 979 | 881 | ||
| 980 | /*---------------------------------------------------------------------------*\ | 882 | /*---------------------------------------------------------------------------*\ |
| @@ -1004,60 +906,59 @@ void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * | |||
| 1004 | 906 | ||
| 1005 | \*---------------------------------------------------------------------------*/ | 907 | \*---------------------------------------------------------------------------*/ |
| 1006 | 908 | ||
| 1007 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 909 | void codec2_encode_1400(struct CODEC2 *c2, unsigned char *bits, |
| 1008 | { | 910 | short speech[]) { |
| 1009 | MODEL model; | 911 | MODEL model; |
| 1010 | float lsps[LPC_ORD]; | 912 | float lsps[LPC_ORD]; |
| 1011 | float ak[LPC_ORD+1]; | 913 | float ak[LPC_ORD + 1]; |
| 1012 | float e; | 914 | float e; |
| 1013 | int lsp_indexes[LPC_ORD]; | 915 | int lsp_indexes[LPC_ORD]; |
| 1014 | int WoE_index; | 916 | int WoE_index; |
| 1015 | int i; | 917 | int i; |
| 1016 | unsigned int nbit = 0; | 918 | unsigned int nbit = 0; |
| 1017 | 919 | ||
| 1018 | assert(c2 != NULL); | 920 | assert(c2 != NULL); |
| 1019 | 921 | ||
| 1020 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 922 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
| 1021 | 923 | ||
| 1022 | /* frame 1: - voicing ---------------------------------------------*/ | 924 | /* frame 1: - voicing ---------------------------------------------*/ |
| 1023 | 925 | ||
| 1024 | analyse_one_frame(c2, &model, speech); | 926 | analyse_one_frame(c2, &model, speech); |
| 1025 | pack(bits, &nbit, model.voiced, 1); | 927 | pack(bits, &nbit, model.voiced, 1); |
| 1026 | 928 | ||
| 1027 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ | 929 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ |
| 1028 | 930 | ||
| 1029 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 931 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
| 1030 | pack(bits, &nbit, model.voiced, 1); | 932 | pack(bits, &nbit, model.voiced, 1); |
| 1031 | 933 | ||
| 1032 | /* need to run this just to get LPC energy */ | 934 | /* need to run this just to get LPC energy */ |
| 1033 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 935 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 1034 | 936 | ||
| 1035 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 937 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
| 1036 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 938 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
| 1037 | 939 | ||
| 1038 | /* frame 3: - voicing ---------------------------------------------*/ | 940 | /* frame 3: - voicing ---------------------------------------------*/ |
| 1039 | 941 | ||
| 1040 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | 942 | analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]); |
| 1041 | pack(bits, &nbit, model.voiced, 1); | 943 | pack(bits, &nbit, model.voiced, 1); |
| 1042 | 944 | ||
| 1043 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ | 945 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ |
| 1044 | 946 | ||
| 1045 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | 947 | analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]); |
| 1046 | pack(bits, &nbit, model.voiced, 1); | 948 | pack(bits, &nbit, model.voiced, 1); |
| 1047 | 949 | ||
| 1048 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 950 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 1049 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 951 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
| 1050 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 952 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
| 1051 | 953 | ||
| 1052 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | 954 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); |
| 1053 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 955 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
| 1054 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); | 956 | pack(bits, &nbit, lsp_indexes[i], lsp_bits(i)); |
| 1055 | } | 957 | } |
| 1056 | 958 | ||
| 1057 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 959 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
| 1058 | } | 960 | } |
| 1059 | 961 | ||
| 1060 | |||
| 1061 | /*---------------------------------------------------------------------------*\ | 962 | /*---------------------------------------------------------------------------*\ |
| 1062 | 963 | ||
| 1063 | FUNCTION....: codec2_decode_1400 | 964 | FUNCTION....: codec2_decode_1400 |
| @@ -1068,83 +969,81 @@ void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
| 1068 | 969 | ||
| 1069 | \*---------------------------------------------------------------------------*/ | 970 | \*---------------------------------------------------------------------------*/ |
| 1070 | 971 | ||
| 1071 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 972 | void codec2_decode_1400(struct CODEC2 *c2, short speech[], |
| 1072 | { | 973 | const unsigned char *bits) { |
| 1073 | MODEL model[4]; | 974 | MODEL model[4]; |
| 1074 | int lsp_indexes[LPC_ORD]; | 975 | int lsp_indexes[LPC_ORD]; |
| 1075 | float lsps[4][LPC_ORD]; | 976 | float lsps[4][LPC_ORD]; |
| 1076 | int WoE_index; | 977 | int WoE_index; |
| 1077 | float e[4]; | 978 | float e[4]; |
| 1078 | float snr; | 979 | float snr; |
| 1079 | float ak[4][LPC_ORD+1]; | 980 | float ak[4][LPC_ORD + 1]; |
| 1080 | int i,j; | 981 | int i, j; |
| 1081 | unsigned int nbit = 0; | 982 | unsigned int nbit = 0; |
| 1082 | float weight; | 983 | float weight; |
| 1083 | COMP Aw[FFT_ENC]; | 984 | COMP Aw[FFT_ENC]; |
| 1084 | 985 | ||
| 1085 | assert(c2 != NULL); | 986 | assert(c2 != NULL); |
| 1086 | 987 | ||
| 1087 | /* only need to zero these out due to (unused) snr calculation */ | 988 | /* only need to zero these out due to (unused) snr calculation */ |
| 1088 | 989 | ||
| 1089 | for(i=0; i<4; i++) | 990 | for (i = 0; i < 4; i++) |
| 1090 | for(j=1; j<=MAX_AMP; j++) | 991 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
| 1091 | model[i].A[j] = 0.0; | 992 | |
| 1092 | 993 | /* unpack bits from channel ------------------------------------*/ | |
| 1093 | /* unpack bits from channel ------------------------------------*/ | 994 | |
| 1094 | 995 | /* this will partially fill the model params for the 4 x 10ms | |
| 1095 | /* this will partially fill the model params for the 4 x 10ms | 996 | frames */ |
| 1096 | frames */ | 997 | |
| 1097 | 998 | model[0].voiced = unpack(bits, &nbit, 1); | |
| 1098 | model[0].voiced = unpack(bits, &nbit, 1); | 999 | |
| 1099 | 1000 | model[1].voiced = unpack(bits, &nbit, 1); | |
| 1100 | model[1].voiced = unpack(bits, &nbit, 1); | 1001 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
| 1101 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1002 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); |
| 1102 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | 1003 | |
| 1103 | 1004 | model[2].voiced = unpack(bits, &nbit, 1); | |
| 1104 | model[2].voiced = unpack(bits, &nbit, 1); | 1005 | |
| 1105 | 1006 | model[3].voiced = unpack(bits, &nbit, 1); | |
| 1106 | model[3].voiced = unpack(bits, &nbit, 1); | 1007 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
| 1107 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1008 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); |
| 1108 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); | 1009 | |
| 1109 | 1010 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { | |
| 1110 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 1011 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); |
| 1111 | lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i)); | 1012 | } |
| 1112 | } | 1013 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); |
| 1113 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | 1014 | check_lsp_order(&lsps[3][0], LPC_ORD); |
| 1114 | check_lsp_order(&lsps[3][0], LPC_ORD); | 1015 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); |
| 1115 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | 1016 | |
| 1116 | 1017 | /* interpolate ------------------------------------------------*/ | |
| 1117 | /* interpolate ------------------------------------------------*/ | 1018 | |
| 1118 | 1019 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | |
| 1119 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 1020 | 10ms frame between 20ms samples */ |
| 1120 | 10ms frame between 20ms samples */ | 1021 | |
| 1121 | 1022 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | |
| 1122 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 1023 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
| 1123 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 1024 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); |
| 1124 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | 1025 | e[2] = interp_energy(e[1], e[3]); |
| 1125 | e[2] = interp_energy(e[1], e[3]); | 1026 | |
| 1126 | 1027 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | |
| 1127 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | 1028 | between, then recover spectral amplitudes */ |
| 1128 | between, then recover spectral amplitudes */ | 1029 | |
| 1129 | 1030 | for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) { | |
| 1130 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | 1031 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, |
| 1131 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | 1032 | LPC_ORD); |
| 1132 | } | 1033 | } |
| 1133 | for(i=0; i<4; i++) { | 1034 | for (i = 0; i < 4; i++) { |
| 1134 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 1035 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
| 1135 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 1036 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
| 1136 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 1037 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
| 1137 | apply_lpc_correction(&model[i]); | 1038 | apply_lpc_correction(&model[i]); |
| 1138 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 1039 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); |
| 1139 | } | 1040 | } |
| 1140 | 1041 | ||
| 1141 | /* update memories for next frame ----------------------------*/ | 1042 | /* update memories for next frame ----------------------------*/ |
| 1142 | 1043 | ||
| 1143 | c2->prev_model_dec = model[3]; | 1044 | c2->prev_model_dec = model[3]; |
| 1144 | c2->prev_e_dec = e[3]; | 1045 | c2->prev_e_dec = e[3]; |
| 1145 | for(i=0; i<LPC_ORD; i++) | 1046 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i]; |
| 1146 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
| 1147 | |||
| 1148 | } | 1047 | } |
| 1149 | 1048 | ||
| 1150 | /*---------------------------------------------------------------------------*\ | 1049 | /*---------------------------------------------------------------------------*\ |
| @@ -1175,66 +1074,56 @@ void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * | |||
| 1175 | 1074 | ||
| 1176 | \*---------------------------------------------------------------------------*/ | 1075 | \*---------------------------------------------------------------------------*/ |
| 1177 | 1076 | ||
| 1178 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 1077 | void codec2_encode_1300(struct CODEC2 *c2, unsigned char *bits, |
| 1179 | { | 1078 | short speech[]) { |
| 1180 | MODEL model; | 1079 | MODEL model; |
| 1181 | float lsps[LPC_ORD]; | 1080 | float lsps[LPC_ORD]; |
| 1182 | float ak[LPC_ORD+1]; | 1081 | float ak[LPC_ORD + 1]; |
| 1183 | float e; | 1082 | float e; |
| 1184 | int lsp_indexes[LPC_ORD]; | 1083 | int lsp_indexes[LPC_ORD]; |
| 1185 | int Wo_index, e_index; | 1084 | int Wo_index, e_index; |
| 1186 | int i; | 1085 | int i; |
| 1187 | unsigned int nbit = 0; | 1086 | unsigned int nbit = 0; |
| 1188 | //#ifdef PROFILE | ||
| 1189 | //unsigned int quant_start; | ||
| 1190 | //#endif | ||
| 1191 | 1087 | ||
| 1192 | assert(c2 != NULL); | 1088 | assert(c2 != NULL); |
| 1193 | 1089 | ||
| 1194 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 1090 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
| 1195 | 1091 | ||
| 1196 | /* frame 1: - voicing ---------------------------------------------*/ | 1092 | /* frame 1: - voicing ---------------------------------------------*/ |
| 1197 | 1093 | ||
| 1198 | analyse_one_frame(c2, &model, speech); | 1094 | analyse_one_frame(c2, &model, speech); |
| 1199 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | 1095 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); |
| 1200 | 1096 | ||
| 1201 | /* frame 2: - voicing ---------------------------------------------*/ | 1097 | /* frame 2: - voicing ---------------------------------------------*/ |
| 1202 | 1098 | ||
| 1203 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 1099 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
| 1204 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | 1100 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); |
| 1205 | 1101 | ||
| 1206 | /* frame 3: - voicing ---------------------------------------------*/ | 1102 | /* frame 3: - voicing ---------------------------------------------*/ |
| 1207 | 1103 | ||
| 1208 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | 1104 | analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]); |
| 1209 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | 1105 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); |
| 1210 | 1106 | ||
| 1211 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ | 1107 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/ |
| 1212 | 1108 | ||
| 1213 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | 1109 | analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]); |
| 1214 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); | 1110 | pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); |
| 1215 | 1111 | ||
| 1216 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); | 1112 | Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); |
| 1217 | pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray); | 1113 | pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray); |
| 1218 | 1114 | ||
| 1219 | //#ifdef PROFILE | 1115 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 1220 | //quant_start = machdep_profile_sample(); | 1116 | e_index = encode_energy(e, E_BITS); |
| 1221 | //#endif | 1117 | pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray); |
| 1222 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | ||
| 1223 | e_index = encode_energy(e, E_BITS); | ||
| 1224 | pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray); | ||
| 1225 | 1118 | ||
| 1226 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); | 1119 | encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); |
| 1227 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 1120 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { |
| 1228 | pack_natural_or_gray(bits, &nbit, lsp_indexes[i], lsp_bits(i), c2->gray); | 1121 | pack_natural_or_gray(bits, &nbit, lsp_indexes[i], lsp_bits(i), c2->gray); |
| 1229 | } | 1122 | } |
| 1230 | //#ifdef PROFILE | ||
| 1231 | //machdep_profile_sample_and_log(quant_start, " quant/packing"); | ||
| 1232 | //#endif | ||
| 1233 | 1123 | ||
| 1234 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 1124 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
| 1235 | } | 1125 | } |
| 1236 | 1126 | ||
| 1237 | |||
| 1238 | /*---------------------------------------------------------------------------*\ | 1127 | /*---------------------------------------------------------------------------*\ |
| 1239 | 1128 | ||
| 1240 | FUNCTION....: codec2_decode_1300 | 1129 | FUNCTION....: codec2_decode_1300 |
| @@ -1244,118 +1133,106 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
| 1244 | Decodes frames of 52 bits into 320 samples (40ms) of speech. | 1133 | Decodes frames of 52 bits into 320 samples (40ms) of speech. |
| 1245 | 1134 | ||
| 1246 | \*---------------------------------------------------------------------------*/ | 1135 | \*---------------------------------------------------------------------------*/ |
| 1247 | static int frames; | ||
| 1248 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est) | ||
| 1249 | { | ||
| 1250 | MODEL model[4]; | ||
| 1251 | int lsp_indexes[LPC_ORD]; | ||
| 1252 | float lsps[4][LPC_ORD]; | ||
| 1253 | int Wo_index, e_index; | ||
| 1254 | float e[4]; | ||
| 1255 | float snr; | ||
| 1256 | float ak[4][LPC_ORD+1]; | ||
| 1257 | int i,j; | ||
| 1258 | unsigned int nbit = 0; | ||
| 1259 | float weight; | ||
| 1260 | COMP Aw[FFT_ENC]; | ||
| 1261 | //PROFILE_VAR(recover_start); | ||
| 1262 | |||
| 1263 | assert(c2 != NULL); | ||
| 1264 | frames+= 4; | ||
| 1265 | /* only need to zero these out due to (unused) snr calculation */ | ||
| 1266 | |||
| 1267 | for(i=0; i<4; i++) | ||
| 1268 | for(j=1; j<=MAX_AMP; j++) | ||
| 1269 | model[i].A[j] = 0.0; | ||
| 1270 | |||
| 1271 | /* unpack bits from channel ------------------------------------*/ | ||
| 1272 | |||
| 1273 | /* this will partially fill the model params for the 4 x 10ms | ||
| 1274 | frames */ | ||
| 1275 | |||
| 1276 | model[0].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
| 1277 | model[1].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
| 1278 | model[2].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
| 1279 | model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
| 1280 | |||
| 1281 | Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray); | ||
| 1282 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | ||
| 1283 | model[3].L = PI/model[3].Wo; | ||
| 1284 | 1136 | ||
| 1285 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); | 1137 | void codec2_decode_1300(struct CODEC2 *c2, short speech[], |
| 1286 | e[3] = decode_energy(e_index, E_BITS); | 1138 | const unsigned char *bits, float ber_est) { |
| 1287 | //fprintf(stderr, "%d %f\n", e_index, e[3]); | 1139 | MODEL model[4]; |
| 1140 | int lsp_indexes[LPC_ORD]; | ||
| 1141 | float lsps[4][LPC_ORD]; | ||
| 1142 | int Wo_index, e_index; | ||
| 1143 | float e[4]; | ||
| 1144 | float snr; | ||
| 1145 | float ak[4][LPC_ORD + 1]; | ||
| 1146 | int i, j; | ||
| 1147 | unsigned int nbit = 0; | ||
| 1148 | float weight; | ||
| 1149 | COMP Aw[FFT_ENC]; | ||
| 1150 | |||
| 1151 | assert(c2 != NULL); | ||
| 1152 | |||
| 1153 | /* only need to zero these out due to (unused) snr calculation */ | ||
| 1154 | |||
| 1155 | for (i = 0; i < 4; i++) | ||
| 1156 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; | ||
| 1157 | |||
| 1158 | /* unpack bits from channel ------------------------------------*/ | ||
| 1159 | |||
| 1160 | /* this will partially fill the model params for the 4 x 10ms | ||
| 1161 | frames */ | ||
| 1162 | |||
| 1163 | model[0].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
| 1164 | model[1].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
| 1165 | model[2].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
| 1166 | model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); | ||
| 1167 | |||
| 1168 | Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray); | ||
| 1169 | model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); | ||
| 1170 | model[3].L = PI / model[3].Wo; | ||
| 1171 | |||
| 1172 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); | ||
| 1173 | e[3] = decode_energy(e_index, E_BITS); | ||
| 1174 | |||
| 1175 | for (i = 0; i < LSP_SCALAR_INDEXES; i++) { | ||
| 1176 | lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray); | ||
| 1177 | } | ||
| 1178 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | ||
| 1179 | check_lsp_order(&lsps[3][0], LPC_ORD); | ||
| 1180 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | ||
| 1181 | |||
| 1182 | if (ber_est > 0.15) { | ||
| 1183 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0; | ||
| 1184 | e[3] = decode_energy(10, E_BITS); | ||
| 1185 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0); | ||
| 1186 | // fprintf(stderr, "soft mute\n"); | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | /* interpolate ------------------------------------------------*/ | ||
| 1190 | |||
| 1191 | /* Wo, energy, and LSPs are sampled every 40ms so we interpolate | ||
| 1192 | the 3 frames in between */ | ||
| 1193 | |||
| 1194 | for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) { | ||
| 1195 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, | ||
| 1196 | LPC_ORD); | ||
| 1197 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, | ||
| 1198 | c2->c2const.Wo_min); | ||
| 1199 | e[i] = interp_energy2(c2->prev_e_dec, e[3], weight); | ||
| 1200 | } | ||
| 1201 | |||
| 1202 | /* then recover spectral amplitudes */ | ||
| 1203 | |||
| 1204 | for (i = 0; i < 4; i++) { | ||
| 1205 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
| 1206 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
| 1207 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
| 1208 | apply_lpc_correction(&model[i]); | ||
| 1209 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); | ||
| 1210 | |||
| 1211 | /* dump parameters for deep learning experiments */ | ||
| 1288 | 1212 | ||
| 1289 | for(i=0; i<LSP_SCALAR_INDEXES; i++) { | 1213 | if (c2->fmlfeat != NULL) { |
| 1290 | lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray); | 1214 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ |
| 1291 | } | 1215 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); |
| 1292 | decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD); | 1216 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); |
| 1293 | check_lsp_order(&lsps[3][0], LPC_ORD); | 1217 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); |
| 1294 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | 1218 | float voiced_float = model[i].voiced; |
| 1295 | 1219 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); | |
| 1296 | if (ber_est > 0.15) { | 1220 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); |
| 1297 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0; | 1221 | } |
| 1298 | e[3] = decode_energy(10, E_BITS); | 1222 | } |
| 1299 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0); | 1223 | |
| 1300 | //fprintf(stderr, "soft mute\n"); | 1224 | #ifdef DUMP |
| 1301 | } | 1225 | dump_lsp_(&lsps[3][0]); |
| 1302 | 1226 | dump_ak_(&ak[3][0], LPC_ORD); | |
| 1303 | /* interpolate ------------------------------------------------*/ | 1227 | #endif |
| 1304 | |||
| 1305 | /* Wo, energy, and LSPs are sampled every 40ms so we interpolate | ||
| 1306 | the 3 frames in between */ | ||
| 1307 | |||
| 1308 | //PROFILE_SAMPLE(recover_start); | ||
| 1309 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
| 1310 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | ||
| 1311 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
| 1312 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
| 1313 | } | ||
| 1314 | 1228 | ||
| 1315 | /* then recover spectral amplitudes */ | 1229 | /* update memories for next frame ----------------------------*/ |
| 1316 | |||
| 1317 | for(i=0; i<4; i++) { | ||
| 1318 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | ||
| 1319 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | ||
| 1320 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
| 1321 | apply_lpc_correction(&model[i]); | ||
| 1322 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
| 1323 | |||
| 1324 | /* dump parameters for deep learning experiments */ | ||
| 1325 | |||
| 1326 | if (c2->fmlfeat != NULL) { | ||
| 1327 | /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */ | ||
| 1328 | fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->fmlfeat); | ||
| 1329 | fwrite(&e[i], 1, sizeof(float), c2->fmlfeat); | ||
| 1330 | fwrite(&model[i].Wo, 1, sizeof(float), c2->fmlfeat); | ||
| 1331 | float voiced_float = model[i].voiced; | ||
| 1332 | fwrite(&voiced_float, 1, sizeof(float), c2->fmlfeat); | ||
| 1333 | fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->fmlfeat); | ||
| 1334 | } | ||
| 1335 | } | ||
| 1336 | /* | ||
| 1337 | for(i=0; i<4; i++) { | ||
| 1338 | printf("%d Wo: %f L: %d v: %d\n", frames, model[i].Wo, model[i].L, model[i].voiced); | ||
| 1339 | } | ||
| 1340 | if (frames == 4*50) | ||
| 1341 | exit(0); | ||
| 1342 | */ | ||
| 1343 | //PROFILE_SAMPLE_AND_LOG2(recover_start, " recover"); | ||
| 1344 | #ifdef DUMP | ||
| 1345 | dump_lsp_(&lsps[3][0]); | ||
| 1346 | dump_ak_(&ak[3][0], LPC_ORD); | ||
| 1347 | #endif | ||
| 1348 | |||
| 1349 | /* update memories for next frame ----------------------------*/ | ||
| 1350 | |||
| 1351 | c2->prev_model_dec = model[3]; | ||
| 1352 | c2->prev_e_dec = e[3]; | ||
| 1353 | for(i=0; i<LPC_ORD; i++) | ||
| 1354 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
| 1355 | 1230 | ||
| 1231 | c2->prev_model_dec = model[3]; | ||
| 1232 | c2->prev_e_dec = e[3]; | ||
| 1233 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i]; | ||
| 1356 | } | 1234 | } |
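When c2->fmlfeat is non-NULL the 1300 decoder also dumps one raw float record (native byte order) per 10 ms frame, in the order spelled out by the comment above: 10 LSPs, energy, Wo, a voicing flag, then 10 LPC coefficients, 23 floats in all. A minimal reader for such a dump could look like the following; the struct and function names are invented here, only the field order comes from the code:

    #include <stdio.h>

    /* One 10 ms feature record as written by the fmlfeat dump:
       10 LSPs, energy, Wo, voicing flag, 10 LPCs (ak[1..10]). */
    typedef struct {
      float lsps[10];
      float e;
      float Wo;
      float voiced;
      float ak[10];
    } FeatureRecord;            /* 23 floats, no padding between members */

    /* Count complete records in a feature dump; returns -1 on open failure. */
    static long count_fmlfeat_records(const char *path) {
      FILE *f = fopen(path, "rb");
      if (f == NULL) return -1;
      FeatureRecord rec;
      long n = 0;
      while (fread(&rec, sizeof rec, 1, f) == 1) n++;
      fclose(f);
      return n;
    }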
| 1357 | 1235 | ||
| 1358 | |||
| 1359 | /*---------------------------------------------------------------------------*\ | 1236 | /*---------------------------------------------------------------------------*\ |
| 1360 | 1237 | ||
| 1361 | FUNCTION....: codec2_encode_1200 | 1238 | FUNCTION....: codec2_encode_1200 |
| @@ -1384,63 +1261,62 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * | |||
| 1384 | 1261 | ||
| 1385 | \*---------------------------------------------------------------------------*/ | 1262 | \*---------------------------------------------------------------------------*/ |
| 1386 | 1263 | ||
| 1387 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 1264 | void codec2_encode_1200(struct CODEC2 *c2, unsigned char *bits, |
| 1388 | { | 1265 | short speech[]) { |
| 1389 | MODEL model; | 1266 | MODEL model; |
| 1390 | float lsps[LPC_ORD]; | 1267 | float lsps[LPC_ORD]; |
| 1391 | float lsps_[LPC_ORD]; | 1268 | float lsps_[LPC_ORD]; |
| 1392 | float ak[LPC_ORD+1]; | 1269 | float ak[LPC_ORD + 1]; |
| 1393 | float e; | 1270 | float e; |
| 1394 | int lsp_indexes[LPC_ORD]; | 1271 | int lsp_indexes[LPC_ORD]; |
| 1395 | int WoE_index; | 1272 | int WoE_index; |
| 1396 | int i; | 1273 | int i; |
| 1397 | int spare = 0; | 1274 | int spare = 0; |
| 1398 | unsigned int nbit = 0; | 1275 | unsigned int nbit = 0; |
| 1399 | 1276 | ||
| 1400 | assert(c2 != NULL); | 1277 | assert(c2 != NULL); |
| 1401 | 1278 | ||
| 1402 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 1279 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
| 1403 | 1280 | ||
| 1404 | /* frame 1: - voicing ---------------------------------------------*/ | 1281 | /* frame 1: - voicing ---------------------------------------------*/ |
| 1405 | 1282 | ||
| 1406 | analyse_one_frame(c2, &model, speech); | 1283 | analyse_one_frame(c2, &model, speech); |
| 1407 | pack(bits, &nbit, model.voiced, 1); | 1284 | pack(bits, &nbit, model.voiced, 1); |
| 1408 | 1285 | ||
| 1409 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ | 1286 | /* frame 2: - voicing, joint Wo & E -------------------------------*/ |
| 1410 | 1287 | ||
| 1411 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); | 1288 | analyse_one_frame(c2, &model, &speech[c2->n_samp]); |
| 1412 | pack(bits, &nbit, model.voiced, 1); | 1289 | pack(bits, &nbit, model.voiced, 1); |
| 1413 | 1290 | ||
| 1414 | /* need to run this just to get LPC energy */ | 1291 | /* need to run this just to get LPC energy */ |
| 1415 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 1292 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 1416 | 1293 | ||
| 1417 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 1294 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
| 1418 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 1295 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
| 1419 | 1296 | ||
| 1420 | /* frame 3: - voicing ---------------------------------------------*/ | 1297 | /* frame 3: - voicing ---------------------------------------------*/ |
| 1421 | 1298 | ||
| 1422 | analyse_one_frame(c2, &model, &speech[2*c2->n_samp]); | 1299 | analyse_one_frame(c2, &model, &speech[2 * c2->n_samp]); |
| 1423 | pack(bits, &nbit, model.voiced, 1); | 1300 | pack(bits, &nbit, model.voiced, 1); |
| 1424 | 1301 | ||
| 1425 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ | 1302 | /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/ |
| 1426 | 1303 | ||
| 1427 | analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); | 1304 | analyse_one_frame(c2, &model, &speech[3 * c2->n_samp]); |
| 1428 | pack(bits, &nbit, model.voiced, 1); | 1305 | pack(bits, &nbit, model.voiced, 1); |
| 1429 | 1306 | ||
| 1430 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); | 1307 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); |
| 1431 | WoE_index = encode_WoE(&model, e, c2->xq_enc); | 1308 | WoE_index = encode_WoE(&model, e, c2->xq_enc); |
| 1432 | pack(bits, &nbit, WoE_index, WO_E_BITS); | 1309 | pack(bits, &nbit, WoE_index, WO_E_BITS); |
| 1433 | 1310 | ||
| 1434 | encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD); | 1311 | encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD); |
| 1435 | for(i=0; i<LSP_PRED_VQ_INDEXES; i++) { | 1312 | for (i = 0; i < LSP_PRED_VQ_INDEXES; i++) { |
| 1436 | pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i)); | 1313 | pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i)); |
| 1437 | } | 1314 | } |
| 1438 | pack(bits, &nbit, spare, 1); | 1315 | pack(bits, &nbit, spare, 1); |
| 1439 | 1316 | ||
| 1440 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 1317 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); |
| 1441 | } | 1318 | } |
| 1442 | 1319 | ||
| 1443 | |||
| 1444 | /*---------------------------------------------------------------------------*\ | 1320 | /*---------------------------------------------------------------------------*\ |
| 1445 | 1321 | ||
| 1446 | FUNCTION....: codec2_decode_1200 | 1322 | FUNCTION....: codec2_decode_1200 |
| @@ -1451,494 +1327,83 @@ void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
| 1451 | 1327 | ||
| 1452 | \*---------------------------------------------------------------------------*/ | 1328 | \*---------------------------------------------------------------------------*/ |
| 1453 | 1329 | ||
| 1454 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 1330 | void codec2_decode_1200(struct CODEC2 *c2, short speech[], |
| 1455 | { | 1331 | const unsigned char *bits) { |
| 1456 | MODEL model[4]; | 1332 | MODEL model[4]; |
| 1457 | int lsp_indexes[LPC_ORD]; | 1333 | int lsp_indexes[LPC_ORD]; |
| 1458 | float lsps[4][LPC_ORD]; | 1334 | float lsps[4][LPC_ORD]; |
| 1459 | int WoE_index; | 1335 | int WoE_index; |
| 1460 | float e[4]; | 1336 | float e[4]; |
| 1461 | float snr; | 1337 | float snr; |
| 1462 | float ak[4][LPC_ORD+1]; | 1338 | float ak[4][LPC_ORD + 1]; |
| 1463 | int i,j; | 1339 | int i, j; |
| 1464 | unsigned int nbit = 0; | 1340 | unsigned int nbit = 0; |
| 1465 | float weight; | 1341 | float weight; |
| 1466 | COMP Aw[FFT_ENC]; | 1342 | COMP Aw[FFT_ENC]; |
| 1467 | 1343 | ||
| 1468 | assert(c2 != NULL); | 1344 | assert(c2 != NULL); |
| 1469 | 1345 | ||
| 1470 | /* only need to zero these out due to (unused) snr calculation */ | 1346 | /* only need to zero these out due to (unused) snr calculation */ |
| 1471 | 1347 | ||
| 1472 | for(i=0; i<4; i++) | 1348 | for (i = 0; i < 4; i++) |
| 1473 | for(j=1; j<=MAX_AMP; j++) | 1349 | for (j = 1; j <= MAX_AMP; j++) model[i].A[j] = 0.0; |
| 1474 | model[i].A[j] = 0.0; | 1350 | |
| 1475 | 1351 | /* unpack bits from channel ------------------------------------*/ | |
| 1476 | /* unpack bits from channel ------------------------------------*/ | 1352 | |
| 1477 | 1353 | /* this will partially fill the model params for the 4 x 10ms | |
| 1478 | /* this will partially fill the model params for the 4 x 10ms | 1354 | frames */ |
| 1479 | frames */ | 1355 | |
| 1480 | 1356 | model[0].voiced = unpack(bits, &nbit, 1); | |
| 1481 | model[0].voiced = unpack(bits, &nbit, 1); | 1357 | |
| 1482 | 1358 | model[1].voiced = unpack(bits, &nbit, 1); | |
| 1483 | model[1].voiced = unpack(bits, &nbit, 1); | 1359 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
| 1484 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1360 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); |
| 1485 | decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); | 1361 | |
| 1486 | 1362 | model[2].voiced = unpack(bits, &nbit, 1); | |
| 1487 | model[2].voiced = unpack(bits, &nbit, 1); | 1363 | |
| 1488 | 1364 | model[3].voiced = unpack(bits, &nbit, 1); | |
| 1489 | model[3].voiced = unpack(bits, &nbit, 1); | 1365 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
| 1490 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1366 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); |
| 1491 | decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); | 1367 | |
| 1492 | 1368 | for (i = 0; i < LSP_PRED_VQ_INDEXES; i++) { | |
| 1493 | for(i=0; i<LSP_PRED_VQ_INDEXES; i++) { | 1369 | lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i)); |
| 1494 | lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i)); | 1370 | } |
| 1495 | } | 1371 | decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD, 0); |
| 1496 | decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD , 0); | 1372 | check_lsp_order(&lsps[3][0], LPC_ORD); |
| 1497 | check_lsp_order(&lsps[3][0], LPC_ORD); | 1373 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); |
| 1498 | bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); | 1374 | |
| 1499 | 1375 | /* interpolate ------------------------------------------------*/ | |
| 1500 | /* interpolate ------------------------------------------------*/ | 1376 | |
| 1501 | 1377 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | |
| 1502 | /* Wo and energy are sampled every 20ms, so we interpolate just 1 | 1378 | 10ms frame between 20ms samples */ |
| 1503 | 10ms frame between 20ms samples */ | 1379 | |
| 1504 | 1380 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | |
| 1505 | interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); | 1381 | e[0] = interp_energy(c2->prev_e_dec, e[1]); |
| 1506 | e[0] = interp_energy(c2->prev_e_dec, e[1]); | 1382 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); |
| 1507 | interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); | 1383 | e[2] = interp_energy(e[1], e[3]); |
| 1508 | e[2] = interp_energy(e[1], e[3]); | 1384 | |
| 1509 | 1385 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | |
| 1510 | /* LSPs are sampled every 40ms so we interpolate the 3 frames in | 1386 | between, then recover spectral amplitudes */ |
| 1511 | between, then recover spectral amplitudes */ | 1387 | |
| 1512 | 1388 | for (i = 0, weight = 0.25; i < 3; i++, weight += 0.25) { | |
| 1513 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | 1389 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, |
| 1514 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); | 1390 | LPC_ORD); |
| 1515 | } | 1391 | } |
| 1516 | for(i=0; i<4; i++) { | 1392 | for (i = 0; i < 4; i++) { |
| 1517 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); | 1393 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); |
| 1518 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, | 1394 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, |
| 1519 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | 1395 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); |
| 1520 | apply_lpc_correction(&model[i]); | 1396 | apply_lpc_correction(&model[i]); |
| 1521 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | 1397 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], Aw, 1.0); |
| 1522 | } | 1398 | } |
| 1523 | 1399 | ||
| 1524 | /* update memories for next frame ----------------------------*/ | 1400 | /* update memories for next frame ----------------------------*/ |
| 1525 | 1401 | ||
| 1526 | c2->prev_model_dec = model[3]; | 1402 | c2->prev_model_dec = model[3]; |
| 1527 | c2->prev_e_dec = e[3]; | 1403 | c2->prev_e_dec = e[3]; |
| 1528 | for(i=0; i<LPC_ORD; i++) | 1404 | for (i = 0; i < LPC_ORD; i++) c2->prev_lsps_dec[i] = lsps[3][i]; |
| 1529 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
| 1530 | } | 1405 | } |
| 1531 | 1406 | ||
| 1532 | |||
| 1533 | /*---------------------------------------------------------------------------*\ | ||
| 1534 | |||
| 1535 | FUNCTION....: codec2_encode_700 | ||
| 1536 | AUTHOR......: David Rowe | ||
| 1537 | DATE CREATED: April 2015 | ||
| 1538 | |||
| 1539 | Encodes 320 speech samples (40ms of speech) into 28 bits. | ||
| 1540 | |||
| 1541 | The codec2 algorithm actually operates internally on 10ms (80 | ||
| 1542 | sample) frames, so we run the encoding algorithm four times: | ||
| 1543 | |||
| 1544 | frame 0: nothing | ||
| 1545 | frame 1: nothing | ||
| 1546 | frame 2: nothing | ||
| 1547 | frame 3: voicing bit, scalar Wo and E, 17 bit LSP MEL scalar, 2 spare | ||
| 1548 | |||
| 1549 | The bit allocation is: | ||
| 1550 | |||
| 1551 | Parameter                      frames 1-3   frame 4   Total | ||
| 1552 | ----------------------------------------------------------- | ||
| 1553 | Harmonic magnitudes (LSPs)         0           17       17 | ||
| 1554 | Energy                             0            3        3 | ||
| 1555 | log Wo                             0            5        5 | ||
| 1556 | Voicing                            0            1        1 | ||
| 1557 | spare                              0            2        2 | ||
| 1558 | TOTAL                              0           28       28 | ||
| 1559 | |||
| 1560 | \*---------------------------------------------------------------------------*/ | ||
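The rate follows directly from that allocation: 28 bits every 40 ms frame is 28 / 0.040 = 700 bit/s, in the same way that the 52-bit frames of the 1300 mode above work out to 52 / 0.040 = 1300 bit/s.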
| 1561 | |||
| 1562 | void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
| 1563 | { | ||
| 1564 | MODEL model; | ||
| 1565 | float lsps[LPC_ORD_LOW]; | ||
| 1566 | float mel[LPC_ORD_LOW]; | ||
| 1567 | float ak[LPC_ORD_LOW+1]; | ||
| 1568 | float e, f; | ||
| 1569 | int indexes[LPC_ORD_LOW]; | ||
| 1570 | int Wo_index, e_index, i; | ||
| 1571 | unsigned int nbit = 0; | ||
| 1572 | float bpf_out[4*c2->n_samp]; | ||
| 1573 | short bpf_speech[4*c2->n_samp]; | ||
| 1574 | int spare = 0; | ||
| 1575 | |||
| 1576 | assert(c2 != NULL); | ||
| 1577 | |||
| 1578 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
| 1579 | |||
| 1580 | /* band pass filter */ | ||
| 1581 | |||
| 1582 | for(i=0; i<BPF_N; i++) | ||
| 1583 | c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i]; | ||
| 1584 | for(i=0; i<4*c2->n_samp; i++) | ||
| 1585 | c2->bpf_buf[BPF_N+i] = speech[i]; | ||
| 1586 | inverse_filter(&c2->bpf_buf[BPF_N], bpf, 4*c2->n_samp, bpf_out, BPF_N-1); | ||
| 1587 | for(i=0; i<4*c2->n_samp; i++) | ||
| 1588 | bpf_speech[i] = bpf_out[i]; | ||
| 1589 | |||
| 1590 | /* frame 1 --------------------------------------------------------*/ | ||
| 1591 | |||
| 1592 | analyse_one_frame(c2, &model, bpf_speech); | ||
| 1593 | |||
| 1594 | /* frame 2 --------------------------------------------------------*/ | ||
| 1595 | |||
| 1596 | analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]); | ||
| 1597 | |||
| 1598 | /* frame 3 --------------------------------------------------------*/ | ||
| 1599 | |||
| 1600 | analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]); | ||
| 1601 | |||
| 1602 | /* frame 4: - voicing, scalar Wo & E, scalar LSPs -----------------*/ | ||
| 1603 | |||
| 1604 | analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]); | ||
| 1605 | pack(bits, &nbit, model.voiced, 1); | ||
| 1606 | Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5); | ||
| 1607 | pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); | ||
| 1608 | |||
| 1609 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW); | ||
| 1610 | e_index = encode_energy(e, 3); | ||
| 1611 | pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); | ||
| 1612 | |||
| 1613 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
| 1614 | f = (4000.0/PI)*lsps[i]; | ||
| 1615 | mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5); | ||
| 1616 | } | ||
| 1617 | encode_mels_scalar(indexes, mel, LPC_ORD_LOW); | ||
| 1618 | |||
| 1619 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
| 1620 | pack_natural_or_gray(bits, &nbit, indexes[i], mel_bits(i), c2->gray); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | pack_natural_or_gray(bits, &nbit, spare, 2, c2->gray); | ||
| 1624 | |||
| 1625 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
| 1626 | } | ||
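The 700-family modes quantise the spectral envelope on a mel scale rather than directly on the LSP frequencies: each LSP (in radians, 8 kHz sample rate) is converted to Hz, warped with 2595*log10(1 + f/700) and rounded, and the decoder applies the inverse warp (see codec2_decode_700 below). Collecting the two formulas that appear in the code into one small helper pair, for reference only:

    #include <math.h>

    #define PI_F 3.141592654f          /* stand-in for the codec's PI */

    /* LSP in radians -> rounded mel value, as in the encoder loop above. */
    static float lsp_to_mel(float lsp) {
      float f = (4000.0f / PI_F) * lsp;               /* radians -> Hz */
      return floorf(2595.0f * log10f(1.0f + f / 700.0f) + 0.5f);
    }

    /* Inverse warp, as in codec2_decode_700: mel -> Hz -> radians. */
    static float mel_to_lsp(float mel) {
      float f = 700.0f * (powf(10.0f, mel / 2595.0f) - 1.0f);
      return f * (PI_F / 4000.0f);
    }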
| 1627 | |||
| 1628 | |||
| 1629 | /*---------------------------------------------------------------------------*\ | ||
| 1630 | |||
| 1631 | FUNCTION....: codec2_decode_700 | ||
| 1632 | AUTHOR......: David Rowe | ||
| 1633 | DATE CREATED: April 2015 | ||
| 1634 | |||
| 1635 | Decodes frames of 28 bits into 320 samples (40ms) of speech. | ||
| 1636 | |||
| 1637 | \*---------------------------------------------------------------------------*/ | ||
| 1638 | |||
| 1639 | void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
| 1640 | { | ||
| 1641 | MODEL model[4]; | ||
| 1642 | int indexes[LPC_ORD_LOW]; | ||
| 1643 | float mel[LPC_ORD_LOW]; | ||
| 1644 | float lsps[4][LPC_ORD_LOW]; | ||
| 1645 | int Wo_index, e_index; | ||
| 1646 | float e[4]; | ||
| 1647 | float snr, f_; | ||
| 1648 | float ak[4][LPC_ORD_LOW+1]; | ||
| 1649 | int i,j; | ||
| 1650 | unsigned int nbit = 0; | ||
| 1651 | float weight; | ||
| 1652 | COMP Aw[FFT_ENC]; | ||
| 1653 | |||
| 1654 | assert(c2 != NULL); | ||
| 1655 | |||
| 1656 | /* only need to zero these out due to (unused) snr calculation */ | ||
| 1657 | |||
| 1658 | for(i=0; i<4; i++) | ||
| 1659 | for(j=1; j<=MAX_AMP; j++) | ||
| 1660 | model[i].A[j] = 0.0; | ||
| 1661 | |||
| 1662 | /* unpack bits from channel ------------------------------------*/ | ||
| 1663 | |||
| 1664 | model[3].voiced = unpack(bits, &nbit, 1); | ||
| 1665 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; | ||
| 1666 | |||
| 1667 | Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); | ||
| 1668 | model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5); | ||
| 1669 | model[3].L = PI/model[3].Wo; | ||
| 1670 | |||
| 1671 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
| 1672 | e[3] = decode_energy(e_index, 3); | ||
| 1673 | |||
| 1674 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
| 1675 | indexes[i] = unpack_natural_or_gray(bits, &nbit, mel_bits(i), c2->gray); | ||
| 1676 | } | ||
| 1677 | |||
| 1678 | decode_mels_scalar(mel, indexes, LPC_ORD_LOW); | ||
| 1679 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
| 1680 | f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0); | ||
| 1681 | lsps[3][i] = f_*(PI/4000.0); | ||
| 1682 | //printf("lsps[3][%d] %f\n", i, lsps[3][i]); | ||
| 1683 | } | ||
| 1684 | |||
| 1685 | check_lsp_order(&lsps[3][0], LPC_ORD_LOW); | ||
| 1686 | bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 50.0, 100.0); | ||
| 1687 | |||
| 1688 | #ifdef MASK_NOT_FOR_NOW | ||
| 1689 | /* first pass at soft decn error masking, needs further work */ | ||
| 1690 | /* If soft dec info available expand further for low power frames */ | ||
| 1691 | |||
| 1692 | if (c2->softdec) { | ||
| 1693 | float e = 0.0; | ||
| 1694 | for(i=9; i<9+17; i++) | ||
| 1695 | e += c2->softdec[i]*c2->softdec[i]; | ||
| 1696 | e /= 6.0; | ||
| 1697 | //fprintf(stderr, "e: %f\n", e); | ||
| 1698 | //if (e < 0.3) | ||
| 1699 | // bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 150.0, 300.0); | ||
| 1700 | } | ||
| 1701 | #endif | ||
| 1702 | |||
| 1703 | /* interpolate ------------------------------------------------*/ | ||
| 1704 | |||
| 1705 | /* LSPs, Wo, and energy are sampled every 40ms so we interpolate | ||
| 1706 | the 3 frames in between, then recover spectral amplitudes */ | ||
| 1707 | |||
| 1708 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
| 1709 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); | ||
| 1710 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
| 1711 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
| 1712 | } | ||
| 1713 | for(i=0; i<4; i++) { | ||
| 1714 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW); | ||
| 1715 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0, | ||
| 1716 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
| 1717 | apply_lpc_correction(&model[i]); | ||
| 1718 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
| 1719 | } | ||
| 1720 | |||
| 1721 | #ifdef DUMP | ||
| 1722 | dump_lsp_(&lsps[3][0]); | ||
| 1723 | dump_ak_(&ak[3][0], LPC_ORD_LOW); | ||
| 1724 | dump_model(&model[3]); | ||
| 1725 | if (c2->softdec) | ||
| 1726 | dump_softdec(c2->softdec, nbit); | ||
| 1727 | #endif | ||
| 1728 | |||
| 1729 | /* update memories for next frame ----------------------------*/ | ||
| 1730 | |||
| 1731 | c2->prev_model_dec = model[3]; | ||
| 1732 | c2->prev_e_dec = e[3]; | ||
| 1733 | for(i=0; i<LPC_ORD_LOW; i++) | ||
| 1734 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
| 1735 | } | ||
| 1736 | |||
| 1737 | |||
| 1738 | /*---------------------------------------------------------------------------*\ | ||
| 1739 | |||
| 1740 | FUNCTION....: codec2_encode_700b | ||
| 1741 | AUTHOR......: David Rowe | ||
| 1742 | DATE CREATED: August 2015 | ||
| 1743 | |||
| 1744 | Version b of 700 bit/s codec. After some experiments over the air I | ||
| 1745 | was unhappy with the rate 700 codec so spent a few weeks | ||
| 1746 | trying to improve the speech quality. This version uses a wider BPF | ||
| 1747 | and vector quantised mel-lsps. | ||
| 1748 | |||
| 1749 | Encodes 320 speech samples (40ms of speech) into 28 bits. | ||
| 1750 | |||
| 1751 | The codec2 algorithm actually operates internally on 10ms (80 | ||
| 1752 | sample) frames, so we run the encoding algorithm four times: | ||
| 1753 | |||
| 1754 | frame 0: nothing | ||
| 1755 | frame 1: nothing | ||
| 1756 | frame 2: nothing | ||
| 1757 | frame 3: voicing bit, 5 bit scalar Wo and 3 bit E, 18 bit LSP MEL VQ, | ||
| 1758 | 1 spare | ||
| 1759 | |||
| 1760 | The bit allocation is: | ||
| 1761 | |||
| 1762 | Parameter                      frames 1-3   frame 4   Total | ||
| 1763 | ----------------------------------------------------------- | ||
| 1764 | Harmonic magnitudes (LSPs)         0           18       18 | ||
| 1765 | Energy                             0            3        3 | ||
| 1766 | log Wo                             0            5        5 | ||
| 1767 | Voicing                            0            1        1 | ||
| 1768 | spare                              0            1        1 | ||
| 1769 | TOTAL                              0           28       28 | ||
| 1770 | |||
| 1771 | \*---------------------------------------------------------------------------*/ | ||
| 1772 | |||
| 1773 | void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
| 1774 | { | ||
| 1775 | MODEL model; | ||
| 1776 | float lsps[LPC_ORD_LOW]; | ||
| 1777 | float mel[LPC_ORD_LOW]; | ||
| 1778 | float mel_[LPC_ORD_LOW]; | ||
| 1779 | float ak[LPC_ORD_LOW+1]; | ||
| 1780 | float e, f; | ||
| 1781 | int indexes[3]; | ||
| 1782 | int Wo_index, e_index, i; | ||
| 1783 | unsigned int nbit = 0; | ||
| 1784 | float bpf_out[4*c2->n_samp]; | ||
| 1785 | short bpf_speech[4*c2->n_samp]; | ||
| 1786 | int spare = 0; | ||
| 1787 | |||
| 1788 | assert(c2 != NULL); | ||
| 1789 | |||
| 1790 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
| 1791 | |||
| 1792 | /* band pass filter */ | ||
| 1793 | |||
| 1794 | for(i=0; i<BPF_N; i++) | ||
| 1795 | c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i]; | ||
| 1796 | for(i=0; i<4*c2->n_samp; i++) | ||
| 1797 | c2->bpf_buf[BPF_N+i] = speech[i]; | ||
| 1798 | inverse_filter(&c2->bpf_buf[BPF_N], bpfb, 4*c2->n_samp, bpf_out, BPF_N-1); | ||
| 1799 | for(i=0; i<4*c2->n_samp; i++) | ||
| 1800 | bpf_speech[i] = bpf_out[i]; | ||
| 1801 | |||
| 1802 | /* frame 1 --------------------------------------------------------*/ | ||
| 1803 | |||
| 1804 | analyse_one_frame(c2, &model, bpf_speech); | ||
| 1805 | |||
| 1806 | /* frame 2 --------------------------------------------------------*/ | ||
| 1807 | |||
| 1808 | analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]); | ||
| 1809 | |||
| 1810 | /* frame 3 --------------------------------------------------------*/ | ||
| 1811 | |||
| 1812 | analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]); | ||
| 1813 | |||
| 1814 | /* frame 4: - voicing, scalar Wo & E, VQ mel LSPs -----------------*/ | ||
| 1815 | |||
| 1816 | analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]); | ||
| 1817 | pack(bits, &nbit, model.voiced, 1); | ||
| 1818 | Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5); | ||
| 1819 | pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); | ||
| 1820 | |||
| 1821 | e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW); | ||
| 1822 | e_index = encode_energy(e, 3); | ||
| 1823 | pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); | ||
| 1824 | |||
| 1825 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
| 1826 | f = (4000.0/PI)*lsps[i]; | ||
| 1827 | mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5); | ||
| 1828 | } | ||
| 1829 | lspmelvq_mbest_encode(indexes, mel, mel_, LPC_ORD_LOW, 5); | ||
| 1830 | |||
| 1831 | for(i=0; i<3; i++) { | ||
| 1832 | pack_natural_or_gray(bits, &nbit, indexes[i], lspmelvq_cb_bits(i), c2->gray); | ||
| 1833 | } | ||
| 1834 | |||
| 1835 | pack_natural_or_gray(bits, &nbit, spare, 1, c2->gray); | ||
| 1836 | |||
| 1837 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
| 1838 | } | ||
| 1839 | |||
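The mel warping used by codec2_encode_700b()/codec2_decode_700b() above is the standard 2595*log10(1 + f/700) curve, rounded to integer mel steps before the VQ. A minimal standalone sketch of the Hz/mel round trip (the helper names are ours, not part of the codec2 API):

    #include <math.h>

    /* Hz -> mel, matching the rounding in codec2_encode_700b() */
    static float hz_to_mel(float f) {
        return floorf(2595.0f * log10f(1.0f + f / 700.0f) + 0.5f);
    }

    /* mel -> Hz, matching codec2_decode_700b() */
    static float mel_to_hz(float m) {
        return 700.0f * (powf(10.0f, m / 2595.0f) - 1.0f);
    }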
| 1840 | |||
| 1841 | /*---------------------------------------------------------------------------*\ | ||
| 1842 | |||
| 1843 | FUNCTION....: codec2_decode_700b | ||
| 1844 | AUTHOR......: David Rowe | ||
| 1845 | DATE CREATED: August 2015 | ||
| 1846 | |||
| 1847 | Decodes frames of 28 bits into 320 samples (40ms) of speech. | ||
| 1848 | |||
| 1849 | \*---------------------------------------------------------------------------*/ | ||
| 1850 | |||
| 1851 | void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
| 1852 | { | ||
| 1853 | MODEL model[4]; | ||
| 1854 | int indexes[3]; | ||
| 1855 | float mel[LPC_ORD_LOW]; | ||
| 1856 | float lsps[4][LPC_ORD_LOW]; | ||
| 1857 | int Wo_index, e_index; | ||
| 1858 | float e[4]; | ||
| 1859 | float snr, f_; | ||
| 1860 | float ak[4][LPC_ORD_LOW+1]; | ||
| 1861 | int i,j; | ||
| 1862 | unsigned int nbit = 0; | ||
| 1863 | float weight; | ||
| 1864 | COMP Aw[FFT_ENC]; | ||
| 1865 | |||
| 1866 | assert(c2 != NULL); | ||
| 1867 | |||
| 1868 | /* only need to zero these out due to (unused) snr calculation */ | ||
| 1869 | |||
| 1870 | for(i=0; i<4; i++) | ||
| 1871 | for(j=1; j<=MAX_AMP; j++) | ||
| 1872 | model[i].A[j] = 0.0; | ||
| 1873 | |||
| 1874 | /* unpack bits from channel ------------------------------------*/ | ||
| 1875 | |||
| 1876 | model[3].voiced = unpack(bits, &nbit, 1); | ||
| 1877 | model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; | ||
| 1878 | |||
| 1879 | Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); | ||
| 1880 | model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5); | ||
| 1881 | model[3].L = PI/model[3].Wo; | ||
| 1882 | |||
| 1883 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
| 1884 | e[3] = decode_energy(e_index, 3); | ||
| 1885 | |||
| 1886 | for(i=0; i<3; i++) { | ||
| 1887 | indexes[i] = unpack_natural_or_gray(bits, &nbit, lspmelvq_cb_bits(i), c2->gray); | ||
| 1888 | } | ||
| 1889 | |||
| 1890 | lspmelvq_decode(indexes, mel, LPC_ORD_LOW); | ||
| 1891 | |||
| 1892 | #define MEL_ROUND 10 | ||
| 1893 | for(i=1; i<LPC_ORD_LOW; i++) { | ||
| 1894 | if (mel[i] <= mel[i-1]+MEL_ROUND) { | ||
| 1895 | mel[i]+=MEL_ROUND/2; | ||
| 1896 | mel[i-1]-=MEL_ROUND/2; | ||
| 1897 | i = 1; | ||
| 1898 | } | ||
| 1899 | } | ||
| 1900 | |||
| 1901 | for(i=0; i<LPC_ORD_LOW; i++) { | ||
| 1902 | f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0); | ||
| 1903 | lsps[3][i] = f_*(PI/4000.0); | ||
| 1904 | //printf("lsps[3][%d] %f\n", i, lsps[3][i]); | ||
| 1905 | } | ||
| 1906 | |||
| 1907 | /* interpolate ------------------------------------------------*/ | ||
| 1908 | |||
| 1909 | /* LSPs, Wo, and energy are sampled every 40ms so we interpolate | ||
| 1910 | the 3 frames in between, then recover spectral amplitudes */ | ||
| 1911 | |||
| 1912 | for(i=0, weight=0.25; i<3; i++, weight += 0.25) { | ||
| 1913 | interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); | ||
| 1914 | interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); | ||
| 1915 | e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); | ||
| 1916 | } | ||
| 1917 | for(i=0; i<4; i++) { | ||
| 1918 | lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW); | ||
| 1919 | aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0, | ||
| 1920 | c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); | ||
| 1921 | apply_lpc_correction(&model[i]); | ||
| 1922 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0); | ||
| 1923 | } | ||
| 1924 | |||
| 1925 | #ifdef DUMP | ||
| 1926 | dump_lsp_(&lsps[3][0]); | ||
| 1927 | dump_ak_(&ak[3][0], LPC_ORD_LOW); | ||
| 1928 | dump_model(&model[3]); | ||
| 1929 | if (c2->softdec) | ||
| 1930 | dump_softdec(c2->softdec, nbit); | ||
| 1931 | #endif | ||
| 1932 | |||
| 1933 | /* update memories for next frame ----------------------------*/ | ||
| 1934 | |||
| 1935 | c2->prev_model_dec = model[3]; | ||
| 1936 | c2->prev_e_dec = e[3]; | ||
| 1937 | for(i=0; i<LPC_ORD_LOW; i++) | ||
| 1938 | c2->prev_lsps_dec[i] = lsps[3][i]; | ||
| 1939 | } | ||
| 1940 | |||
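The interpolation step in codec2_decode_700b() is plain linear interpolation between the previously decoded parameter set and the newly received one, with weights 0.25/0.5/0.75 for the three intermediate 10 ms frames. A hedged sketch for a single scalar parameter (the real interp_Wo2()/interp_energy2() helpers have their own mode-specific details):

    /* linearly interpolate one decoded parameter across the three frames
       between 40 ms updates; prev = last decoded value, next = new value */
    static void interp_frames(float prev, float next, float out[3]) {
        for (int i = 0; i < 3; i++) {
            float weight = 0.25f * (i + 1);
            out[i] = (1.0f - weight) * prev + weight * next;
        }
    }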
| 1941 | |||
| 1942 | /*---------------------------------------------------------------------------*\ | 1407 | /*---------------------------------------------------------------------------*\ |
| 1943 | 1408 | ||
| 1944 | FUNCTION....: codec2_encode_700c | 1409 | FUNCTION....: codec2_encode_700c |
| @@ -1955,7 +1420,7 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * | |||
| 1955 | frame 0: nothing | 1420 | frame 0: nothing |
| 1956 | frame 1: nothing | 1421 | frame 1: nothing |
| 1957 | frame 2: nothing | 1422 | frame 2: nothing |
| 1958 | frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy, | 1423 | frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy, |
| 1959 | 6 bit scalar Wo/voicing. No spare bits. | 1424 | 6 bit scalar Wo/voicing. No spare bits. |
| 1960 | 1425 | ||
| 1961 | Voicing is encoded using the 0 index of the Wo quantiser. | 1426 | Voicing is encoded using the 0 index of the Wo quantiser. |
| @@ -1971,52 +1436,54 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * | |||
| 1971 | 1436 | ||
| 1972 | \*---------------------------------------------------------------------------*/ | 1437 | \*---------------------------------------------------------------------------*/ |
| 1973 | 1438 | ||
| 1974 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]) | 1439 | void codec2_encode_700c(struct CODEC2 *c2, unsigned char *bits, |
| 1975 | { | 1440 | short speech[]) { |
| 1976 | MODEL model; | 1441 | MODEL model; |
| 1977 | int indexes[4], i, M=4; | 1442 | int indexes[4], i, M = 4; |
| 1978 | unsigned int nbit = 0; | 1443 | unsigned int nbit = 0; |
| 1979 | 1444 | ||
| 1980 | assert(c2 != NULL); | 1445 | assert(c2 != NULL); |
| 1981 | 1446 | ||
| 1982 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | 1447 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); |
| 1983 | 1448 | ||
| 1984 | for(i=0; i<M; i++) { | 1449 | for (i = 0; i < M; i++) { |
| 1985 | analyse_one_frame(c2, &model, &speech[i*c2->n_samp]); | 1450 | analyse_one_frame(c2, &model, &speech[i * c2->n_samp]); |
| 1986 | } | 1451 | } |
| 1987 | 1452 | ||
| 1988 | int K = 20; | 1453 | int K = 20; |
| 1989 | float rate_K_vec[K], mean; | 1454 | float rate_K_vec[K], mean; |
| 1990 | float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; | 1455 | float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; |
| 1991 | 1456 | ||
| 1992 | newamp1_model_to_indexes(&c2->c2const, | 1457 | newamp1_model_to_indexes(&c2->c2const, indexes, &model, rate_K_vec, |
| 1993 | indexes, | 1458 | c2->rate_K_sample_freqs_kHz, K, &mean, |
| 1994 | &model, | 1459 | rate_K_vec_no_mean, rate_K_vec_no_mean_, &c2->se, |
| 1995 | rate_K_vec, | 1460 | c2->eq, c2->eq_en); |
| 1996 | c2->rate_K_sample_freqs_kHz, | 1461 | c2->nse += K; |
| 1997 | K, | ||
| 1998 | &mean, | ||
| 1999 | rate_K_vec_no_mean, | ||
| 2000 | rate_K_vec_no_mean_, &c2->se, c2->eq, c2->eq_en); | ||
| 2001 | c2->nse += K; | ||
| 2002 | 1462 | ||
| 2003 | #ifndef CORTEX_M4 | 1463 | #ifndef CORTEX_M4 |
| 2004 | /* dump features for deep learning experiments */ | 1464 | /* dump features for deep learning experiments */ |
| 2005 | if (c2->fmlfeat != NULL) { | 1465 | if (c2->fmlfeat != NULL) { |
| 2006 | fwrite(&mean, 1, sizeof(float), c2->fmlfeat); | 1466 | fwrite(&mean, 1, sizeof(float), c2->fmlfeat); |
| 2007 | fwrite(rate_K_vec_no_mean, K, sizeof(float), c2->fmlfeat); | 1467 | fwrite(rate_K_vec_no_mean, K, sizeof(float), c2->fmlfeat); |
| 2008 | fwrite(rate_K_vec_no_mean_, K, sizeof(float), c2->fmlfeat); | 1468 | fwrite(rate_K_vec_no_mean_, K, sizeof(float), c2->fmlfeat); |
| 2009 | } | 1469 | MODEL model_; |
| 1470 | memcpy(&model_, &model, sizeof(model)); | ||
| 1471 | float rate_K_vec_[K]; | ||
| 1472 | for (int k = 0; k < K; k++) rate_K_vec_[k] = rate_K_vec_no_mean_[k] + mean; | ||
| 1473 | resample_rate_L(&c2->c2const, &model_, rate_K_vec_, | ||
| 1474 | c2->rate_K_sample_freqs_kHz, K); | ||
| 1475 | fwrite(&model_.A, MAX_AMP, sizeof(float), c2->fmlfeat); | ||
| 1476 | } | ||
| 1477 | if (c2->fmlmodel != NULL) fwrite(&model, sizeof(MODEL), 1, c2->fmlmodel); | ||
| 2010 | #endif | 1478 | #endif |
| 2011 | |||
| 2012 | pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); | ||
| 2013 | pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); | ||
| 2014 | pack_natural_or_gray(bits, &nbit, indexes[2], 4, 0); | ||
| 2015 | pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); | ||
| 2016 | 1479 | ||
| 2017 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | 1480 | pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); |
| 2018 | } | 1481 | pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); |
| 1482 | pack_natural_or_gray(bits, &nbit, indexes[2], 4, 0); | ||
| 1483 | pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); | ||
| 2019 | 1484 | ||
| 1485 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
| 1486 | } | ||
| 2020 | 1487 | ||
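As the comment above notes, 700C has no separate voicing bit: voicing rides on the 6 bit Wo quantiser, with index 0 reserved for unvoiced frames (our reading of the comment; the helper below is a hypothetical illustration, not codec2 API):

    /* sketch of the 700C Wo/voicing convention: index 0 = unvoiced,
       indexes 1..63 = quantised log Wo of a voiced frame (assumption) */
    static int wo_voicing_index(int voiced, int wo_index) {
        return voiced ? wo_index : 0;   /* wo_index assumed in 1..63 */
    }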
| 2021 | /*---------------------------------------------------------------------------*\ | 1488 | /*---------------------------------------------------------------------------*\ |
| 2022 | 1489 | ||
| @@ -2028,46 +1495,53 @@ void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
| 2028 | 1495 | ||
| 2029 | \*---------------------------------------------------------------------------*/ | 1496 | \*---------------------------------------------------------------------------*/ |
| 2030 | 1497 | ||
| 2031 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 1498 | void codec2_decode_700c(struct CODEC2 *c2, short speech[], |
| 2032 | { | 1499 | const unsigned char *bits) { |
| 2033 | MODEL model[4]; | 1500 | MODEL model[4]; |
| 2034 | int indexes[4]; | 1501 | int indexes[4]; |
| 2035 | int i; | 1502 | int i; |
| 2036 | unsigned int nbit = 0; | 1503 | unsigned int nbit = 0; |
| 2037 | 1504 | ||
| 2038 | assert(c2 != NULL); | 1505 | assert(c2 != NULL); |
| 2039 | 1506 | ||
| 2040 | /* unpack bits from channel ------------------------------------*/ | 1507 | /* unpack bits from channel ------------------------------------*/ |
| 2041 | 1508 | ||
| 2042 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | 1509 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); |
| 2043 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | 1510 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); |
| 2044 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); | 1511 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); |
| 2045 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | 1512 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); |
| 2046 | 1513 | ||
| 2047 | int M = 4; | 1514 | int M = 4; |
| 2048 | COMP HH[M][MAX_AMP+1]; | 1515 | COMP HH[M][MAX_AMP + 1]; |
| 2049 | float interpolated_surface_[M][NEWAMP1_K]; | 1516 | float interpolated_surface_[M][NEWAMP1_K]; |
| 2050 | 1517 | ||
| 2051 | newamp1_indexes_to_model(&c2->c2const, | 1518 | newamp1_indexes_to_model( |
| 2052 | model, | 1519 | &c2->c2const, model, (COMP *)HH, (float *)interpolated_surface_, |
| 2053 | (COMP*)HH, | 1520 | c2->prev_rate_K_vec_, &c2->Wo_left, &c2->voicing_left, |
| 2054 | (float*)interpolated_surface_, | 1521 | c2->rate_K_sample_freqs_kHz, NEWAMP1_K, c2->phase_fft_fwd_cfg, |
| 2055 | c2->prev_rate_K_vec_, | 1522 | c2->phase_fft_inv_cfg, indexes, c2->user_rate_K_vec_no_mean_, |
| 2056 | &c2->Wo_left, | 1523 | c2->post_filter_en); |
| 2057 | &c2->voicing_left, | 1524 | |
| 2058 | c2->rate_K_sample_freqs_kHz, | 1525 | for (i = 0; i < M; i++) { |
| 2059 | NEWAMP1_K, | 1526 | if (c2->fmlfeat != NULL) { |
| 2060 | c2->phase_fft_fwd_cfg, | 1527 | /* We use standard nb_features=55 feature records for compatibility with |
| 2061 | c2->phase_fft_inv_cfg, | 1528 | * train_lpcnet.py */ |
| 2062 | indexes, | 1529 | float features[55] = {0}; |
| 2063 | c2->user_rate_K_vec_no_mean_, | 1530 | /* just using 18/20 for compatibility with LPCNet, coarse scaling for NN |
| 2064 | c2->post_filter_en); | 1531 | * input */ |
| 2065 | 1532 | for (int j = 0; j < 18; j++) | |
| 2066 | 1533 | features[j] = (interpolated_surface_[i][j] - 30) / 40; | |
| 2067 | for(i=0; i<M; i++) { | 1534 | int pitch_index = 21 + 2.0 * M_PI / model[i].Wo; |
| 2068 | /* 700C is a little quiter so lets apply some experimentally derived audio gain */ | 1535 | features[36] = 0.02 * (pitch_index - 100); |
| 2069 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | 1536 | features[37] = model[i].voiced; |
| 2070 | } | 1537 | fwrite(features, 55, sizeof(float), c2->fmlfeat); |
| 1538 | } | ||
| 1539 | |||
| 1540 | /* 700C is a little quieter so lets apply some experimentally derived audio | ||
| 1541 | * gain */ | ||
| 1542 | synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], &HH[i][0], | ||
| 1543 | 1.5); | ||
| 1544 | } | ||
| 2071 | } | 1545 | } |
| 2072 | 1546 | ||
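The decoder-side feature dump above writes one fixed 55-float record per 10 ms frame, with the scaled rate K surface in slots 0..17, a pitch feature in slot 36 and voicing in slot 37. A small standalone reader for offline inspection (the file name is a placeholder, and it assumes the file holds only these decoder-side records):

    #include <stdio.h>

    int main(void) {
        float rec[55];
        FILE *f = fopen("features.f32", "rb");   /* hypothetical file name */
        if (f == NULL) return 1;
        while (fread(rec, sizeof(float), 55, f) == 55)
            printf("pitch_feat=%.3f voiced=%.1f\n", rec[36], rec[37]);
        fclose(f);
        return 0;
    }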
| 2073 | /*---------------------------------------------------------------------------*\ | 1547 | /*---------------------------------------------------------------------------*\ |
| @@ -2080,48 +1554,24 @@ void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * | |||
| 2080 | 1554 | ||
| 2081 | \*---------------------------------------------------------------------------*/ | 1555 | \*---------------------------------------------------------------------------*/ |
| 2082 | 1556 | ||
| 2083 | float codec2_energy_700c(struct CODEC2 *c2, const unsigned char * bits) | 1557 | float codec2_energy_700c(struct CODEC2 *c2, const unsigned char *bits) { |
| 2084 | { | 1558 | int indexes[4]; |
| 2085 | int indexes[4]; | 1559 | unsigned int nbit = 0; |
| 2086 | unsigned int nbit = 0; | ||
| 2087 | 1560 | ||
| 2088 | assert(c2 != NULL); | 1561 | assert(c2 != NULL); |
| 2089 | 1562 | ||
| 2090 | /* unpack bits from channel ------------------------------------*/ | 1563 | /* unpack bits from channel ------------------------------------*/ |
| 2091 | 1564 | ||
| 2092 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | 1565 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); |
| 2093 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | 1566 | indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); |
| 2094 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); | 1567 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 4, 0); |
| 2095 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | 1568 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); |
| 2096 | 1569 | ||
| 2097 | float mean = newamp1_energy_cb[0].cb[indexes[2]]; | 1570 | float mean = newamp1_energy_cb[0].cb[indexes[2]]; |
| 2098 | mean -= 10; | 1571 | mean -= 10; |
| 2099 | if (indexes[3] == 0) | 1572 | if (indexes[3] == 0) mean -= 10; |
| 2100 | mean -= 10; | ||
| 2101 | 1573 | ||
| 2102 | return POW10F(mean/10.0); | 1574 | return POW10F(mean / 10.0); |
| 2103 | } | ||
| 2104 | |||
| 2105 | float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits) | ||
| 2106 | { | ||
| 2107 | int indexes[4]; | ||
| 2108 | unsigned int nbit = 0; | ||
| 2109 | |||
| 2110 | assert(c2 != NULL); | ||
| 2111 | |||
| 2112 | /* unpack bits from channel ------------------------------------*/ | ||
| 2113 | |||
| 2114 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
| 2115 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
| 2116 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
| 2117 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
| 2118 | |||
| 2119 | float mean = newamp2_energy_cb[0].cb[indexes[2]]; | ||
| 2120 | mean -= 10; | ||
| 2121 | if (indexes[3] == 0) | ||
| 2122 | mean -= 10; | ||
| 2123 | |||
| 2124 | return POW10F(mean/10.0); | ||
| 2125 | } | 1575 | } |
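The POW10F(mean / 10.0) return value is the usual dB-to-linear power conversion (assuming the energy codebook entries are in dB, which the /10 exponent suggests): a decoded mean of 20 dB maps to 10^(20/10) = 100, and each of the -10 adjustments above divides that by 10.

    #include <math.h>

    /* worked example of the conversion above: db_to_linear(20.0f) == 100.0f */
    static float db_to_linear(float mean_db) {
        return powf(10.0f, mean_db / 10.0f);
    }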
| 2126 | 1576 | ||
| 2127 | /*---------------------------------------------------------------------------*\ | 1577 | /*---------------------------------------------------------------------------*\ |
| @@ -2134,300 +1584,58 @@ float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits) | |||
| 2134 | 1584 | ||
| 2135 | \*---------------------------------------------------------------------------*/ | 1585 | \*---------------------------------------------------------------------------*/ |
| 2136 | 1586 | ||
| 2137 | float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) | 1587 | float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) { |
| 2138 | { | 1588 | assert(c2 != NULL); |
| 2139 | assert(c2 != NULL); | 1589 | assert((CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) || |
| 2140 | assert( | 1590 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) || |
| 2141 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) || | 1591 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) || |
| 2142 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) || | 1592 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) || |
| 2143 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) || | 1593 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) || |
| 2144 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) || | 1594 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) || |
| 2145 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) || | 1595 | (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))); |
| 2146 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) || | 1596 | MODEL model; |
| 2147 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) || | 1597 | float xq_dec[2] = {}; |
| 2148 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) || | 1598 | int e_index, WoE_index; |
| 2149 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) || | 1599 | float e = 0.0f; |
| 2150 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) || | 1600 | unsigned int nbit; |
| 2151 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) | 1601 | |
| 2152 | ); | 1602 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) { |
| 2153 | MODEL model; | 1603 | nbit = 1 + 1 + WO_BITS; |
| 2154 | float xq_dec[2] = {}; | 1604 | e_index = unpack(bits, &nbit, E_BITS); |
| 2155 | int e_index, WoE_index; | 1605 | e = decode_energy(e_index, E_BITS); |
| 2156 | float e; | 1606 | } |
| 2157 | unsigned int nbit; | 1607 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) { |
| 2158 | 1608 | nbit = 1 + 1; | |
| 2159 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) { | 1609 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
| 2160 | nbit = 1 + 1 + WO_BITS; | 1610 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); |
| 2161 | e_index = unpack(bits, &nbit, E_BITS); | 1611 | } |
| 2162 | e = decode_energy(e_index, E_BITS); | 1612 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) { |
| 2163 | } | 1613 | nbit = 1 + 1 + WO_BITS; |
| 2164 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) { | 1614 | e_index = unpack(bits, &nbit, E_BITS); |
| 2165 | nbit = 1 + 1; | 1615 | e = decode_energy(e_index, E_BITS); |
| 2166 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1616 | } |
| 2167 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | 1617 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) { |
| 2168 | } | 1618 | nbit = 1 + 1; |
| 2169 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) { | 1619 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
| 2170 | nbit = 1 + 1 + WO_BITS; | 1620 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); |
| 2171 | e_index = unpack(bits, &nbit, E_BITS); | 1621 | } |
| 2172 | e = decode_energy(e_index, E_BITS); | 1622 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) { |
| 2173 | } | 1623 | nbit = 1 + 1 + 1 + 1 + WO_BITS; |
| 2174 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) { | 1624 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); |
| 2175 | nbit = 1 + 1; | 1625 | e = decode_energy(e_index, E_BITS); |
| 2176 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | 1626 | } |
| 2177 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | 1627 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) { |
| 2178 | } | 1628 | nbit = 1 + 1; |
| 2179 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) { | 1629 | WoE_index = unpack(bits, &nbit, WO_E_BITS); |
| 2180 | nbit = 1 + 1 + 1 + 1 + WO_BITS; | 1630 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); |
| 2181 | e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); | 1631 | } |
| 2182 | e = decode_energy(e_index, E_BITS); | 1632 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { |
| 2183 | } | 1633 | e = codec2_energy_700c(c2, bits); |
| 2184 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) { | 1634 | } |
| 2185 | nbit = 1 + 1; | ||
| 2186 | WoE_index = unpack(bits, &nbit, WO_E_BITS); | ||
| 2187 | decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); | ||
| 2188 | } | ||
| 2189 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) { | ||
| 2190 | nbit = 1 + 5; | ||
| 2191 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
| 2192 | e = decode_energy(e_index, 3); | ||
| 2193 | } | ||
| 2194 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) { | ||
| 2195 | nbit = 1 + 5; | ||
| 2196 | e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); | ||
| 2197 | e = decode_energy(e_index, 3); | ||
| 2198 | } | ||
| 2199 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | ||
| 2200 | e = codec2_energy_700c(c2, bits); | ||
| 2201 | } | ||
| 2202 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { | ||
| 2203 | e = codec2_energy_450(c2, bits); | ||
| 2204 | } | ||
| 2205 | |||
| 2206 | return e; | ||
| 2207 | } | ||
| 2208 | |||
| 2209 | |||
| 2210 | /*---------------------------------------------------------------------------*\ | ||
| 2211 | |||
| 2212 | FUNCTION....: codec2_encode_450 | ||
| 2213 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
| 2214 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
| 2215 | DATE CREATED: July 2018 | ||
| 2216 | |||
| 2217 | 450 bit/s codec that uses newamp2 fixed rate VQ of amplitudes. | ||
| 2218 | |||
| 2219 | Encodes 320 speech samples (40ms of speech) into 18 bits. | ||
| 2220 | |||
| 2221 | The codec2 algorithm actually operates internally on 10ms (80 | ||
| 2222 | sample) frames, so we run the encoding algorithm four times: | ||
| 2223 | |||
| 2224 | frame 0: nothing | ||
| 2225 | frame 1: nothing | ||
| 2226 | frame 2: nothing | ||
| 2227 | frame 3: 9 bit 1 stage VQ, 3 bits energy, | ||
| 2228 | 6 bit scalar Wo/voicing/plosive. No spare bits. | ||
| 2229 | |||
| 2230 | If a plosive is detected, the frame at the energy step is encoded instead. | ||
| 2231 | |||
| 2232 | Voicing is encoded using the 000000 index of the Wo quantiser. | ||
| 2233 | Plosive is encoded using the 111111 index of the Wo quantiser. | ||
| 2234 | |||
| 2235 | The bit allocation is: | ||
| 2236 | |||
| 2237 | Parameter frames 1-3 frame 4 Total | ||
| 2238 | ----------------------------------------------------------- | ||
| 2239 | Harmonic magnitudes (rate k VQ) 0 9 9 | ||
| 2240 | Energy 0 3 3 | ||
| 2241 | log Wo/voicing/plosive 0 6 6 | ||
| 2242 | TOTAL 0 18 18 | ||
| 2243 | |||
| 2244 | |||
| 2245 | \*---------------------------------------------------------------------------*/ | ||
| 2246 | |||
| 2247 | void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]) | ||
| 2248 | { | ||
| 2249 | MODEL model; | ||
| 2250 | int indexes[4], i,h, M=4; | ||
| 2251 | unsigned int nbit = 0; | ||
| 2252 | int plosiv = 0; | ||
| 2253 | float energydelta[M]; | ||
| 2254 | int spectralCounter; | ||
| 2255 | |||
| 2256 | assert(c2 != NULL); | ||
| 2257 | |||
| 2258 | memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); | ||
| 2259 | for(i=0; i<M; i++){ | ||
| 2260 | analyse_one_frame(c2, &model, &speech[i*c2->n_samp]); | ||
| 2261 | energydelta[i] = 0; | ||
| 2262 | spectralCounter = 0; | ||
| 2263 | for(h = 0;h<(model.L);h++){ | ||
| 2264 | //only detect above 300 Hz | ||
| 2265 | if(h*model.Wo*(c2->c2const.Fs/2000.0)/M_PI > 0.3){ | ||
| 2266 | energydelta[i] = energydelta[i] + 20.0*log10(model.A[10]+1E-16); | ||
| 2267 | spectralCounter = spectralCounter+1; | ||
| 2268 | } | ||
| 2269 | |||
| 2270 | } | ||
| 2271 | energydelta[i] = energydelta[i] / spectralCounter ; | ||
| 2272 | } | ||
| 2273 | //Constants for plosive detection: tdB = threshold; minPwr = level from below which plosives have to rise | ||
| 2274 | float tdB = 15; //not fixed, can be changed | ||
| 2275 | float minPwr = 15; //not fixed, can be changed | ||
| 2276 | if((c2->energy_prev)<minPwr && energydelta[0]>((c2->energy_prev)+tdB)){ | ||
| 2277 | |||
| 2278 | plosiv = 1; | ||
| 2279 | } | ||
| 2280 | if(energydelta[0]<minPwr && energydelta[1]>(energydelta[0]+tdB)){ | ||
| 2281 | |||
| 2282 | plosiv = 2; | ||
| 2283 | } | ||
| 2284 | if(energydelta[1]<minPwr &&energydelta[2]>(energydelta[1]+tdB)){ | ||
| 2285 | |||
| 2286 | plosiv = 3; | ||
| 2287 | } | ||
| 2288 | if(energydelta[2]<minPwr &&energydelta[3]>(energydelta[2]+tdB)){ | ||
| 2289 | |||
| 2290 | plosiv = 4; | ||
| 2291 | } | ||
| 2292 | if(plosiv != 0 && plosiv != 4){ | ||
| 2293 | analyse_one_frame(c2, &model, &speech[(plosiv-1)*c2->n_samp]); | ||
| 2294 | } | ||
| 2295 | |||
| 2296 | c2->energy_prev = energydelta[3]; | ||
| 2297 | |||
| 2298 | |||
| 2299 | int K = 29; | ||
| 2300 | float rate_K_vec[K], mean; | ||
| 2301 | float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; | ||
| 2302 | if(plosiv > 0){ | ||
| 2303 | plosiv = 1; | ||
| 2304 | } | ||
| 2305 | newamp2_model_to_indexes(&c2->c2const, | ||
| 2306 | indexes, | ||
| 2307 | &model, | ||
| 2308 | rate_K_vec, | ||
| 2309 | c2->n2_rate_K_sample_freqs_kHz, | ||
| 2310 | K, | ||
| 2311 | &mean, | ||
| 2312 | rate_K_vec_no_mean, | ||
| 2313 | rate_K_vec_no_mean_, | ||
| 2314 | plosiv); | ||
| 2315 | |||
| 2316 | |||
| 2317 | pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); | ||
| 2318 | //pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); | ||
| 2319 | pack_natural_or_gray(bits, &nbit, indexes[2], 3, 0); | ||
| 2320 | pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); | ||
| 2321 | |||
| 2322 | assert(nbit == (unsigned)codec2_bits_per_frame(c2)); | ||
| 2323 | } | ||
| 2324 | |||
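The plosive detector in codec2_encode_450() above boils down to one test per frame boundary: the previous frame must be quiet and the current frame must jump by more than a fixed step. A minimal sketch with the thresholds from the code:

    /* a frame is flagged as a plosive when the previous frame's average
       spectral level was below minPwr dB and this frame rises by > tdB dB */
    static int is_plosive(float prev_dB, float cur_dB) {
        const float tdB = 15.0f, minPwr = 15.0f;
        return (prev_dB < minPwr) && (cur_dB > prev_dB + tdB);
    }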
| 2325 | |||
| 2326 | /*---------------------------------------------------------------------------*\ | ||
| 2327 | |||
| 2328 | FUNCTION....: codec2_decode_450 | ||
| 2329 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
| 2330 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
| 2331 | DATE CREATED: July 2018 | ||
| 2332 | |||
| 2333 | \*---------------------------------------------------------------------------*/ | ||
| 2334 | |||
| 2335 | void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits) | ||
| 2336 | { | ||
| 2337 | MODEL model[4]; | ||
| 2338 | int indexes[4]; | ||
| 2339 | int i; | ||
| 2340 | unsigned int nbit = 0; | ||
| 2341 | |||
| 2342 | assert(c2 != NULL); | ||
| 2343 | |||
| 2344 | /* unpack bits from channel ------------------------------------*/ | ||
| 2345 | |||
| 2346 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
| 2347 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
| 2348 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
| 2349 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
| 2350 | |||
| 2351 | int M = 4; | ||
| 2352 | COMP HH[M][MAX_AMP+1]; | ||
| 2353 | float interpolated_surface_[M][NEWAMP2_K]; | ||
| 2354 | int pwbFlag = 0; | ||
| 2355 | |||
| 2356 | newamp2_indexes_to_model(&c2->c2const, | ||
| 2357 | model, | ||
| 2358 | (COMP*)HH, | ||
| 2359 | (float*)interpolated_surface_, | ||
| 2360 | c2->n2_prev_rate_K_vec_, | ||
| 2361 | &c2->Wo_left, | ||
| 2362 | &c2->voicing_left, | ||
| 2363 | c2->n2_rate_K_sample_freqs_kHz, | ||
| 2364 | NEWAMP2_K, | ||
| 2365 | c2->phase_fft_fwd_cfg, | ||
| 2366 | c2->phase_fft_inv_cfg, | ||
| 2367 | indexes, | ||
| 2368 | 1.5, | ||
| 2369 | pwbFlag); | ||
| 2370 | |||
| 2371 | |||
| 2372 | for(i=0; i<M; i++) { | ||
| 2373 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | ||
| 2374 | } | ||
| 2375 | } | ||
| 2376 | |||
| 2377 | /*---------------------------------------------------------------------------*\ | ||
| 2378 | |||
| 2379 | FUNCTION....: codec2_decode_450pwb | ||
| 2380 | AUTHOR......: Thomas Kurin and Stefan Erhardt | ||
| 2381 | INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg | ||
| 2382 | DATE CREATED: July 2018 | ||
| 2383 | |||
| 2384 | Decodes the 450 codec data in pseudo wideband at a 16 kHz sample rate. | ||
| 2385 | |||
| 2386 | \*---------------------------------------------------------------------------*/ | ||
| 2387 | 1635 | ||
| 2388 | void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits) | 1636 | return e; |
| 2389 | { | ||
| 2390 | MODEL model[4]; | ||
| 2391 | int indexes[4]; | ||
| 2392 | int i; | ||
| 2393 | unsigned int nbit = 0; | ||
| 2394 | |||
| 2395 | assert(c2 != NULL); | ||
| 2396 | |||
| 2397 | /* unpack bits from channel ------------------------------------*/ | ||
| 2398 | |||
| 2399 | indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
| 2400 | //indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); | ||
| 2401 | indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); | ||
| 2402 | indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); | ||
| 2403 | |||
| 2404 | int M = 4; | ||
| 2405 | COMP HH[M][MAX_AMP+1]; | ||
| 2406 | float interpolated_surface_[M][NEWAMP2_16K_K]; | ||
| 2407 | int pwbFlag = 1; | ||
| 2408 | |||
| 2409 | newamp2_indexes_to_model(&c2->c2const, | ||
| 2410 | model, | ||
| 2411 | (COMP*)HH, | ||
| 2412 | (float*)interpolated_surface_, | ||
| 2413 | c2->n2_pwb_prev_rate_K_vec_, | ||
| 2414 | &c2->Wo_left, | ||
| 2415 | &c2->voicing_left, | ||
| 2416 | c2->n2_pwb_rate_K_sample_freqs_kHz, | ||
| 2417 | NEWAMP2_16K_K, | ||
| 2418 | c2->phase_fft_fwd_cfg, | ||
| 2419 | c2->phase_fft_inv_cfg, | ||
| 2420 | indexes, | ||
| 2421 | 1.5, | ||
| 2422 | pwbFlag); | ||
| 2423 | |||
| 2424 | |||
| 2425 | for(i=0; i<M; i++) { | ||
| 2426 | synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5); | ||
| 2427 | } | ||
| 2428 | } | 1637 | } |
| 2429 | 1638 | ||
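A hedged usage sketch for codec2_get_energy(): reading the frame energy back out of an encoded frame, e.g. to drive a squelch. Only the codec2_get_energy() call itself comes from the API above; the threshold and dB conversion are our assumptions.

    #include <math.h>
    #include "codec2.h"

    /* fragment: c2 is an open codec2 state, bits holds one encoded frame */
    static int frame_above_squelch(struct CODEC2 *c2, const unsigned char *bits) {
        float e = codec2_get_energy(c2, bits);
        float e_dB = 10.0f * log10f(e + 1e-6f);
        return e_dB > -20.0f;   /* squelch threshold is an assumption */
    }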
| 2430 | |||
| 2431 | /*---------------------------------------------------------------------------* \ | 1639 | /*---------------------------------------------------------------------------* \ |
| 2432 | 1640 | ||
| 2433 | FUNCTION....: synthesise_one_frame() | 1641 | FUNCTION....: synthesise_one_frame() |
| @@ -2438,56 +1646,41 @@ void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char | |||
| 2438 | 1646 | ||
| 2439 | \*---------------------------------------------------------------------------*/ | 1647 | \*---------------------------------------------------------------------------*/ |
| 2440 | 1648 | ||
| 2441 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP Aw[], float gain) | 1649 | void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, |
| 2442 | { | 1650 | COMP Aw[], float gain) { |
| 2443 | int i; | 1651 | int i; |
| 2444 | //PROFILE_VAR(phase_start, pf_start, synth_start); | 1652 | |
| 2445 | 1653 | if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { | |
| 2446 | //#ifdef DUMP | 1654 | /* newamp1, we've already worked out rate L phase */ |
| 2447 | //dump_quantised_model(model); | 1655 | COMP *H = Aw; |
| 2448 | //#endif | 1656 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); |
| 2449 | 1657 | } else { | |
| 2450 | //PROFILE_SAMPLE(phase_start); | 1658 | /* LPC based phase synthesis */ |
| 2451 | 1659 | COMP H[MAX_AMP + 1]; | |
| 2452 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode) ) { | 1660 | sample_phase(model, H, Aw); |
| 2453 | /* newamp1/2, we've already worked out rate L phase */ | 1661 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); |
| 2454 | COMP *H = Aw; | 1662 | } |
| 2455 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); | 1663 | |
| 2456 | } else { | 1664 | postfilter(model, &c2->bg_est); |
| 2457 | /* LPC based phase synthesis */ | 1665 | synthesise(c2->n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1); |
| 2458 | COMP H[MAX_AMP+1]; | 1666 | |
| 2459 | sample_phase(model, H, Aw); | 1667 | for (i = 0; i < c2->n_samp; i++) { |
| 2460 | phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); | 1668 | c2->Sn_[i] *= gain; |
| 2461 | } | 1669 | } |
| 2462 | 1670 | ||
| 2463 | //PROFILE_SAMPLE_AND_LOG(pf_start, phase_start, " phase_synth"); | 1671 | ear_protection(c2->Sn_, c2->n_samp); |
| 2464 | 1672 | ||
| 2465 | postfilter(model, &c2->bg_est); | 1673 | for (i = 0; i < c2->n_samp; i++) { |
| 2466 | 1674 | if (c2->Sn_[i] > 32767.0) | |
| 2467 | //PROFILE_SAMPLE_AND_LOG(synth_start, pf_start, " postfilter"); | 1675 | speech[i] = 32767; |
| 2468 | 1676 | else if (c2->Sn_[i] < -32767.0) | |
| 2469 | synthesise(c2->n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1); | 1677 | speech[i] = -32767; |
| 2470 | 1678 | else | |
| 2471 | for(i=0; i<c2->n_samp; i++) { | 1679 | speech[i] = c2->Sn_[i]; |
| 2472 | c2->Sn_[i] *= gain; | 1680 | } |
| 2473 | } | ||
| 2474 | |||
| 2475 | //PROFILE_SAMPLE_AND_LOG2(synth_start, " synth"); | ||
| 2476 | |||
| 2477 | ear_protection(c2->Sn_, c2->n_samp); | ||
| 2478 | |||
| 2479 | for(i=0; i<c2->n_samp; i++) { | ||
| 2480 | if (c2->Sn_[i] > 32767.0) | ||
| 2481 | speech[i] = 32767; | ||
| 2482 | else if (c2->Sn_[i] < -32767.0) | ||
| 2483 | speech[i] = -32767; | ||
| 2484 | else | ||
| 2485 | speech[i] = c2->Sn_[i]; | ||
| 2486 | } | ||
| 2487 | |||
| 2488 | } | 1681 | } |
| 2489 | 1682 | ||
| 2490 | /*---------------------------------------------------------------------------*\ | 1683 | /*---------------------------------------------------------------------------* \ |
| 2491 | 1684 | ||
| 2492 | FUNCTION....: analyse_one_frame() | 1685 | FUNCTION....: analyse_one_frame() |
| 2493 | AUTHOR......: David Rowe | 1686 | AUTHOR......: David Rowe |
| @@ -2498,48 +1691,40 @@ void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP | |||
| 2498 | 1691 | ||
| 2499 | \*---------------------------------------------------------------------------*/ | 1692 | \*---------------------------------------------------------------------------*/ |
| 2500 | 1693 | ||
| 2501 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) | 1694 | void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) { |
| 2502 | { | 1695 | COMP Sw[FFT_ENC]; |
| 2503 | COMP Sw[FFT_ENC]; | 1696 | float pitch; |
| 2504 | float pitch; | 1697 | int i; |
| 2505 | int i; | 1698 | int n_samp = c2->n_samp; |
| 2506 | //PROFILE_VAR(dft_start, nlp_start, model_start, two_stage, estamps); | 1699 | int m_pitch = c2->m_pitch; |
| 2507 | int n_samp = c2->n_samp; | ||
| 2508 | int m_pitch = c2->m_pitch; | ||
| 2509 | 1700 | ||
| 2510 | /* Read input speech */ | 1701 | /* Read input speech */ |
| 2511 | 1702 | ||
| 2512 | for(i=0; i<m_pitch-n_samp; i++) | 1703 | for (i = 0; i < m_pitch - n_samp; i++) c2->Sn[i] = c2->Sn[i + n_samp]; |
| 2513 | c2->Sn[i] = c2->Sn[i+n_samp]; | 1704 | for (i = 0; i < n_samp; i++) c2->Sn[i + m_pitch - n_samp] = speech[i]; |
| 2514 | for(i=0; i<n_samp; i++) | ||
| 2515 | c2->Sn[i+m_pitch-n_samp] = speech[i]; | ||
| 2516 | 1705 | ||
| 2517 | //PROFILE_SAMPLE(dft_start); | 1706 | dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w); |
| 2518 | dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w); | ||
| 2519 | //PROFILE_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech"); | ||
| 2520 | 1707 | ||
| 2521 | /* Estimate pitch */ | 1708 | /* Estimate pitch */ |
| 1709 | nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc); | ||
| 1710 | model->Wo = TWO_PI / pitch; | ||
| 1711 | model->L = PI / model->Wo; | ||
| 2522 | 1712 | ||
| 2523 | nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc); | 1713 | /* estimate model parameters */ |
| 2524 | //PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, " nlp"); | 1714 | two_stage_pitch_refinement(&c2->c2const, model, Sw); |
| 2525 | 1715 | ||
| 2526 | model->Wo = TWO_PI/pitch; | 1716 | /* estimate phases when doing ML experiments */ |
| 2527 | model->L = PI/model->Wo; | 1717 | if (c2->fmlfeat != NULL) |
| 2528 | 1718 | estimate_amplitudes(model, Sw, c2->W, 1); | |
| 2529 | /* estimate model parameters */ | 1719 | else |
| 2530 | |||
| 2531 | two_stage_pitch_refinement(&c2->c2const, model, Sw); | ||
| 2532 | //PROFILE_SAMPLE_AND_LOG(two_stage, model_start, " two_stage"); | ||
| 2533 | estimate_amplitudes(model, Sw, c2->W, 0); | 1720 | estimate_amplitudes(model, Sw, c2->W, 0); |
| 2534 | //PROFILE_SAMPLE_AND_LOG(estamps, two_stage, " est_amps"); | 1721 | est_voicing_mbe(&c2->c2const, model, Sw, c2->W); |
| 2535 | est_voicing_mbe(&c2->c2const, model, Sw, c2->W); | 1722 | #ifdef DUMP |
| 2536 | //PROFILE_SAMPLE_AND_LOG2(estamps, " est_voicing"); | 1723 | dump_model(model); |
| 2537 | #ifdef DUMP | 1724 | #endif |
| 2538 | dump_model(model); | ||
| 2539 | #endif | ||
| 2540 | } | 1725 | } |
| 2541 | 1726 | ||
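A worked example for the pitch-to-model mapping in analyse_one_frame() above, assuming the 8 kHz narrowband configuration: a pitch estimate of 80 samples gives Wo = 2*pi/80, roughly 0.0785 rad/sample (a 100 Hz fundamental), and L = pi/Wo = 40 harmonics below 4 kHz.

    #include <math.h>

    /* e.g. pitch = 80 samples -> Wo ~= 0.0785 rad/sample, L = 40 */
    static void pitch_to_wo_l(float pitch, float *Wo, int *L) {
        *Wo = 2.0f * (float)M_PI / pitch;
        *L  = (int)((float)M_PI / *Wo);
    }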
| 2542 | /*---------------------------------------------------------------------------*\ | 1727 | /*---------------------------------------------------------------------------* \ |
| 2543 | 1728 | ||
| 2544 | FUNCTION....: ear_protection() | 1729 | FUNCTION....: ear_protection() |
| 2545 | AUTHOR......: David Rowe | 1730 | AUTHOR......: David Rowe |
| @@ -2552,40 +1737,37 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) | |||
| 2552 | \*---------------------------------------------------------------------------*/ | 1737 | \*---------------------------------------------------------------------------*/ |
| 2553 | 1738 | ||
| 2554 | static void ear_protection(float in_out[], int n) { | 1739 | static void ear_protection(float in_out[], int n) { |
| 2555 | float max_sample, over, gain; | 1740 | float max_sample, over, gain; |
| 2556 | int i; | 1741 | int i; |
| 2557 | 1742 | ||
| 2558 | /* find maximum sample in frame */ | 1743 | /* find maximum sample in frame */ |
| 2559 | 1744 | ||
| 2560 | max_sample = 0.0; | 1745 | max_sample = 0.0; |
| 2561 | for(i=0; i<n; i++) | 1746 | for (i = 0; i < n; i++) |
| 2562 | if (in_out[i] > max_sample) | 1747 | if (in_out[i] > max_sample) max_sample = in_out[i]; |
| 2563 | max_sample = in_out[i]; | ||
| 2564 | 1748 | ||
| 2565 | /* determine how far above set point */ | 1749 | /* determine how far above set point */ |
| 2566 | 1750 | ||
| 2567 | over = max_sample/30000.0; | 1751 | over = max_sample / 30000.0; |
| 2568 | 1752 | ||
| 2569 | /* If we are x dB over set point we reduce level by 2x dB, this | 1753 | /* If we are x dB over set point we reduce level by 2x dB, this |
| 2570 | attenuates major excursions in amplitude (likely to be caused | 1754 | attenuates major excursions in amplitude (likely to be caused |
| 2571 | by bit errors) more than smaller ones */ | 1755 | by bit errors) more than smaller ones */ |
| 2572 | 1756 | ||
| 2573 | if (over > 1.0) { | 1757 | if (over > 1.0) { |
| 2574 | gain = 1.0/(over*over); | 1758 | gain = 1.0 / (over * over); |
| 2575 | //fprintf(stderr, "gain: %f\n", gain); | 1759 | for (i = 0; i < n; i++) in_out[i] *= gain; |
| 2576 | for(i=0; i<n; i++) | 1760 | } |
| 2577 | in_out[i] *= gain; | ||
| 2578 | } | ||
| 2579 | } | 1761 | } |
| 2580 | 1762 | ||
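Worked numbers for the rule described in ear_protection(): a 60000 peak is a factor of 2 (about 6 dB) over the 30000 set point, so the whole frame is scaled by 1/2^2 = 0.25 (about -12 dB), i.e. twice the overshoot in dB.

    /* gain applied by ear_protection() for a given frame peak;
       e.g. ear_protection_gain(60000.0f) == 0.25f */
    static float ear_protection_gain(float max_sample) {
        float over = max_sample / 30000.0f;
        return (over > 1.0f) ? 1.0f / (over * over) : 1.0f;
    }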
| 2581 | void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, float beta, float gamma) | 1763 | void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, |
| 2582 | { | 1764 | float beta, float gamma) { |
| 2583 | assert((beta >= 0.0) && (beta <= 1.0)); | 1765 | assert((beta >= 0.0) && (beta <= 1.0)); |
| 2584 | assert((gamma >= 0.0) && (gamma <= 1.0)); | 1766 | assert((gamma >= 0.0) && (gamma <= 1.0)); |
| 2585 | c2->lpc_pf = enable; | 1767 | c2->lpc_pf = enable; |
| 2586 | c2->bass_boost = bass_boost; | 1768 | c2->bass_boost = bass_boost; |
| 2587 | c2->beta = beta; | 1769 | c2->beta = beta; |
| 2588 | c2->gamma = gamma; | 1770 | c2->gamma = gamma; |
| 2589 | } | 1771 | } |
| 2590 | 1772 | ||
| 2591 | /* | 1773 | /* |
| @@ -2594,29 +1776,22 @@ void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, f | |||
| 2594 | Experimental method of sending voice/data frames for FreeDV. | 1776 | Experimental method of sending voice/data frames for FreeDV. |
| 2595 | */ | 1777 | */ |
| 2596 | 1778 | ||
| 2597 | int codec2_get_spare_bit_index(struct CODEC2 *c2) | 1779 | int codec2_get_spare_bit_index(struct CODEC2 *c2) { |
| 2598 | { | 1780 | assert(c2 != NULL); |
| 2599 | assert(c2 != NULL); | ||
| 2600 | 1781 | ||
| 2601 | switch(c2->mode) { | 1782 | switch (c2->mode) { |
| 2602 | case CODEC2_MODE_1300: | 1783 | case CODEC2_MODE_1300: |
| 2603 | return 2; // bit 2 (3rd bit) is v2 (third voicing bit) | 1784 | return 2; // bit 2 (3rd bit) is v2 (third voicing bit)
| 2604 | break; | 1785 | break; |
| 2605 | case CODEC2_MODE_1400: | 1786 | case CODEC2_MODE_1400: |
| 2606 | return 10; // bit 10 (11th bit) is v2 (third voicing bit) | 1787 | return 10; // bit 10 (11th bit) is v2 (third voicing bit) |
| 2607 | break; | 1788 | break; |
| 2608 | case CODEC2_MODE_1600: | 1789 | case CODEC2_MODE_1600: |
| 2609 | return 15; // bit 15 (16th bit) is v2 (third voicing bit) | 1790 | return 15; // bit 15 (16th bit) is v2 (third voicing bit) |
| 2610 | break; | 1791 | break; |
| 2611 | case CODEC2_MODE_700: | 1792 | } |
| 2612 | return 26; // bits 26 and 27 are spare | ||
| 2613 | break; | ||
| 2614 | case CODEC2_MODE_700B: | ||
| 2615 | return 27; // bit 27 is spare | ||
| 2616 | break; | ||
| 2617 | } | ||
| 2618 | 1793 | ||
| 2619 | return -1; | 1794 | return -1; |
| 2620 | } | 1795 | } |
| 2621 | 1796 | ||
| 2622 | /* | 1797 | /* |
| @@ -2624,111 +1799,123 @@ int codec2_get_spare_bit_index(struct CODEC2 *c2) | |||
| 2624 | for convenience. | 1799 | for convenience. |
| 2625 | */ | 1800 | */ |
| 2626 | 1801 | ||
| 2627 | int codec2_rebuild_spare_bit(struct CODEC2 *c2, int unpacked_bits[]) | 1802 | int codec2_rebuild_spare_bit(struct CODEC2 *c2, char unpacked_bits[]) { |
| 2628 | { | 1803 | int v1, v3; |
| 2629 | int v1,v3; | ||
| 2630 | 1804 | ||
| 2631 | assert(c2 != NULL); | 1805 | assert(c2 != NULL); |
| 2632 | 1806 | ||
| 2633 | v1 = unpacked_bits[1]; | 1807 | v1 = unpacked_bits[1]; |
| 2634 | 1808 | ||
| 2635 | switch(c2->mode) { | 1809 | switch (c2->mode) { |
| 2636 | case CODEC2_MODE_1300: | 1810 | case CODEC2_MODE_1300: |
| 2637 | 1811 | ||
| 2638 | v3 = unpacked_bits[1+1+1]; | 1812 | v3 = unpacked_bits[1 + 1 + 1]; |
| 2639 | 1813 | ||
| 2640 | /* if either adjacent frame is voiced, make this one voiced */ | 1814 | /* if either adjacent frame is voiced, make this one voiced */ |
| 2641 | 1815 | ||
| 2642 | unpacked_bits[2] = (v1 || v3); | 1816 | unpacked_bits[2] = (v1 || v3); |
| 2643 | 1817 | ||
| 2644 | return 0; | 1818 | return 0; |
| 2645 | 1819 | ||
| 2646 | break; | 1820 | break; |
| 2647 | 1821 | ||
| 2648 | case CODEC2_MODE_1400: | 1822 | case CODEC2_MODE_1400: |
| 2649 | 1823 | ||
| 2650 | v3 = unpacked_bits[1+1+8+1]; | 1824 | v3 = unpacked_bits[1 + 1 + 8 + 1]; |
| 2651 | 1825 | ||
| 2652 | /* if either adjacent frame is voiced, make this one voiced */ | 1826 | /* if either adjacent frame is voiced, make this one voiced */ |
| 2653 | 1827 | ||
| 2654 | unpacked_bits[10] = (v1 || v3); | 1828 | unpacked_bits[10] = (v1 || v3); |
| 2655 | 1829 | ||
| 2656 | return 0; | 1830 | return 0; |
| 2657 | 1831 | ||
| 2658 | break; | 1832 | break; |
| 2659 | 1833 | ||
| 2660 | case CODEC2_MODE_1600: | 1834 | case CODEC2_MODE_1600: |
| 2661 | v3 = unpacked_bits[1+1+8+5+1]; | 1835 | v3 = unpacked_bits[1 + 1 + 8 + 5 + 1]; |
| 2662 | 1836 | ||
| 2663 | /* if either adjacent frame is voiced, make this one voiced */ | 1837 | /* if either adjacent frame is voiced, make this one voiced */ |
| 2664 | 1838 | ||
| 2665 | unpacked_bits[15] = (v1 || v3); | 1839 | unpacked_bits[15] = (v1 || v3); |
| 2666 | 1840 | ||
| 2667 | return 0; | 1841 | return 0; |
| 2668 | 1842 | ||
| 2669 | break; | 1843 | break; |
| 2670 | } | 1844 | } |
| 2671 | 1845 | ||
| 2672 | return -1; | 1846 | return -1; |
| 2673 | } | 1847 | } |
| 2674 | 1848 | ||
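The repair rule used in codec2_rebuild_spare_bit() is simply that the reused middle voicing bit is regenerated as voiced if either neighbouring voicing bit is set:

    /* regenerate the overwritten middle voicing bit from its neighbours */
    static char rebuild_v2(char v1, char v3) {
        return (char)(v1 || v3);
    }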
| 2675 | void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray) | 1849 | void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray) { |
| 2676 | { | 1850 | assert(c2 != NULL); |
| 2677 | assert(c2 != NULL); | 1851 | c2->gray = gray; |
| 2678 | c2->gray = gray; | ||
| 2679 | } | 1852 | } |
| 2680 | 1853 | ||
| 2681 | void codec2_set_softdec(struct CODEC2 *c2, float *softdec) | 1854 | void codec2_set_softdec(struct CODEC2 *c2, float *softdec) { |
| 2682 | { | 1855 | assert(c2 != NULL); |
| 2683 | assert(c2 != NULL); | 1856 | c2->softdec = softdec; |
| 2684 | c2->softdec = softdec; | ||
| 2685 | } | 1857 | } |
| 2686 | 1858 | ||
| 2687 | void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *filename) { | 1859 | void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *feat_fn, |
| 2688 | if ((codec2_state->fmlfeat = fopen(filename, "wb")) == NULL) { | 1860 | char *model_fn) { |
| 2689 | fprintf(stderr, "error opening machine learning feature file: %s\n", filename); | 1861 | if ((codec2_state->fmlfeat = fopen(feat_fn, "wb")) == NULL) { |
| 2690 | exit(1); | 1862 | fprintf(stderr, "error opening machine learning feature file: %s\n", |
| 2691 | } | 1863 | feat_fn); |
| 1864 | exit(1); | ||
| 1865 | } | ||
| 1866 | if (model_fn) { | ||
| 1867 | if ((codec2_state->fmlmodel = fopen(model_fn, "wb")) == NULL) { | ||
| 1868 | fprintf(stderr, "error opening machine learning Codec 2 model file: %s\n", | ||
| 1869 | model_fn); | ||
| 1870 | exit(1); | ||
| 1871 | } | ||
| 1872 | } | ||
| 2692 | } | 1873 | } |
| 2693 | 1874 | ||
| 2694 | #ifndef __EMBEDDED__ | 1875 | #ifndef __EMBEDDED__ |
| 2695 | void codec2_load_codebook(struct CODEC2 *codec2_state, int num, char *filename) { | 1876 | void codec2_load_codebook(struct CODEC2 *codec2_state, int num, |
| 2696 | FILE *f; | 1877 | char *filename) { |
| 2697 | 1878 | FILE *f; | |
| 2698 | if ((f = fopen(filename, "rb")) == NULL) { | 1879 | |
| 2699 | fprintf(stderr, "error opening codebook file: %s\n", filename); | 1880 | if ((f = fopen(filename, "rb")) == NULL) { |
| 2700 | exit(1); | 1881 | fprintf(stderr, "error opening codebook file: %s\n", filename); |
| 2701 | } | 1882 | exit(1); |
| 2702 | //fprintf(stderr, "reading newamp1vq_cb[%d] k=%d m=%d\n", num, newamp1vq_cb[num].k, newamp1vq_cb[num].m); | 1883 | } |
| 2703 | float tmp[newamp1vq_cb[num].k*newamp1vq_cb[num].m]; | 1884 | // fprintf(stderr, "reading newamp1vq_cb[%d] k=%d m=%d\n", num, |
| 2704 | int nread = fread(tmp, sizeof(float), newamp1vq_cb[num].k*newamp1vq_cb[num].m, f); | 1885 | // newamp1vq_cb[num].k, newamp1vq_cb[num].m); |
| 2705 | float *p = (float*)newamp1vq_cb[num].cb; | 1886 | float tmp[newamp1vq_cb[num].k * newamp1vq_cb[num].m]; |
| 2706 | for(int i=0; i<newamp1vq_cb[num].k*newamp1vq_cb[num].m; i++) | 1887 | int nread = |
| 2707 | p[i] = tmp[i]; | 1888 | fread(tmp, sizeof(float), newamp1vq_cb[num].k * newamp1vq_cb[num].m, f); |
| 2708 | // fprintf(stderr, "nread = %d %f %f\n", nread, newamp1vq_cb[num].cb[0], newamp1vq_cb[num].cb[1]); | 1889 | float *p = (float *)newamp1vq_cb[num].cb; |
| 2709 | assert(nread == newamp1vq_cb[num].k*newamp1vq_cb[num].m); | 1890 | for (int i = 0; i < newamp1vq_cb[num].k * newamp1vq_cb[num].m; i++) |
| 2710 | fclose(f); | 1891 | p[i] = tmp[i]; |
| 1892 | // fprintf(stderr, "nread = %d %f %f\n", nread, newamp1vq_cb[num].cb[0], | ||
| 1893 | // newamp1vq_cb[num].cb[1]); | ||
| 1894 | assert(nread == newamp1vq_cb[num].k * newamp1vq_cb[num].m); | ||
| 1895 | fclose(f); | ||
| 2711 | } | 1896 | } |
| 2712 | #endif | 1897 | #endif |
| 2713 | 1898 | ||
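codec2_load_codebook() expects a raw file of k*m host-endian 32 bit floats with no header, where k and m must match the compiled-in newamp1vq_cb[num] dimensions. A hedged sketch of the matching writer (helper name is ours):

    #include <stdio.h>

    /* write a codebook in the raw float32 format read back above */
    static int write_codebook(const char *fn, const float *cb, int k, int m) {
        FILE *f = fopen(fn, "wb");
        if (f == NULL) return -1;
        size_t n = fwrite(cb, sizeof(float), (size_t)k * m, f);
        fclose(f);
        return (n == (size_t)k * m) ? 0 : -1;
    }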
| 2714 | float codec2_get_var(struct CODEC2 *codec2_state) { | 1899 | float codec2_get_var(struct CODEC2 *codec2_state) { |
| 2715 | if (codec2_state->nse) | 1900 | if (codec2_state->nse) |
| 2716 | return codec2_state->se/codec2_state->nse; | 1901 | return codec2_state->se / codec2_state->nse; |
| 2717 | else | 1902 | else |
| 2718 | return 0; | 1903 | return 0; |
| 2719 | } | 1904 | } |
| 2720 | 1905 | ||
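codec2_get_var() returns the running mean squared rate K quantiser error se/nse accumulated by the 700C encoder; since se is built from the rate K amplitude vectors (which are in dB) this is effectively a dB^2 figure, though treat that unit as our assumption. A usage fragment:

    #include <stdio.h>
    #include "codec2.h"

    /* fragment: report the 700C VQ error after encoding a run of frames */
    static void report_vq_error(struct CODEC2 *c2) {
        fprintf(stderr, "700C rate K VQ variance: %5.2f\n", codec2_get_var(c2));
    }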
| 2721 | float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) { | 1906 | float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) { |
| 2722 | codec2_state->user_rate_K_vec_no_mean_ = (float*)malloc(sizeof(float)*NEWAMP1_K); | 1907 | codec2_state->user_rate_K_vec_no_mean_ = |
| 2723 | *K = NEWAMP1_K; | 1908 | (float *)malloc(sizeof(float) * NEWAMP1_K); |
| 2724 | return codec2_state->user_rate_K_vec_no_mean_; | 1909 | *K = NEWAMP1_K; |
| 1910 | return codec2_state->user_rate_K_vec_no_mean_; | ||
| 2725 | } | 1911 | } |
| 2726 | 1912 | ||
| 2727 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en) { | 1913 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, bool en) { |
| 2728 | codec2_state->post_filter_en = en; | 1914 | codec2_state->post_filter_en = en; |
| 2729 | } | 1915 | } |
| 2730 | 1916 | ||
| 2731 | void codec2_700c_eq(struct CODEC2 *codec2_state, int en) { | 1917 | void codec2_700c_eq(struct CODEC2 *codec2_state, bool en) { |
| 2732 | codec2_state->eq_en = en; | 1918 | codec2_state->eq_en = en; |
| 2733 | codec2_state->se = 0.0; codec2_state->nse = 0; | 1919 | codec2_state->se = 0.0; |
| 1920 | codec2_state->nse = 0; | ||
| 2734 | } | 1921 | } |
