diff options
author | Dirk Engling <erdgeist@bauklotz.fritz.box> | 2020-10-15 16:27:49 +0200 |
---|---|---|
committer | Dirk Engling <erdgeist@bauklotz.fritz.box> | 2020-10-15 16:27:49 +0200 |
commit | 9022d768021bbe15c7815cc6f8b64218b46f0e10 (patch) | |
tree | e60ed1ea72a21b3873684a5afae67b26a798f927 | |
parent | f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (diff) |
-rw-r--r-- | codec2.c | 43 | ||||
-rw-r--r-- | codec2.h | 5 | ||||
-rw-r--r-- | codec2_internal.h | 4 | ||||
-rw-r--r-- | newamp1.c | 27 | ||||
-rw-r--r-- | newamp1.h | 4 | ||||
-rw-r--r-- | nlp.c | 183 | ||||
-rw-r--r-- | nlp.h | 2 | ||||
-rw-r--r-- | sine.c | 49 | ||||
-rw-r--r-- | sine.h | 6 | ||||
-rwxr-xr-x | stripdown.sh | 4 | ||||
-rw-r--r-- | version.h | 4 |
11 files changed, 85 insertions, 246 deletions
@@ -29,6 +29,7 @@ | |||
29 | #include <assert.h> | 29 | #include <assert.h> |
30 | #include <stdio.h> | 30 | #include <stdio.h> |
31 | #include <stdlib.h> | 31 | #include <stdlib.h> |
32 | #include <stdbool.h> | ||
32 | #include <string.h> | 33 | #include <string.h> |
33 | #include <math.h> | 34 | #include <math.h> |
34 | 35 | ||
@@ -114,12 +115,25 @@ struct CODEC2 * codec2_create(int mode) | |||
114 | struct CODEC2 *c2; | 115 | struct CODEC2 *c2; |
115 | int i,l; | 116 | int i,l; |
116 | 117 | ||
117 | #ifndef CORTEX_M4 | 118 | // ALL POSSIBLE MODES MUST BE CHECKED HERE! |
118 | if (( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode)) || | 119 | // we test if the desired mode is enabled at compile time |
119 | ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode)) ) { | 120 | // and return NULL if not |
121 | |||
122 | if (false == ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, mode) | ||
123 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, mode) | ||
124 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, mode) | ||
125 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) | ||
126 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) | ||
127 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) | ||
128 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700, mode) | ||
129 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode) | ||
130 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode) | ||
131 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode) | ||
132 | || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) | ||
133 | ) ) | ||
134 | { | ||
120 | return NULL; | 135 | return NULL; |
121 | } | 136 | } |
122 | #endif | ||
123 | 137 | ||
124 | c2 = (struct CODEC2*)MALLOC(sizeof(struct CODEC2)); | 138 | c2 = (struct CODEC2*)MALLOC(sizeof(struct CODEC2)); |
125 | if (c2 == NULL) | 139 | if (c2 == NULL) |
@@ -221,14 +235,15 @@ struct CODEC2 * codec2_create(int mode) | |||
221 | int k; | 235 | int k; |
222 | for(k=0; k<NEWAMP1_K; k++) { | 236 | for(k=0; k<NEWAMP1_K; k++) { |
223 | c2->prev_rate_K_vec_[k] = 0.0; | 237 | c2->prev_rate_K_vec_[k] = 0.0; |
238 | c2->eq[k] = 0.0; | ||
224 | } | 239 | } |
240 | c2->eq_en = 0; | ||
225 | c2->Wo_left = 0.0; | 241 | c2->Wo_left = 0.0; |
226 | c2->voicing_left = 0;; | 242 | c2->voicing_left = 0;; |
227 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); | 243 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); |
228 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); | 244 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); |
229 | } | 245 | } |
230 | 246 | ||
231 | #ifndef CORTEX_M4 | ||
232 | /* newamp2 initialisation */ | 247 | /* newamp2 initialisation */ |
233 | 248 | ||
234 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { | 249 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { |
@@ -255,7 +270,6 @@ struct CODEC2 * codec2_create(int mode) | |||
255 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); | 270 | c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); |
256 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); | 271 | c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); |
257 | } | 272 | } |
258 | #endif | ||
259 | 273 | ||
260 | c2->fmlfeat = NULL; | 274 | c2->fmlfeat = NULL; |
261 | 275 | ||
@@ -302,7 +316,6 @@ struct CODEC2 * codec2_create(int mode) | |||
302 | c2->decode = codec2_decode_1200; | 316 | c2->decode = codec2_decode_1200; |
303 | } | 317 | } |
304 | 318 | ||
305 | #ifndef CORTEX_M4 | ||
306 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) | 319 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) |
307 | { | 320 | { |
308 | c2->encode = codec2_encode_700; | 321 | c2->encode = codec2_encode_700; |
@@ -314,13 +327,13 @@ struct CODEC2 * codec2_create(int mode) | |||
314 | c2->encode = codec2_encode_700b; | 327 | c2->encode = codec2_encode_700b; |
315 | c2->decode = codec2_decode_700b; | 328 | c2->decode = codec2_decode_700b; |
316 | } | 329 | } |
317 | #endif | 330 | |
318 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) | 331 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) |
319 | { | 332 | { |
320 | c2->encode = codec2_encode_700c; | 333 | c2->encode = codec2_encode_700c; |
321 | c2->decode = codec2_decode_700c; | 334 | c2->decode = codec2_decode_700c; |
322 | } | 335 | } |
323 | #ifndef CORTEX_M4 | 336 | |
324 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) | 337 | if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) |
325 | { | 338 | { |
326 | c2->encode = codec2_encode_450; | 339 | c2->encode = codec2_encode_450; |
@@ -334,7 +347,6 @@ struct CODEC2 * codec2_create(int mode) | |||
334 | c2->decode = codec2_decode_450pwb; | 347 | c2->decode = codec2_decode_450pwb; |
335 | } | 348 | } |
336 | 349 | ||
337 | #endif | ||
338 | 350 | ||
339 | return c2; | 351 | return c2; |
340 | } | 352 | } |
@@ -1518,7 +1530,6 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * | |||
1518 | } | 1530 | } |
1519 | 1531 | ||
1520 | 1532 | ||
1521 | #ifndef CORTEX_M4 | ||
1522 | /*---------------------------------------------------------------------------*\ | 1533 | /*---------------------------------------------------------------------------*\ |
1523 | 1534 | ||
1524 | FUNCTION....: codec2_encode_700 | 1535 | FUNCTION....: codec2_encode_700 |
@@ -1926,7 +1937,6 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * | |||
1926 | for(i=0; i<LPC_ORD_LOW; i++) | 1937 | for(i=0; i<LPC_ORD_LOW; i++) |
1927 | c2->prev_lsps_dec[i] = lsps[3][i]; | 1938 | c2->prev_lsps_dec[i] = lsps[3][i]; |
1928 | } | 1939 | } |
1929 | #endif | ||
1930 | 1940 | ||
1931 | 1941 | ||
1932 | /*---------------------------------------------------------------------------*\ | 1942 | /*---------------------------------------------------------------------------*\ |
@@ -1987,7 +1997,7 @@ void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]) | |||
1987 | K, | 1997 | K, |
1988 | &mean, | 1998 | &mean, |
1989 | rate_K_vec_no_mean, | 1999 | rate_K_vec_no_mean, |
1990 | rate_K_vec_no_mean_, &c2->se); | 2000 | rate_K_vec_no_mean_, &c2->se, c2->eq, c2->eq_en); |
1991 | c2->nse += K; | 2001 | c2->nse += K; |
1992 | 2002 | ||
1993 | #ifndef CORTEX_M4 | 2003 | #ifndef CORTEX_M4 |
@@ -2092,7 +2102,6 @@ float codec2_energy_700c(struct CODEC2 *c2, const unsigned char * bits) | |||
2092 | return POW10F(mean/10.0); | 2102 | return POW10F(mean/10.0); |
2093 | } | 2103 | } |
2094 | 2104 | ||
2095 | #ifndef CORTEX_M4 | ||
2096 | float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits) | 2105 | float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits) |
2097 | { | 2106 | { |
2098 | int indexes[4]; | 2107 | int indexes[4]; |
@@ -2418,7 +2427,6 @@ void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char | |||
2418 | } | 2427 | } |
2419 | } | 2428 | } |
2420 | 2429 | ||
2421 | #endif | ||
2422 | 2430 | ||
2423 | /*---------------------------------------------------------------------------* \ | 2431 | /*---------------------------------------------------------------------------* \ |
2424 | 2432 | ||
@@ -2719,3 +2727,8 @@ float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K) { | |||
2719 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en) { | 2727 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en) { |
2720 | codec2_state->post_filter_en = en; | 2728 | codec2_state->post_filter_en = en; |
2721 | } | 2729 | } |
2730 | |||
2731 | void codec2_700c_eq(struct CODEC2 *codec2_state, int en) { | ||
2732 | codec2_state->eq_en = en; | ||
2733 | codec2_state->se = 0.0; codec2_state->nse = 0; | ||
2734 | } | ||
@@ -111,13 +111,16 @@ int codec2_rebuild_spare_bit(struct CODEC2 *codec2_state, int unpacked_bits[]); | |||
111 | void codec2_set_natural_or_gray(struct CODEC2 *codec2_state, int gray); | 111 | void codec2_set_natural_or_gray(struct CODEC2 *codec2_state, int gray); |
112 | void codec2_set_softdec(struct CODEC2 *c2, float *softdec); | 112 | void codec2_set_softdec(struct CODEC2 *c2, float *softdec); |
113 | float codec2_get_energy(struct CODEC2 *codec2_state, const unsigned char *bits); | 113 | float codec2_get_energy(struct CODEC2 *codec2_state, const unsigned char *bits); |
114 | 114 | ||
115 | // support for ML and VQ experiments | 115 | // support for ML and VQ experiments |
116 | void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *filename); | 116 | void codec2_open_mlfeat(struct CODEC2 *codec2_state, char *filename); |
117 | void codec2_load_codebook(struct CODEC2 *codec2_state, int num, char *filename); | 117 | void codec2_load_codebook(struct CODEC2 *codec2_state, int num, char *filename); |
118 | float codec2_get_var(struct CODEC2 *codec2_state); | 118 | float codec2_get_var(struct CODEC2 *codec2_state); |
119 | float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K); | 119 | float *codec2_enable_user_ratek(struct CODEC2 *codec2_state, int *K); |
120 | |||
121 | // 700C post filter and equaliser | ||
120 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en); | 122 | void codec2_700c_post_filter(struct CODEC2 *codec2_state, int en); |
123 | void codec2_700c_eq(struct CODEC2 *codec2_state, int en); | ||
121 | 124 | ||
122 | #endif | 125 | #endif |
123 | 126 | ||
diff --git a/codec2_internal.h b/codec2_internal.h index 498a6c4..b46e358 100644 --- a/codec2_internal.h +++ b/codec2_internal.h | |||
@@ -42,7 +42,7 @@ struct CODEC2 { | |||
42 | codec2_fft_cfg fft_fwd_cfg; /* forward FFT config */ | 42 | codec2_fft_cfg fft_fwd_cfg; /* forward FFT config */ |
43 | codec2_fftr_cfg fftr_fwd_cfg; /* forward real FFT config */ | 43 | codec2_fftr_cfg fftr_fwd_cfg; /* forward real FFT config */ |
44 | float *w; /* [m_pitch] time domain hamming window */ | 44 | float *w; /* [m_pitch] time domain hamming window */ |
45 | COMP W[FFT_ENC]; /* DFT of w[] */ | 45 | float W[FFT_ENC]; /* DFT of w[] */ |
46 | float *Pn; /* [2*n_samp] trapezoidal synthesis window */ | 46 | float *Pn; /* [2*n_samp] trapezoidal synthesis window */ |
47 | float *bpf_buf; /* buffer for band pass filter */ | 47 | float *bpf_buf; /* buffer for band pass filter */ |
48 | float *Sn; /* [m_pitch] input speech */ | 48 | float *Sn; /* [m_pitch] input speech */ |
@@ -82,6 +82,8 @@ struct CODEC2 { | |||
82 | unsigned int nse; /* number of terms in sum */ | 82 | unsigned int nse; /* number of terms in sum */ |
83 | float *user_rate_K_vec_no_mean_; /* optional, user supplied vector for quantisation experiments */ | 83 | float *user_rate_K_vec_no_mean_; /* optional, user supplied vector for quantisation experiments */ |
84 | int post_filter_en; | 84 | int post_filter_en; |
85 | float eq[NEWAMP1_K]; /* optional equaliser */ | ||
86 | int eq_en; | ||
85 | 87 | ||
86 | /*newamp2 states (also uses newamp1 states )*/ | 88 | /*newamp2 states (also uses newamp1 states )*/ |
87 | float energy_prev; | 89 | float energy_prev; |
@@ -415,23 +415,38 @@ void newamp1_model_to_indexes(C2CONST *c2const, | |||
415 | float *mean, | 415 | float *mean, |
416 | float rate_K_vec_no_mean[], | 416 | float rate_K_vec_no_mean[], |
417 | float rate_K_vec_no_mean_[], | 417 | float rate_K_vec_no_mean_[], |
418 | float *se | 418 | float *se, |
419 | float *eq, | ||
420 | int eq_en | ||
419 | ) | 421 | ) |
420 | { | 422 | { |
421 | int k; | 423 | int k; |
422 | 424 | ||
423 | /* convert variable rate L to fixed rate K */ | 425 | /* convert variable rate L to fixed rate K */ |
424 | |||
425 | resample_const_rate_f(c2const, model, rate_K_vec, rate_K_sample_freqs_kHz, K); | 426 | resample_const_rate_f(c2const, model, rate_K_vec, rate_K_sample_freqs_kHz, K); |
426 | 427 | ||
427 | /* remove mean and two stage VQ */ | 428 | /* remove mean */ |
428 | |||
429 | float sum = 0.0; | 429 | float sum = 0.0; |
430 | for(k=0; k<K; k++) | 430 | for(k=0; k<K; k++) |
431 | sum += rate_K_vec[k]; | 431 | sum += rate_K_vec[k]; |
432 | *mean = sum/K;; | 432 | *mean = sum/K; |
433 | for(k=0; k<K; k++) | 433 | for(k=0; k<K; k++) |
434 | rate_K_vec_no_mean[k] = rate_K_vec[k] - *mean; | 434 | rate_K_vec_no_mean[k] = rate_K_vec[k] - *mean; |
435 | |||
436 | /* update and optionally run "front eq" equaliser before VQ */ | ||
437 | static float ideal[] = {8,10,12,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,-20}; | ||
438 | float gain = 0.02; | ||
439 | float update; | ||
440 | |||
441 | for(k=0; k<K; k++) { | ||
442 | update = rate_K_vec_no_mean[k] - ideal[k]; | ||
443 | eq[k] = (1.0-gain)*eq[k] + gain*update; | ||
444 | if (eq[k] < 0.0) eq[k] = 0.0; | ||
445 | if (eq_en) | ||
446 | rate_K_vec_no_mean[k] -= eq[k]; | ||
447 | } | ||
448 | |||
449 | /* two stage VQ */ | ||
435 | rate_K_mbest_encode(indexes, rate_K_vec_no_mean, rate_K_vec_no_mean_, K, NEWAMP1_VQ_MBEST_DEPTH); | 450 | rate_K_mbest_encode(indexes, rate_K_vec_no_mean, rate_K_vec_no_mean_, K, NEWAMP1_VQ_MBEST_DEPTH); |
436 | 451 | ||
437 | /* running sum of squared error for variance calculation */ | 452 | /* running sum of squared error for variance calculation */ |
@@ -439,7 +454,6 @@ void newamp1_model_to_indexes(C2CONST *c2const, | |||
439 | *se += pow(rate_K_vec_no_mean[k]-rate_K_vec_no_mean_[k],2.0); | 454 | *se += pow(rate_K_vec_no_mean[k]-rate_K_vec_no_mean_[k],2.0); |
440 | 455 | ||
441 | /* scalar quantise mean (effectively the frame energy) */ | 456 | /* scalar quantise mean (effectively the frame energy) */ |
442 | |||
443 | float w[1] = {1.0}; | 457 | float w[1] = {1.0}; |
444 | float se_mean; | 458 | float se_mean; |
445 | indexes[2] = quantise(newamp1_energy_cb[0].cb, | 459 | indexes[2] = quantise(newamp1_energy_cb[0].cb, |
@@ -451,7 +465,6 @@ void newamp1_model_to_indexes(C2CONST *c2const, | |||
451 | 465 | ||
452 | /* scalar quantise Wo. We steal the smallest Wo index to signal | 466 | /* scalar quantise Wo. We steal the smallest Wo index to signal |
453 | an unvoiced frame */ | 467 | an unvoiced frame */ |
454 | |||
455 | if (model->voiced) { | 468 | if (model->voiced) { |
456 | int index = encode_log_Wo(c2const, model->Wo, 6); | 469 | int index = encode_log_Wo(c2const, model->Wo, 6); |
457 | if (index == 0) { | 470 | if (index == 0) { |
@@ -55,7 +55,9 @@ void newamp1_model_to_indexes(C2CONST *c2const, | |||
55 | float *mean, | 55 | float *mean, |
56 | float rate_K_vec_no_mean[], | 56 | float rate_K_vec_no_mean[], |
57 | float rate_K_vec_no_mean_[], | 57 | float rate_K_vec_no_mean_[], |
58 | float *se); | 58 | float *se, |
59 | float *eq, | ||
60 | int eq_en); | ||
59 | void newamp1_indexes_to_rate_K_vec(float rate_K_vec_[], | 61 | void newamp1_indexes_to_rate_K_vec(float rate_K_vec_[], |
60 | float rate_K_vec_no_mean_[], | 62 | float rate_K_vec_no_mean_[], |
61 | float rate_K_sample_freqs_kHz[], | 63 | float rate_K_sample_freqs_kHz[], |
@@ -53,7 +53,6 @@ | |||
53 | #define F0_MAX 500 | 53 | #define F0_MAX 500 |
54 | #define CNLP 0.3 /* post processor constant */ | 54 | #define CNLP 0.3 /* post processor constant */ |
55 | #define NLP_NTAP 48 /* Decimation LPF order */ | 55 | #define NLP_NTAP 48 /* Decimation LPF order */ |
56 | #undef POST_PROCESS_MBE /* choose post processor */ | ||
57 | 56 | ||
58 | /* 8 to 16 kHz sample rate conversion */ | 57 | /* 8 to 16 kHz sample rate conversion */ |
59 | 58 | ||
@@ -132,10 +131,6 @@ typedef struct { | |||
132 | FILE *f; | 131 | FILE *f; |
133 | } NLP; | 132 | } NLP; |
134 | 133 | ||
135 | #ifdef POST_PROCESS_MBE | ||
136 | float test_candidate_mbe(COMP Sw[], COMP W[], float f0); | ||
137 | float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo); | ||
138 | #endif | ||
139 | float post_process_sub_multiples(COMP Fw[], | 134 | float post_process_sub_multiples(COMP Fw[], |
140 | int pmin, int pmax, float gmax, int gmax_bin, | 135 | int pmin, int pmax, float gmax, int gmax_bin, |
141 | float *prev_f0); | 136 | float *prev_f0); |
@@ -258,7 +253,7 @@ float nlp( | |||
258 | int n, /* frames shift (no. new samples in Sn[]) */ | 253 | int n, /* frames shift (no. new samples in Sn[]) */ |
259 | float *pitch, /* estimated pitch period in samples at current Fs */ | 254 | float *pitch, /* estimated pitch period in samples at current Fs */ |
260 | COMP Sw[], /* Freq domain version of Sn[] */ | 255 | COMP Sw[], /* Freq domain version of Sn[] */ |
261 | COMP W[], /* Freq domain window */ | 256 | float W[], /* Freq domain window */ |
262 | float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ | 257 | float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ |
263 | ) | 258 | ) |
264 | { | 259 | { |
@@ -389,11 +384,7 @@ float nlp( | |||
389 | 384 | ||
390 | PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); | 385 | PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); |
391 | 386 | ||
392 | #ifdef POST_PROCESS_MBE | ||
393 | best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_f0); | ||
394 | #else | ||
395 | best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); | 387 | best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); |
396 | #endif | ||
397 | 388 | ||
398 | PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); | 389 | PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); |
399 | 390 | ||
@@ -491,178 +482,6 @@ float post_process_sub_multiples(COMP Fw[], | |||
491 | return best_f0; | 482 | return best_f0; |
492 | } | 483 | } |
493 | 484 | ||
494 | #ifdef POST_PROCESS_MBE | ||
495 | |||
496 | /*---------------------------------------------------------------------------*\ | ||
497 | |||
498 | post_process_mbe() | ||
499 | |||
500 | Use the MBE pitch estimation algorithm to evaluate pitch candidates. This | ||
501 | works OK but the accuracy at low F0 is affected by NW, the analysis window | ||
502 | size used for the DFT of the input speech Sw[]. Also favours high F0 in | ||
503 | the presence of background noise which causes periodic artifacts in the | ||
504 | synthesised speech. | ||
505 | |||
506 | \*---------------------------------------------------------------------------*/ | ||
507 | |||
508 | float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo) | ||
509 | { | ||
510 | float candidate_f0; | ||
511 | float f0,best_f0; /* fundamental frequency */ | ||
512 | float e,e_min; /* MBE cost function */ | ||
513 | int i; | ||
514 | #ifdef DUMP | ||
515 | float e_hz[F0_MAX]; | ||
516 | #endif | ||
517 | #if !defined(NDEBUG) || defined(DUMP) | ||
518 | int bin; | ||
519 | #endif | ||
520 | float f0_min, f0_max; | ||
521 | float f0_start, f0_end; | ||
522 | |||
523 | f0_min = (float)SAMPLE_RATE/pmax; | ||
524 | f0_max = (float)SAMPLE_RATE/pmin; | ||
525 | |||
526 | /* Now look for local maxima. Each local maxima is a candidate | ||
527 | that we test using the MBE pitch estimation algorithm */ | ||
528 | |||
529 | #ifdef DUMP | ||
530 | for(i=0; i<F0_MAX; i++) | ||
531 | e_hz[i] = -1; | ||
532 | #endif | ||
533 | e_min = 1E32; | ||
534 | best_f0 = 50; | ||
535 | for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) { | ||
536 | if ((Fw[i].real > Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) { | ||
537 | |||
538 | /* local maxima found, lets test if it's big enough */ | ||
539 | |||
540 | if (Fw[i].real > T*gmax) { | ||
541 | |||
542 | /* OK, sample MBE cost function over +/- 10Hz range in 2.5Hz steps */ | ||
543 | |||
544 | candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC); | ||
545 | f0_start = candidate_f0-20; | ||
546 | f0_end = candidate_f0+20; | ||
547 | if (f0_start < f0_min) f0_start = f0_min; | ||
548 | if (f0_end > f0_max) f0_end = f0_max; | ||
549 | |||
550 | for(f0=f0_start; f0<=f0_end; f0+= 2.5) { | ||
551 | e = test_candidate_mbe(Sw, W, f0); | ||
552 | #if !defined(NDEBUG) || defined(DUMP) | ||
553 | bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX)); | ||
554 | #endif | ||
555 | #ifdef DUMP | ||
556 | e_hz[bin] = e; | ||
557 | #endif | ||
558 | if (e < e_min) { | ||
559 | e_min = e; | ||
560 | best_f0 = f0; | ||
561 | } | ||
562 | } | ||
563 | |||
564 | } | ||
565 | } | ||
566 | } | ||
567 | |||
568 | /* finally sample MBE cost function around previous pitch estimate | ||
569 | (form of pitch tracking) */ | ||
570 | |||
571 | candidate_f0 = *prev_Wo * SAMPLE_RATE/TWO_PI; | ||
572 | f0_start = candidate_f0-20; | ||
573 | f0_end = candidate_f0+20; | ||
574 | if (f0_start < f0_min) f0_start = f0_min; | ||
575 | if (f0_end > f0_max) f0_end = f0_max; | ||
576 | |||
577 | for(f0=f0_start; f0<=f0_end; f0+= 2.5) { | ||
578 | e = test_candidate_mbe(Sw, W, f0); | ||
579 | #if !defined(NDEBUG) || defined(DUMP) | ||
580 | bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX)); | ||
581 | #endif | ||
582 | #ifdef DUMP | ||
583 | e_hz[bin] = e; | ||
584 | #endif | ||
585 | if (e < e_min) { | ||
586 | e_min = e; | ||
587 | best_f0 = f0; | ||
588 | } | ||
589 | } | ||
590 | |||
591 | #ifdef DUMP | ||
592 | dump_e(e_hz); | ||
593 | #endif | ||
594 | |||
595 | return best_f0; | ||
596 | } | ||
597 | |||
598 | /*---------------------------------------------------------------------------*\ | ||
599 | |||
600 | test_candidate_mbe() | ||
601 | |||
602 | Returns the error of the MBE cost function for the input f0. | ||
603 | |||
604 | Note: I think a lot of the operations below can be simplified as | ||
605 | W[].imag = 0 and has been normalised such that den always equals 1. | ||
606 | |||
607 | \*---------------------------------------------------------------------------*/ | ||
608 | |||
609 | float test_candidate_mbe( | ||
610 | COMP Sw[], | ||
611 | COMP W[], | ||
612 | float f0 | ||
613 | ) | ||
614 | { | ||
615 | COMP Sw_[FFT_ENC]; /* DFT of all voiced synthesised signal */ | ||
616 | int l,al,bl,m; /* loop variables */ | ||
617 | COMP Am; /* amplitude sample for this band */ | ||
618 | int offset; /* centers Hw[] about current harmonic */ | ||
619 | float den; /* denominator of Am expression */ | ||
620 | float error; /* accumulated error between original and synthesised */ | ||
621 | float Wo; /* current "test" fundamental freq. */ | ||
622 | int L; | ||
623 | |||
624 | L = floorf((SAMPLE_RATE/2.0)/f0); | ||
625 | Wo = f0*(2*PI/SAMPLE_RATE); | ||
626 | |||
627 | error = 0.0; | ||
628 | |||
629 | /* Just test across the harmonics in the first 1000 Hz (L/4) */ | ||
630 | |||
631 | for(l=1; l<L/4; l++) { | ||
632 | Am.real = 0.0; | ||
633 | Am.imag = 0.0; | ||
634 | den = 0.0; | ||
635 | al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); | ||
636 | bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); | ||
637 | |||
638 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ | ||
639 | |||
640 | for(m=al; m<bl; m++) { | ||
641 | offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
642 | Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag; | ||
643 | Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag; | ||
644 | den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag; | ||
645 | } | ||
646 | |||
647 | Am.real = Am.real/den; | ||
648 | Am.imag = Am.imag/den; | ||
649 | |||
650 | /* Determine error between estimated harmonic and original */ | ||
651 | |||
652 | for(m=al; m<bl; m++) { | ||
653 | offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
654 | Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag; | ||
655 | Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real; | ||
656 | error += (Sw[m].real - Sw_[m].real)*(Sw[m].real - Sw_[m].real); | ||
657 | error += (Sw[m].imag - Sw_[m].imag)*(Sw[m].imag - Sw_[m].imag); | ||
658 | } | ||
659 | } | ||
660 | |||
661 | return error; | ||
662 | } | ||
663 | |||
664 | #endif | ||
665 | |||
666 | /*---------------------------------------------------------------------------*\ | 485 | /*---------------------------------------------------------------------------*\ |
667 | 486 | ||
668 | FUNCTION....: fdmdv_16_to_8() | 487 | FUNCTION....: fdmdv_16_to_8() |
@@ -33,6 +33,6 @@ | |||
33 | void *nlp_create(C2CONST *c2const); | 33 | void *nlp_create(C2CONST *c2const); |
34 | void nlp_destroy(void *nlp_state); | 34 | void nlp_destroy(void *nlp_state); |
35 | float nlp(void *nlp_state, float Sn[], int n, | 35 | float nlp(void *nlp_state, float Sn[], int n, |
36 | float *pitch_samples, COMP Sw[], COMP W[], float *prev_f0); | 36 | float *pitch_samples, COMP Sw[], float W[], float *prev_f0); |
37 | 37 | ||
38 | #endif | 38 | #endif |
@@ -97,11 +97,10 @@ C2CONST c2const_create(int Fs, float framelength_s) { | |||
97 | 97 | ||
98 | \*---------------------------------------------------------------------------*/ | 98 | \*---------------------------------------------------------------------------*/ |
99 | 99 | ||
100 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[]) | 100 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], float W[]) |
101 | { | 101 | { |
102 | float m; | 102 | float m; |
103 | COMP wshift[FFT_ENC]; | 103 | COMP wshift[FFT_ENC]; |
104 | COMP temp; | ||
105 | int i,j; | 104 | int i,j; |
106 | int m_pitch = c2const->m_pitch; | 105 | int m_pitch = c2const->m_pitch; |
107 | int nw = c2const->nw; | 106 | int nw = c2const->nw; |
@@ -156,6 +155,8 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
156 | nw/2 nw/2 | 155 | nw/2 nw/2 |
157 | */ | 156 | */ |
158 | 157 | ||
158 | COMP temp[FFT_ENC]; | ||
159 | |||
159 | for(i=0; i<FFT_ENC; i++) { | 160 | for(i=0; i<FFT_ENC; i++) { |
160 | wshift[i].real = 0.0; | 161 | wshift[i].real = 0.0; |
161 | wshift[i].imag = 0.0; | 162 | wshift[i].imag = 0.0; |
@@ -165,7 +166,7 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
165 | for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++) | 166 | for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++) |
166 | wshift[i].real = w[j]; | 167 | wshift[i].real = w[j]; |
167 | 168 | ||
168 | codec2_fft(fft_fwd_cfg, wshift, W); | 169 | codec2_fft(fft_fwd_cfg, wshift, temp); |
169 | 170 | ||
170 | /* | 171 | /* |
171 | Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later | 172 | Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later |
@@ -192,12 +193,8 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ | |||
192 | 193 | ||
193 | 194 | ||
194 | for(i=0; i<FFT_ENC/2; i++) { | 195 | for(i=0; i<FFT_ENC/2; i++) { |
195 | temp.real = W[i].real; | 196 | W[i] = temp[i + FFT_ENC / 2].real; |
196 | temp.imag = W[i].imag; | 197 | W[i + FFT_ENC / 2] = temp[i].real; |
197 | W[i].real = W[i+FFT_ENC/2].real; | ||
198 | W[i].imag = W[i+FFT_ENC/2].imag; | ||
199 | W[i+FFT_ENC/2].real = temp.real; | ||
200 | W[i+FFT_ENC/2].imag = temp.imag; | ||
201 | } | 198 | } |
202 | 199 | ||
203 | } | 200 | } |
@@ -402,39 +399,30 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float | |||
402 | 399 | ||
403 | \*---------------------------------------------------------------------------*/ | 400 | \*---------------------------------------------------------------------------*/ |
404 | 401 | ||
405 | void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase) | 402 | void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase) |
406 | { | 403 | { |
407 | int i,m; /* loop variables */ | 404 | int i,m; /* loop variables */ |
408 | int am,bm; /* bounds of current harmonic */ | 405 | int am,bm; /* bounds of current harmonic */ |
409 | int b; /* DFT bin of centre of current harmonic */ | ||
410 | float den; /* denominator of amplitude expression */ | 406 | float den; /* denominator of amplitude expression */ |
411 | float r, one_on_r; /* number of rads/bin */ | ||
412 | int offset; | ||
413 | COMP Am; | ||
414 | 407 | ||
415 | r = TWO_PI/FFT_ENC; | 408 | float r = TWO_PI/FFT_ENC; |
416 | one_on_r = 1.0/r; | 409 | float one_on_r = 1.0/r; |
417 | 410 | ||
418 | for(m=1; m<=model->L; m++) { | 411 | for(m=1; m<=model->L; m++) { |
412 | /* Estimate amplitude of harmonic */ | ||
413 | |||
419 | den = 0.0; | 414 | den = 0.0; |
420 | am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); | 415 | am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); |
421 | bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); | 416 | bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); |
422 | b = (int)(m*model->Wo/r + 0.5); | ||
423 | |||
424 | /* Estimate amplitude of harmonic */ | |
425 | 417 | ||
426 | den = 0.0; | ||
427 | Am.real = Am.imag = 0.0; | ||
428 | offset = FFT_ENC/2 - (int)(m*model->Wo*one_on_r + 0.5); | ||
429 | for(i=am; i<bm; i++) { | 418 | for(i=am; i<bm; i++) { |
430 | den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag; | 419 | den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag; |
431 | Am.real += Sw[i].real*W[i + offset].real; | ||
432 | Am.imag += Sw[i].imag*W[i + offset].real; | ||
433 | } | 420 | } |
434 | 421 | ||
435 | model->A[m] = sqrtf(den); | 422 | model->A[m] = sqrtf(den); |
436 | 423 | ||
437 | if (est_phase) { | 424 | if (est_phase) { |
425 | int b = (int)(m*model->Wo/r + 0.5); /* DFT bin of centre of current harmonic */ | ||
438 | 426 | ||
439 | /* Estimate phase of harmonic, this is expensive in CPU for | 427 | /* Estimate phase of harmonic, this is expensive in CPU for |
440 | embedded devices so we make it an option */ | 428 | embedded devices so we make it an option */ |
@@ -459,7 +447,7 @@ float est_voicing_mbe( | |||
459 | C2CONST *c2const, | 447 | C2CONST *c2const, |
460 | MODEL *model, | 448 | MODEL *model, |
461 | COMP Sw[], | 449 | COMP Sw[], |
462 | COMP W[] | 450 | float W[] |
463 | ) | 451 | ) |
464 | { | 452 | { |
465 | int l,al,bl,m; /* loop variables */ | 453 | int l,al,bl,m; /* loop variables */ |
@@ -497,9 +485,9 @@ float est_voicing_mbe( | |||
497 | 485 | ||
498 | offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; | 486 | offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; |
499 | for(m=al; m<bl; m++) { | 487 | for(m=al; m<bl; m++) { |
500 | Am.real += Sw[m].real*W[offset+m].real; | 488 | Am.real += Sw[m].real*W[offset+m]; |
501 | Am.imag += Sw[m].imag*W[offset+m].real; | 489 | Am.imag += Sw[m].imag*W[offset+m]; |
502 | den += W[offset+m].real*W[offset+m].real; | 490 | den += W[offset+m]*W[offset+m]; |
503 | } | 491 | } |
504 | 492 | ||
505 | Am.real = Am.real/den; | 493 | Am.real = Am.real/den; |
@@ -507,10 +495,9 @@ float est_voicing_mbe( | |||
507 | 495 | ||
508 | /* Determine error between estimated harmonic and original */ | 496 | /* Determine error between estimated harmonic and original */ |
509 | 497 | ||
510 | // Redundant! offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
511 | for(m=al; m<bl; m++) { | 498 | for(m=al; m<bl; m++) { |
512 | Ew.real = Sw[m].real - Am.real*W[offset+m].real; | 499 | Ew.real = Sw[m].real - Am.real*W[offset+m]; |
513 | Ew.imag = Sw[m].imag - Am.imag*W[offset+m].real; | 500 | Ew.imag = Sw[m].imag - Am.imag*W[offset+m]; |
514 | error += Ew.real*Ew.real; | 501 | error += Ew.real*Ew.real; |
515 | error += Ew.imag*Ew.imag; | 502 | error += Ew.imag*Ew.imag; |
516 | } | 503 | } |
@@ -34,12 +34,12 @@ | |||
34 | 34 | ||
35 | C2CONST c2const_create(int Fs, float framelength_ms); | 35 | C2CONST c2const_create(int Fs, float framelength_ms); |
36 | 36 | ||
37 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[]); | 37 | void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], float W[]); |
38 | float hpf(float x, float states[]); | 38 | float hpf(float x, float states[]); |
39 | void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]); | 39 | void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]); |
40 | void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]); | 40 | void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]); |
41 | void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase); | 41 | void estimate_amplitudes(MODEL *model, COMP Sw[], float W[], int est_phase); |
42 | float est_voicing_mbe(C2CONST *c2const, MODEL *model, COMP Sw[], COMP W[]); | 42 | float est_voicing_mbe(C2CONST *c2const, MODEL *model, COMP Sw[], float W[]); |
43 | void make_synthesis_window(C2CONST *c2const, float Pn[]); | 43 | void make_synthesis_window(C2CONST *c2const, float Pn[]); |
44 | void synthesise(int n_samp, codec2_fftr_cfg fftr_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift); | 44 | void synthesise(int n_samp, codec2_fftr_cfg fftr_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift); |
45 | 45 | ||
diff --git a/stripdown.sh b/stripdown.sh index 2e4466e..c8d8332 100755 --- a/stripdown.sh +++ b/stripdown.sh | |||
@@ -23,7 +23,7 @@ for file in ${LIBSRC} ${LIBINC}; do | |||
23 | done | 23 | done |
24 | 24 | ||
25 | # fixup one include | 25 | # fixup one include |
26 | sed s:codec2/version.h:version.h: src/codec2.h > "${DESTDIR}"/codec2.h | 26 | sed s:\<codec2/version.h\>:\"version.h\": src/codec2.h > "${DESTDIR}"/codec2.h |
27 | 27 | ||
28 | cat > "${DESTDIR}"/debug_alloc.h <<'EOF' | 28 | cat > "${DESTDIR}"/debug_alloc.h <<'EOF' |
29 | #define FREE free | 29 | #define FREE free |
@@ -51,7 +51,7 @@ cd .. | |||
51 | rm -r "${BUILDDIR}" | 51 | rm -r "${BUILDDIR}" |
52 | 52 | ||
53 | cat > "${DESTDIR}"/Makefile <<'EOF' | 53 | cat > "${DESTDIR}"/Makefile <<'EOF' |
54 | CFLAGS = -Wall -Wno-strict-overflow -std=gnu11 -fPIC -g -O2 -I. | 54 | CFLAGS = -Wall -Wno-strict-overflow -std=gnu11 -fPIC -g -O2 -I. -lm |
55 | CFLAGS += -DHORUS_L2_RX -DINTERLEAVER -DRUN_TIME_TABLES -DSCRAMBLER -Dcodec2_EXPORTS | 55 | CFLAGS += -DHORUS_L2_RX -DINTERLEAVER -DRUN_TIME_TABLES -DSCRAMBLER -Dcodec2_EXPORTS |
56 | CFLAGS += -Wno-incompatible-pointer-types-discards-qualifiers | 56 | CFLAGS += -Wno-incompatible-pointer-types-discards-qualifiers |
57 | 57 | ||
@@ -31,7 +31,7 @@ | |||
31 | 31 | ||
32 | #define CODEC2_VERSION_MAJOR 0 | 32 | #define CODEC2_VERSION_MAJOR 0 |
33 | #define CODEC2_VERSION_MINOR 9 | 33 | #define CODEC2_VERSION_MINOR 9 |
34 | /* #undef CODEC2_VERSION_PATCH */ | 34 | #define CODEC2_VERSION_PATCH 2 |
35 | #define CODEC2_VERSION "0.9" | 35 | #define CODEC2_VERSION "0.9.2" |
36 | 36 | ||
37 | #endif //CODEC2_HAVE_VERSION | 37 | #endif //CODEC2_HAVE_VERSION |