diff options
Diffstat (limited to 'nlp.c')
-rw-r--r-- | nlp.c | 183 |
1 files changed, 1 insertions, 182 deletions
@@ -53,7 +53,6 @@ | |||
53 | #define F0_MAX 500 | 53 | #define F0_MAX 500 |
54 | #define CNLP 0.3 /* post processor constant */ | 54 | #define CNLP 0.3 /* post processor constant */ |
55 | #define NLP_NTAP 48 /* Decimation LPF order */ | 55 | #define NLP_NTAP 48 /* Decimation LPF order */ |
56 | #undef POST_PROCESS_MBE /* choose post processor */ | ||
57 | 56 | ||
58 | /* 8 to 16 kHz sample rate conversion */ | 57 | /* 8 to 16 kHz sample rate conversion */ |
59 | 58 | ||
@@ -132,10 +131,6 @@ typedef struct { | |||
132 | FILE *f; | 131 | FILE *f; |
133 | } NLP; | 132 | } NLP; |
134 | 133 | ||
135 | #ifdef POST_PROCESS_MBE | ||
136 | float test_candidate_mbe(COMP Sw[], COMP W[], float f0); | ||
137 | float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo); | ||
138 | #endif | ||
139 | float post_process_sub_multiples(COMP Fw[], | 134 | float post_process_sub_multiples(COMP Fw[], |
140 | int pmin, int pmax, float gmax, int gmax_bin, | 135 | int pmin, int pmax, float gmax, int gmax_bin, |
141 | float *prev_f0); | 136 | float *prev_f0); |
@@ -258,7 +253,7 @@ float nlp( | |||
258 | int n, /* frames shift (no. new samples in Sn[]) */ | 253 | int n, /* frames shift (no. new samples in Sn[]) */ |
259 | float *pitch, /* estimated pitch period in samples at current Fs */ | 254 | float *pitch, /* estimated pitch period in samples at current Fs */ |
260 | COMP Sw[], /* Freq domain version of Sn[] */ | 255 | COMP Sw[], /* Freq domain version of Sn[] */ |
261 | COMP W[], /* Freq domain window */ | 256 | float W[], /* Freq domain window */ |
262 | float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ | 257 | float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ |
263 | ) | 258 | ) |
264 | { | 259 | { |
@@ -389,11 +384,7 @@ float nlp( | |||
389 | 384 | ||
390 | PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); | 385 | PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); |
391 | 386 | ||
392 | #ifdef POST_PROCESS_MBE | ||
393 | best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_f0); | ||
394 | #else | ||
395 | best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); | 387 | best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); |
396 | #endif | ||
397 | 388 | ||
398 | PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); | 389 | PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); |
399 | 390 | ||
@@ -491,178 +482,6 @@ float post_process_sub_multiples(COMP Fw[], | |||
491 | return best_f0; | 482 | return best_f0; |
492 | } | 483 | } |
493 | 484 | ||
494 | #ifdef POST_PROCESS_MBE | ||
495 | |||
496 | /*---------------------------------------------------------------------------*\ | ||
497 | |||
498 | post_process_mbe() | ||
499 | |||
500 | Use the MBE pitch estimation algorithm to evaluate pitch candidates. This | ||
501 | works OK but the accuracy at low F0 is affected by NW, the analysis window | ||
502 | size used for the DFT of the input speech Sw[]. Also favours high F0 in | ||
503 | the presence of background noise which causes periodic artifacts in the | ||
504 | synthesised speech. | ||
505 | |||
506 | \*---------------------------------------------------------------------------*/ | ||
507 | |||
508 | float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo) | ||
509 | { | ||
510 | float candidate_f0; | ||
511 | float f0,best_f0; /* fundamental frequency */ | ||
512 | float e,e_min; /* MBE cost function */ | ||
513 | int i; | ||
514 | #ifdef DUMP | ||
515 | float e_hz[F0_MAX]; | ||
516 | #endif | ||
517 | #if !defined(NDEBUG) || defined(DUMP) | ||
518 | int bin; | ||
519 | #endif | ||
520 | float f0_min, f0_max; | ||
521 | float f0_start, f0_end; | ||
522 | |||
523 | f0_min = (float)SAMPLE_RATE/pmax; | ||
524 | f0_max = (float)SAMPLE_RATE/pmin; | ||
525 | |||
526 | /* Now look for local maxima. Each local maximum is a candidate | |
527 | that we test using the MBE pitch estimation algorithm */ | |
528 | |||
529 | #ifdef DUMP | ||
530 | for(i=0; i<F0_MAX; i++) | ||
531 | e_hz[i] = -1; | ||
532 | #endif | ||
533 | e_min = 1E32; | ||
534 | best_f0 = 50; | ||
535 | for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) { | ||
536 | if ((Fw[i].real > Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) { | ||
537 | |||
538 | /* local maximum found, let's test if it's big enough */ | |
539 | |||
540 | if (Fw[i].real > T*gmax) { | ||
541 | |||
542 | /* OK, sample MBE cost function over +/- 20Hz range in 2.5Hz steps */ | |
543 | |||
544 | candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC); | ||
545 | f0_start = candidate_f0-20; | ||
546 | f0_end = candidate_f0+20; | ||
547 | if (f0_start < f0_min) f0_start = f0_min; | ||
548 | if (f0_end > f0_max) f0_end = f0_max; | ||
549 | |||
550 | for(f0=f0_start; f0<=f0_end; f0+= 2.5) { | ||
551 | e = test_candidate_mbe(Sw, W, f0); | ||
552 | #if !defined(NDEBUG) || defined(DUMP) | ||
553 | bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX)); | ||
554 | #endif | ||
555 | #ifdef DUMP | ||
556 | e_hz[bin] = e; | ||
557 | #endif | ||
558 | if (e < e_min) { | ||
559 | e_min = e; | ||
560 | best_f0 = f0; | ||
561 | } | ||
562 | } | ||
563 | |||
564 | } | ||
565 | } | ||
566 | } | ||
567 | |||
568 | /* finally sample MBE cost function around previous pitch estimate | ||
569 | (form of pitch tracking) */ | ||
570 | |||
571 | candidate_f0 = *prev_Wo * SAMPLE_RATE/TWO_PI; | ||
572 | f0_start = candidate_f0-20; | ||
573 | f0_end = candidate_f0+20; | ||
574 | if (f0_start < f0_min) f0_start = f0_min; | ||
575 | if (f0_end > f0_max) f0_end = f0_max; | ||
576 | |||
577 | for(f0=f0_start; f0<=f0_end; f0+= 2.5) { | ||
578 | e = test_candidate_mbe(Sw, W, f0); | ||
579 | #if !defined(NDEBUG) || defined(DUMP) | ||
580 | bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX)); | ||
581 | #endif | ||
582 | #ifdef DUMP | ||
583 | e_hz[bin] = e; | ||
584 | #endif | ||
585 | if (e < e_min) { | ||
586 | e_min = e; | ||
587 | best_f0 = f0; | ||
588 | } | ||
589 | } | ||
590 | |||
591 | #ifdef DUMP | ||
592 | dump_e(e_hz); | ||
593 | #endif | ||
594 | |||
595 | return best_f0; | ||
596 | } | ||
597 | |||
598 | /*---------------------------------------------------------------------------*\ | ||
599 | |||
600 | test_candidate_mbe() | ||
601 | |||
602 | Returns the error of the MBE cost function for the input f0. | ||
603 | |||
604 | Note: I think a lot of the operations below can be simplified as | ||
605 | W[].imag = 0 and has been normalised such that den always equals 1. | ||
606 | |||
607 | \*---------------------------------------------------------------------------*/ | ||
608 | |||
609 | float test_candidate_mbe( | ||
610 | COMP Sw[], | ||
611 | COMP W[], | ||
612 | float f0 | ||
613 | ) | ||
614 | { | ||
615 | COMP Sw_[FFT_ENC]; /* DFT of all voiced synthesised signal */ | ||
616 | int l,al,bl,m; /* loop variables */ | ||
617 | COMP Am; /* amplitude sample for this band */ | ||
618 | int offset; /* centers Hw[] about current harmonic */ | ||
619 | float den; /* denominator of Am expression */ | ||
620 | float error; /* accumulated error between original and synthesised */ | |
621 | float Wo; /* current "test" fundamental freq. */ | ||
622 | int L; | ||
623 | |||
624 | L = floorf((SAMPLE_RATE/2.0)/f0); | ||
625 | Wo = f0*(2*PI/SAMPLE_RATE); | ||
626 | |||
627 | error = 0.0; | ||
628 | |||
629 | /* Just test across the harmonics in the first 1000 Hz (L/4) */ | ||
630 | |||
631 | for(l=1; l<L/4; l++) { | ||
632 | Am.real = 0.0; | ||
633 | Am.imag = 0.0; | ||
634 | den = 0.0; | ||
635 | al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); | ||
636 | bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); | ||
637 | |||
638 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ | ||
639 | |||
640 | for(m=al; m<bl; m++) { | ||
641 | offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
642 | Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag; | ||
643 | Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag; | ||
644 | den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag; | ||
645 | } | ||
646 | |||
647 | Am.real = Am.real/den; | ||
648 | Am.imag = Am.imag/den; | ||
649 | |||
650 | /* Determine error between estimated harmonic and original */ | ||
651 | |||
652 | for(m=al; m<bl; m++) { | ||
653 | offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
654 | Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag; | ||
655 | Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real; | ||
656 | error += (Sw[m].real - Sw_[m].real)*(Sw[m].real - Sw_[m].real); | ||
657 | error += (Sw[m].imag - Sw_[m].imag)*(Sw[m].imag - Sw_[m].imag); | ||
658 | } | ||
659 | } | ||
660 | |||
661 | return error; | ||
662 | } | ||
663 | |||
664 | #endif | ||
665 | |||
666 | /*---------------------------------------------------------------------------*\ | 485 | /*---------------------------------------------------------------------------*\ |
667 | 486 | ||
668 | FUNCTION....: fdmdv_16_to_8() | 487 | FUNCTION....: fdmdv_16_to_8() |