diff options
Diffstat (limited to 'nlp.c')
| -rw-r--r-- | nlp.c | 183 |
1 files changed, 1 insertions, 182 deletions
| @@ -53,7 +53,6 @@ | |||
| 53 | #define F0_MAX 500 | 53 | #define F0_MAX 500 |
| 54 | #define CNLP 0.3 /* post processor constant */ | 54 | #define CNLP 0.3 /* post processor constant */ |
| 55 | #define NLP_NTAP 48 /* Decimation LPF order */ | 55 | #define NLP_NTAP 48 /* Decimation LPF order */ |
| 56 | #undef POST_PROCESS_MBE /* choose post processor */ | ||
| 57 | 56 | ||
| 58 | /* 8 to 16 kHz sample rate conversion */ | 57 | /* 8 to 16 kHz sample rate conversion */ |
| 59 | 58 | ||
| @@ -132,10 +131,6 @@ typedef struct { | |||
| 132 | FILE *f; | 131 | FILE *f; |
| 133 | } NLP; | 132 | } NLP; |
| 134 | 133 | ||
| 135 | #ifdef POST_PROCESS_MBE | ||
| 136 | float test_candidate_mbe(COMP Sw[], COMP W[], float f0); | ||
| 137 | float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo); | ||
| 138 | #endif | ||
| 139 | float post_process_sub_multiples(COMP Fw[], | 134 | float post_process_sub_multiples(COMP Fw[], |
| 140 | int pmin, int pmax, float gmax, int gmax_bin, | 135 | int pmin, int pmax, float gmax, int gmax_bin, |
| 141 | float *prev_f0); | 136 | float *prev_f0); |
| @@ -258,7 +253,7 @@ float nlp( | |||
| 258 | int n, /* frames shift (no. new samples in Sn[]) */ | 253 | int n, /* frames shift (no. new samples in Sn[]) */ |
| 259 | float *pitch, /* estimated pitch period in samples at current Fs */ | 254 | float *pitch, /* estimated pitch period in samples at current Fs */ |
| 260 | COMP Sw[], /* Freq domain version of Sn[] */ | 255 | COMP Sw[], /* Freq domain version of Sn[] */ |
| 261 | COMP W[], /* Freq domain window */ | 256 | float W[], /* Freq domain window */ |
| 262 | float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ | 257 | float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ |
| 263 | ) | 258 | ) |
| 264 | { | 259 | { |
| @@ -389,11 +384,7 @@ float nlp( | |||
| 389 | 384 | ||
| 390 | PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); | 385 | PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); |
| 391 | 386 | ||
| 392 | #ifdef POST_PROCESS_MBE | ||
| 393 | best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_f0); | ||
| 394 | #else | ||
| 395 | best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); | 387 | best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0); |
| 396 | #endif | ||
| 397 | 388 | ||
| 398 | PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); | 389 | PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); |
| 399 | 390 | ||
| @@ -491,178 +482,6 @@ float post_process_sub_multiples(COMP Fw[], | |||
| 491 | return best_f0; | 482 | return best_f0; |
| 492 | } | 483 | } |
| 493 | 484 | ||
| 494 | #ifdef POST_PROCESS_MBE | ||
| 495 | |||
| 496 | /*---------------------------------------------------------------------------*\ | ||
| 497 | |||
| 498 | post_process_mbe() | ||
| 499 | |||
| 500 | Use the MBE pitch estimation algorithm to evaluate pitch candidates. This | ||
| 501 | works OK but the accuracy at low F0 is affected by NW, the analysis window | ||
| 502 | size used for the DFT of the input speech Sw[]. Also favours high F0 in | ||
| 503 | the presence of background noise which causes periodic artifacts in the | ||
| 504 | synthesised speech. | ||
| 505 | |||
| 506 | \*---------------------------------------------------------------------------*/ | ||
| 507 | |||
| 508 | float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo) | ||
| 509 | { | ||
| 510 | float candidate_f0; | ||
| 511 | float f0,best_f0; /* fundamental frequency */ | ||
| 512 | float e,e_min; /* MBE cost function */ | ||
| 513 | int i; | ||
| 514 | #ifdef DUMP | ||
| 515 | float e_hz[F0_MAX]; | ||
| 516 | #endif | ||
| 517 | #if !defined(NDEBUG) || defined(DUMP) | ||
| 518 | int bin; | ||
| 519 | #endif | ||
| 520 | float f0_min, f0_max; | ||
| 521 | float f0_start, f0_end; | ||
| 522 | |||
| 523 | f0_min = (float)SAMPLE_RATE/pmax; | ||
| 524 | f0_max = (float)SAMPLE_RATE/pmin; | ||
| 525 | |||
| 526 | /* Now look for local maxima. Each local maximum is a candidate | |||
| 527 | that we test using the MBE pitch estimation algorithm */ | |||
| 528 | |||
| 529 | #ifdef DUMP | ||
| 530 | for(i=0; i<F0_MAX; i++) | ||
| 531 | e_hz[i] = -1; | ||
| 532 | #endif | ||
| 533 | e_min = 1E32; | ||
| 534 | best_f0 = 50; | ||
| 535 | for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) { | ||
| 536 | if ((Fw[i].real > Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) { | ||
| 537 | |||
| 538 | /* local maximum found, let's test if it's big enough */ | |||
| 539 | |||
| 540 | if (Fw[i].real > T*gmax) { | ||
| 541 | |||
| 542 | /* OK, sample MBE cost function over +/- 20Hz range in 2.5Hz steps */ | |||
| 543 | |||
| 544 | candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC); | ||
| 545 | f0_start = candidate_f0-20; | ||
| 546 | f0_end = candidate_f0+20; | ||
| 547 | if (f0_start < f0_min) f0_start = f0_min; | ||
| 548 | if (f0_end > f0_max) f0_end = f0_max; | ||
| 549 | |||
| 550 | for(f0=f0_start; f0<=f0_end; f0+= 2.5) { | ||
| 551 | e = test_candidate_mbe(Sw, W, f0); | ||
| 552 | #if !defined(NDEBUG) || defined(DUMP) | ||
| 553 | bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX)); | ||
| 554 | #endif | ||
| 555 | #ifdef DUMP | ||
| 556 | e_hz[bin] = e; | ||
| 557 | #endif | ||
| 558 | if (e < e_min) { | ||
| 559 | e_min = e; | ||
| 560 | best_f0 = f0; | ||
| 561 | } | ||
| 562 | } | ||
| 563 | |||
| 564 | } | ||
| 565 | } | ||
| 566 | } | ||
| 567 | |||
| 568 | /* finally sample MBE cost function around previous pitch estimate | ||
| 569 | (form of pitch tracking) */ | ||
| 570 | |||
| 571 | candidate_f0 = *prev_Wo * SAMPLE_RATE/TWO_PI; | ||
| 572 | f0_start = candidate_f0-20; | ||
| 573 | f0_end = candidate_f0+20; | ||
| 574 | if (f0_start < f0_min) f0_start = f0_min; | ||
| 575 | if (f0_end > f0_max) f0_end = f0_max; | ||
| 576 | |||
| 577 | for(f0=f0_start; f0<=f0_end; f0+= 2.5) { | ||
| 578 | e = test_candidate_mbe(Sw, W, f0); | ||
| 579 | #if !defined(NDEBUG) || defined(DUMP) | ||
| 580 | bin = floorf(f0); assert((bin > 0) && (bin < F0_MAX)); | ||
| 581 | #endif | ||
| 582 | #ifdef DUMP | ||
| 583 | e_hz[bin] = e; | ||
| 584 | #endif | ||
| 585 | if (e < e_min) { | ||
| 586 | e_min = e; | ||
| 587 | best_f0 = f0; | ||
| 588 | } | ||
| 589 | } | ||
| 590 | |||
| 591 | #ifdef DUMP | ||
| 592 | dump_e(e_hz); | ||
| 593 | #endif | ||
| 594 | |||
| 595 | return best_f0; | ||
| 596 | } | ||
| 597 | |||
| 598 | /*---------------------------------------------------------------------------*\ | ||
| 599 | |||
| 600 | test_candidate_mbe() | ||
| 601 | |||
| 602 | Returns the error of the MBE cost function for the input f0. | ||
| 603 | |||
| 604 | Note: I think a lot of the operations below can be simplified, as | |||
| 605 | W[].imag = 0 and W[] has been normalised such that den always equals 1. | |||
| 606 | |||
| 607 | \*---------------------------------------------------------------------------*/ | ||
| 608 | |||
| 609 | float test_candidate_mbe( | ||
| 610 | COMP Sw[], | ||
| 611 | COMP W[], | ||
| 612 | float f0 | ||
| 613 | ) | ||
| 614 | { | ||
| 615 | COMP Sw_[FFT_ENC]; /* DFT of all voiced synthesised signal */ | ||
| 616 | int l,al,bl,m; /* loop variables */ | ||
| 617 | COMP Am; /* amplitude sample for this band */ | ||
| 618 | int offset; /* centers Hw[] about current harmonic */ | ||
| 619 | float den; /* denominator of Am expression */ | ||
| 620 | float error; /* accumulated error between original and synthesised */ | |||
| 621 | float Wo; /* current "test" fundamental freq. */ | ||
| 622 | int L; | ||
| 623 | |||
| 624 | L = floorf((SAMPLE_RATE/2.0)/f0); | ||
| 625 | Wo = f0*(2*PI/SAMPLE_RATE); | ||
| 626 | |||
| 627 | error = 0.0; | ||
| 628 | |||
| 629 | /* Just test across the harmonics in the first 1000 Hz (L/4) */ | ||
| 630 | |||
| 631 | for(l=1; l<L/4; l++) { | ||
| 632 | Am.real = 0.0; | ||
| 633 | Am.imag = 0.0; | ||
| 634 | den = 0.0; | ||
| 635 | al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); | ||
| 636 | bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); | ||
| 637 | |||
| 638 | /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ | ||
| 639 | |||
| 640 | for(m=al; m<bl; m++) { | ||
| 641 | offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
| 642 | Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag; | ||
| 643 | Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag; | ||
| 644 | den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag; | ||
| 645 | } | ||
| 646 | |||
| 647 | Am.real = Am.real/den; | ||
| 648 | Am.imag = Am.imag/den; | ||
| 649 | |||
| 650 | /* Determine error between estimated harmonic and original */ | ||
| 651 | |||
| 652 | for(m=al; m<bl; m++) { | ||
| 653 | offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5; | ||
| 654 | Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag; | ||
| 655 | Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real; | ||
| 656 | error += (Sw[m].real - Sw_[m].real)*(Sw[m].real - Sw_[m].real); | ||
| 657 | error += (Sw[m].imag - Sw_[m].imag)*(Sw[m].imag - Sw_[m].imag); | ||
| 658 | } | ||
| 659 | } | ||
| 660 | |||
| 661 | return error; | ||
| 662 | } | ||
| 663 | |||
| 664 | #endif | ||
| 665 | |||
| 666 | /*---------------------------------------------------------------------------*\ | 485 | /*---------------------------------------------------------------------------*\ |
| 667 | 486 | ||
| 668 | FUNCTION....: fdmdv_16_to_8() | 487 | FUNCTION....: fdmdv_16_to_8() |
