diff --git a/Record/ProcessFile.cpp b/Record/ProcessFile.cpp index 8a7a8a8..b4c0d01 100644 --- a/Record/ProcessFile.cpp +++ b/Record/ProcessFile.cpp @@ -1,5 +1,5 @@ #include "IoContext.h" -#include "RKAP_3A.h" +// #include "RKAP_3A.h" #include "SpeexDsp.h" #include "WebRtcAecm.h" #include "api/audio/echo_canceller3_config.h" @@ -18,7 +18,7 @@ public: farendBuffer = std::make_unique(sampleRate, channels, sampleRate, channels, sampleRate, channels); linearOutputBuffer = std::make_unique(sampleRate, channels, sampleRate, channels, sampleRate, channels); - RKAP_3A_Init(&m_vqe, AEC_TX_TYPE); + // RKAP_3A_Init(&m_vqe, AEC_TX_TYPE); } std::unique_ptr echoCanceller; @@ -26,7 +26,7 @@ public: std::unique_ptr farendBuffer; std::unique_ptr linearOutputBuffer; - RKAP_AEC_State m_vqe; + // RKAP_AEC_State m_vqe; }; ProcessFileTask::ProcessFileTask() : m_d{new ProcessFileTaskPrivate()} { diff --git a/VocieProcess/CMakeLists.txt b/VocieProcess/CMakeLists.txt index a6c4d92..d13b3a9 100644 --- a/VocieProcess/CMakeLists.txt +++ b/VocieProcess/CMakeLists.txt @@ -55,6 +55,7 @@ add_library(VocieProcess common_audio/third_party/ooura/fft_size_128/ooura_fft.h common_audio/third_party/ooura/fft_size_128/ooura_fft_neon.cc common_audio/third_party/ooura/fft_size_128/ooura_fft.cc + common_audio/third_party/ooura/fft_size_256/fft4g.h common_audio/third_party/ooura/fft_size_256/fft4g.cc common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c rtc_base/checks.h rtc_base/checks.cc @@ -147,6 +148,20 @@ add_library(VocieProcess modules/audio_processing/logging/apm_data_dumper.h modules/audio_processing/logging/apm_data_dumper.cc + modules/audio_processing/ns/fast_math.h modules/audio_processing/ns/fast_math.cc + modules/audio_processing/ns/histograms.h modules/audio_processing/ns/histograms.cc + modules/audio_processing/ns/noise_estimator.h modules/audio_processing/ns/noise_estimator.cc + modules/audio_processing/ns/noise_suppressor.h modules/audio_processing/ns/noise_suppressor.cc + modules/audio_processing/ns/ns_fft.h modules/audio_processing/ns/ns_fft.cc + modules/audio_processing/ns/prior_signal_model_estimator.h modules/audio_processing/ns/prior_signal_model_estimator.cc + modules/audio_processing/ns/prior_signal_model.h modules/audio_processing/ns/prior_signal_model.cc + modules/audio_processing/ns/quantile_noise_estimator.h modules/audio_processing/ns/quantile_noise_estimator.cc + modules/audio_processing/ns/signal_model_estimator.h modules/audio_processing/ns/signal_model_estimator.cc + modules/audio_processing/ns/signal_model.h modules/audio_processing/ns/signal_model.cc + modules/audio_processing/ns/speech_probability_estimator.h modules/audio_processing/ns/speech_probability_estimator.cc + modules/audio_processing/ns/suppression_params.h modules/audio_processing/ns/suppression_params.cc + modules/audio_processing/ns/wiener_filter.h modules/audio_processing/ns/wiener_filter.cc + modules/audio_processing/utility/cascaded_biquad_filter.h modules/audio_processing/utility/cascaded_biquad_filter.cc modules/audio_processing/utility/delay_estimator_wrapper.h modules/audio_processing/utility/delay_estimator_wrapper.cc modules/audio_processing/utility/delay_estimator.h modules/audio_processing/utility/delay_estimator.cc diff --git a/VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.cc b/VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.cc new file mode 100644 index 0000000..2573f23 --- /dev/null +++ 
b/VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.cc @@ -0,0 +1,866 @@ +/* + * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html + * Copyright Takuya OOURA, 1996-2001 + * + * You may use, copy, modify and distribute this code for any purpose (include + * commercial use) and without fee. Please refer to this package when you modify + * this code. + * + * Changes: + * Trivial type modifications by the WebRTC authors. + */ + +/* +Fast Fourier/Cosine/Sine Transform + dimension :one + data length :power of 2 + decimation :frequency + radix :4, 2 + data :inplace + table :use +functions + cdft: Complex Discrete Fourier Transform + rdft: Real Discrete Fourier Transform + ddct: Discrete Cosine Transform + ddst: Discrete Sine Transform + dfct: Cosine Transform of RDFT (Real Symmetric DFT) + dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) +function prototypes + void cdft(int, int, float *, int *, float *); + void rdft(size_t, int, float *, size_t *, float *); + void ddct(int, int, float *, int *, float *); + void ddst(int, int, float *, int *, float *); + void dfct(int, float *, float *, int *, float *); + void dfst(int, float *, float *, int *, float *); + + +-------- Complex DFT (Discrete Fourier Transform) -------- + [definition] + + X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k + X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k + ip[0] = 0; // first time only + cdft(2*n, 1, a, ip, w); + + ip[0] = 0; // first time only + cdft(2*n, -1, a, ip, w); + [parameters] + 2*n :data length (int) + n >= 1, n = power of 2 + a[0...2*n-1] :input/output data (float *) + input data + a[2*j] = Re(x[j]), + a[2*j+1] = Im(x[j]), 0<=j= 2+sqrt(n) + strictly, + length of ip >= + 2+(1<<(int)(log(n+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + cdft(2*n, -1, a, ip, w); + is + cdft(2*n, 1, a, ip, w); + for (j = 0; j <= 2 * n - 1; j++) { + a[j] *= 1.0 / n; + } + . + + +-------- Real DFT / Inverse of Real DFT -------- + [definition] + RDFT + R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 + I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) + a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k + ip[0] = 0; // first time only + rdft(n, 1, a, ip, w); + + ip[0] = 0; // first time only + rdft(n, -1, a, ip, w); + [parameters] + n :data length (size_t) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + + output data + a[2*k] = R[k], 0<=k + input data + a[2*j] = R[j], 0<=j= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + rdft(n, 1, a, ip, w); + is + rdft(n, -1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + IDCT (excluding scale) + C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT + C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k + ip[0] = 0; // first time only + ddct(n, 1, a, ip, w); + + ip[0] = 0; // first time only + ddct(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + output data + a[k] = C[k], 0<=k= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). 
+ ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddct(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddct(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- DST (Discrete Sine Transform) / Inverse of DST -------- + [definition] + IDST (excluding scale) + S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST + S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 + ip[0] = 0; // first time only + ddst(n, 1, a, ip, w); + + ip[0] = 0; // first time only + ddst(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + + input data + a[j] = A[j], 0 + output data + a[k] = S[k], 0= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddst(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddst(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Cosine Transform of RDFT (Real Symmetric DFT) -------- + [definition] + C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n + [usage] + ip[0] = 0; // first time only + dfct(n, a, t, ip, w); + [parameters] + n :data length - 1 (int) + n >= 2, n = power of 2 + a[0...n] :input/output data (float *) + output data + a[k] = C[k], 0<=k<=n + t[0...n/2] :work area (float *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + is + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + for (j = 0; j <= n; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- + [definition] + S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + output data + a[k] = S[k], 0= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + dfst(n, a, t, ip, w); + is + dfst(n, a, t, ip, w); + for (j = 1; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +Appendix : + The cos/sin table is recalculated when the larger table required. + w[] and ip[] are compatible with all routines. 
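Example (sketch; not part of the upstream comment, assumes n = 256 as used by the
256-point filter bank here). Buffer sizes follow the rules above: ip needs at
least 2+sqrt(n/2) entries (16 is ample) and w needs n/2 = 128 entries.

    float a[256];         // input samples; overwritten with the packed spectrum
    size_t ip[16] = {0};  // ip[0] = 0 triggers (re)initialization of the tables
    float w[128];         // cos/sin table, filled on first use
    webrtc::WebRtc_rdft(256, 1, a, ip, w);   // forward real DFT
    webrtc::WebRtc_rdft(256, -1, a, ip, w);  // inverse real DFT
    for (int j = 0; j < 256; ++j) {
      a[j] *= 2.0f / 256;                    // scale to recover the input
    }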
+*/ + +#include "common_audio/third_party/ooura/fft_size_256/fft4g.h" + +#include +#include + +namespace webrtc { + +namespace { + +void makewt(size_t nw, size_t* ip, float* w); +void makect(size_t nc, size_t* ip, float* c); +void bitrv2(size_t n, size_t* ip, float* a); +void cftfsub(size_t n, float* a, float* w); +void cftbsub(size_t n, float* a, float* w); +void cft1st(size_t n, float* a, float* w); +void cftmdl(size_t n, size_t l, float* a, float* w); +void rftfsub(size_t n, float* a, size_t nc, float* c); +void rftbsub(size_t n, float* a, size_t nc, float* c); + +/* -------- initializing routines -------- */ + +void makewt(size_t nw, size_t* ip, float* w) { + size_t j, nwh; + float delta, x, y; + + ip[0] = nw; + ip[1] = 1; + if (nw > 2) { + nwh = nw >> 1; + delta = atanf(1.0f) / nwh; + w[0] = 1; + w[1] = 0; + w[nwh] = (float)cos(delta * nwh); + w[nwh + 1] = w[nwh]; + if (nwh > 2) { + for (j = 2; j < nwh; j += 2) { + x = (float)cos(delta * j); + y = (float)sin(delta * j); + w[j] = x; + w[j + 1] = y; + w[nw - j] = y; + w[nw - j + 1] = x; + } + bitrv2(nw, ip + 2, w); + } + } +} + +void makect(size_t nc, size_t* ip, float* c) { + size_t j, nch; + float delta; + + ip[1] = nc; + if (nc > 1) { + nch = nc >> 1; + delta = atanf(1.0f) / nch; + c[0] = (float)cos(delta * nch); + c[nch] = 0.5f * c[0]; + for (j = 1; j < nch; j++) { + c[j] = 0.5f * (float)cos(delta * j); + c[nc - j] = 0.5f * (float)sin(delta * j); + } + } +} + +/* -------- child routines -------- */ + +void bitrv2(size_t n, size_t* ip, float* a) { + size_t j, j1, k, k1, l, m, m2; + float xr, xi, yr, yi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + m2 = 2 * m; + if ((m << 3) == l) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + m2 + ip[k]; + k1 = j1 + m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } else { + for (k = 1; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } + } +} + +void cftfsub(size_t n, float* a, float* w) { + size_t j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] 
- a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = a[j + 1] - a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] += a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } +} + +void cftbsub(size_t n, float* a, float* w) { + size_t j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = -a[j + 1] - a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = -a[j + 1] + a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i + x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i - x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i + x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = -a[j + 1] + a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] = -a[j + 1] - a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } +} + +void cft1st(size_t n, float* a, float* w) { + size_t j, k1, k2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[2]; + x0i = a[1] + a[3]; + x1r = a[0] - a[2]; + x1i = a[1] - a[3]; + x2r = a[4] + a[6]; + x2i = a[5] + a[7]; + x3r = a[4] - a[6]; + x3i = a[5] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; + wk1r = w[2]; + x0r = a[8] + a[10]; + x0i = a[9] + a[11]; + x1r = a[8] - a[10]; + x1i = a[9] - a[11]; + x2r = a[12] + a[14]; + x2i = a[13] + a[15]; + x3r = a[12] - a[14]; + x3i = a[13] - a[15]; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[12] = x2i - x0i; + a[13] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[10] = wk1r * (x0r - x0i); + a[11] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[14] = wk1r * (x0i - x0r); + a[15] = wk1r * (x0i + x0r); + k1 = 0; + for (j = 16; j < n; j += 16) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + x0r = a[j] + a[j + 2]; + x0i = a[j + 1] + a[j + 3]; + x1r = a[j] - a[j + 2]; + x1i = a[j + 1] - a[j + 3]; + x2r = a[j + 4] + a[j + 6]; + x2i = a[j + 5] + a[j + 7]; + x3r = a[j + 4] - a[j + 6]; + x3i = a[j + 5] - a[j + 7]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 4] = wk2r * x0r - wk2i * x0i; + a[j + 5] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 2] = wk1r * x0r - wk1i * x0i; + a[j + 3] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 6] = wk3r * x0r - wk3i * x0i; + a[j + 7] = wk3r * x0i + wk3i * x0r; + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + x0r = a[j + 8] + a[j + 10]; + x0i = a[j + 9] + a[j + 11]; + x1r = a[j + 8] - a[j + 10]; + x1i = a[j + 9] - a[j + 11]; + x2r = a[j + 12] + a[j + 14]; + x2i = a[j + 13] + a[j + 15]; + x3r = a[j + 12] - a[j + 14]; + x3i = a[j + 13] - a[j + 15]; + a[j + 8] = x0r + x2r; + a[j + 9] = x0i + x2i; + x0r -= x2r; + 
x0i -= x2i; + a[j + 12] = -wk2i * x0r - wk2r * x0i; + a[j + 13] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 10] = wk1r * x0r - wk1i * x0i; + a[j + 11] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 14] = wk3r * x0r - wk3i * x0i; + a[j + 15] = wk3r * x0i + wk3i * x0r; + } +} + +void cftmdl(size_t n, size_t l, float* a, float* w) { + size_t j, j1, j2, j3, k, k1, k2, m, m2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + m = l << 2; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + wk1r = w[2]; + for (j = m; j < l + m; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x2i - x0i; + a[j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * (x0r - x0i); + a[j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3] = wk1r * (x0i - x0r); + a[j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 0; + m2 = 2 * m; + for (k = m2; k < n; k += m2) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j < l + k; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = wk2r * x0r - wk2i * x0i; + a[j2 + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + for (j = k + m; j < l + (k + m); j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = -wk2i * x0r - wk2r * x0i; + a[j2 + 1] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + } +} + +void rftfsub(size_t n, float* a, size_t nc, float* c) { + size_t j, k, kk, ks, m; + float wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5f - c[nc - kk]; + wki = c[kk]; + 
xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + +void rftbsub(size_t n, float* a, size_t nc, float* c) { + size_t j, k, kk, ks, m; + float wkr, wki, xr, xi, yr, yi; + + a[1] = -a[1]; + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5f - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j] -= yr; + a[j + 1] = yi - a[j + 1]; + a[k] += yr; + a[k + 1] = yi - a[k + 1]; + } + a[m + 1] = -a[m + 1]; +} + +} // namespace + +void WebRtc_rdft(size_t n, int isgn, float* a, size_t* ip, float* w) { + size_t nw, nc; + float xi; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 2)) { + nc = n >> 2; + makect(nc, ip, w + nw); + } + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } else { + a[1] = 0.5f * (a[0] - a[1]); + a[0] -= a[1]; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } +} + +} // namespace webrtc diff --git a/VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.h b/VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.h new file mode 100644 index 0000000..5a465a3 --- /dev/null +++ b/VocieProcess/common_audio/third_party/ooura/fft_size_256/fft4g.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the ../../../LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_ +#define COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_ + +#include + +namespace webrtc { + +// Refer to fft4g.c for documentation. +void WebRtc_rdft(size_t n, int isgn, float* a, size_t* ip, float* w); + +} // namespace webrtc + +#endif // COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_ diff --git a/VocieProcess/modules/audio_processing/ns/fast_math.cc b/VocieProcess/modules/audio_processing/ns/fast_math.cc new file mode 100644 index 0000000..d13110c --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/fast_math.cc @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/fast_math.h" + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +float FastLog2f(float in) { + RTC_DCHECK_GT(in, .0f); + // Read and interpret float as uint32_t and then cast to float. + // This is done to extract the exponent (bits 30 - 23). 
+ // "Right shift" of the exponent is then performed by multiplying + // with the constant (1/2^23). Finally, we subtract a constant to + // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias). + union { + float dummy; + uint32_t a; + } x = {in}; + float out = x.a; + out *= 1.1920929e-7f; // 1/2^23 + out -= 126.942695f; // Remove bias. + return out; +} + +} // namespace + +float SqrtFastApproximation(float f) { + // TODO(peah): Add fast approximate implementation. + return sqrtf(f); +} + +float Pow2Approximation(float p) { + // TODO(peah): Add fast approximate implementation. + return powf(2.f, p); +} + +float PowApproximation(float x, float p) { + return Pow2Approximation(p * FastLog2f(x)); +} + +float LogApproximation(float x) { + constexpr float kLogOf2 = 0.69314718056f; + return FastLog2f(x) * kLogOf2; +} + +void LogApproximation(rtc::ArrayView x, rtc::ArrayView y) { + for (size_t k = 0; k < x.size(); ++k) { + y[k] = LogApproximation(x[k]); + } +} + +float ExpApproximation(float x) { + constexpr float kLog10Ofe = 0.4342944819f; + return PowApproximation(10.f, x * kLog10Ofe); +} + +void ExpApproximation(rtc::ArrayView x, rtc::ArrayView y) { + for (size_t k = 0; k < x.size(); ++k) { + y[k] = ExpApproximation(x[k]); + } +} + +void ExpApproximationSignFlip(rtc::ArrayView x, + rtc::ArrayView y) { + for (size_t k = 0; k < x.size(); ++k) { + y[k] = ExpApproximation(-x[k]); + } +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/fast_math.h b/VocieProcess/modules/audio_processing/ns/fast_math.h new file mode 100644 index 0000000..0aefee9 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/fast_math.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ +#define MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ + +#include "api/array_view.h" + +namespace webrtc { + +// Sqrt approximation. +float SqrtFastApproximation(float f); + +// Log base conversion log(x) = log2(x)/log2(e). +float LogApproximation(float x); +void LogApproximation(rtc::ArrayView x, rtc::ArrayView y); + +// 2^x approximation. +float Pow2Approximation(float p); + +// x^p approximation. +float PowApproximation(float x, float p); + +// e^x approximation. +float ExpApproximation(float x); +void ExpApproximation(rtc::ArrayView x, rtc::ArrayView y); +void ExpApproximationSignFlip(rtc::ArrayView x, + rtc::ArrayView y); +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ diff --git a/VocieProcess/modules/audio_processing/ns/histograms.cc b/VocieProcess/modules/audio_processing/ns/histograms.cc new file mode 100644 index 0000000..1d4f459 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/histograms.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/histograms.h" + +namespace webrtc { + +Histograms::Histograms() { + Clear(); +} + +void Histograms::Clear() { + lrt_.fill(0); + spectral_flatness_.fill(0); + spectral_diff_.fill(0); +} + +void Histograms::Update(const SignalModel& features_) { + // Update the histogram for the LRT. + constexpr float kOneByBinSizeLrt = 1.f / kBinSizeLrt; + if (features_.lrt < kHistogramSize * kBinSizeLrt && features_.lrt >= 0.f) { + ++lrt_[kOneByBinSizeLrt * features_.lrt]; + } + + // Update histogram for the spectral flatness. + constexpr float kOneByBinSizeSpecFlat = 1.f / kBinSizeSpecFlat; + if (features_.spectral_flatness < kHistogramSize * kBinSizeSpecFlat && + features_.spectral_flatness >= 0.f) { + ++spectral_flatness_[features_.spectral_flatness * kOneByBinSizeSpecFlat]; + } + + // Update histogram for the spectral difference. + constexpr float kOneByBinSizeSpecDiff = 1.f / kBinSizeSpecDiff; + if (features_.spectral_diff < kHistogramSize * kBinSizeSpecDiff && + features_.spectral_diff >= 0.f) { + ++spectral_diff_[features_.spectral_diff * kOneByBinSizeSpecDiff]; + } +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/histograms.h b/VocieProcess/modules/audio_processing/ns/histograms.h new file mode 100644 index 0000000..9640e74 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/histograms.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ +#define MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/signal_model.h" + +namespace webrtc { + +constexpr int kHistogramSize = 1000; + +// Class for handling the updating of histograms. +class Histograms { + public: + Histograms(); + Histograms(const Histograms&) = delete; + Histograms& operator=(const Histograms&) = delete; + + // Clears the histograms. + void Clear(); + + // Extracts thresholds for feature parameters and updates the corresponding + // histogram. + void Update(const SignalModel& features_); + + // Methods for accessing the histograms. + rtc::ArrayView get_lrt() const { return lrt_; } + rtc::ArrayView get_spectral_flatness() const { + return spectral_flatness_; + } + rtc::ArrayView get_spectral_diff() const { + return spectral_diff_; + } + + private: + std::array lrt_; + std::array spectral_flatness_; + std::array spectral_diff_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ diff --git a/VocieProcess/modules/audio_processing/ns/noise_estimator.cc b/VocieProcess/modules/audio_processing/ns/noise_estimator.cc new file mode 100644 index 0000000..5367545 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/noise_estimator.cc @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/noise_estimator.h" + +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Log(i). +constexpr std::array log_table = { + 0.f, 0.f, 0.f, 0.f, 0.f, 1.609438f, 1.791759f, + 1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f, + 2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f, + 3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f, + 3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f, + 3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f, + 3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f, + 3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f, + 4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f, + 4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f, + 4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f, + 4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f, + 4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f, + 4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f, + 4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f, + 4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f, + 4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f, + 4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f, + 4.836282f, 4.844187f, 4.852030f}; + +} // namespace + +NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params) + : suppression_params_(suppression_params) { + noise_spectrum_.fill(0.f); + prev_noise_spectrum_.fill(0.f); + conservative_noise_spectrum_.fill(0.f); + parametric_noise_spectrum_.fill(0.f); +} + +void NoiseEstimator::PrepareAnalysis() { + std::copy(noise_spectrum_.begin(), noise_spectrum_.end(), + prev_noise_spectrum_.begin()); +} + +void NoiseEstimator::PreUpdate( + int32_t num_analyzed_frames, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum) { + quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_); + + if (num_analyzed_frames < kShortStartupPhaseBlocks) { + // Compute simplified noise model during startup. + const size_t kStartBand = 5; + float sum_log_i_log_magn = 0.f; + float sum_log_i = 0.f; + float sum_log_i_square = 0.f; + float sum_log_magn = 0.f; + for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) { + float log_i = log_table[i]; + sum_log_i += log_i; + sum_log_i_square += log_i * log_i; + float log_signal = LogApproximation(signal_spectrum[i]); + sum_log_magn += log_signal; + sum_log_i_log_magn += log_i * log_signal; + } + + // Estimate the parameter for the level of the white noise. + constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1; + white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 * + suppression_params_.over_subtraction_factor; + + // Estimate pink noise parameters. + float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) - + sum_log_i * sum_log_i; + float num = + sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn; + RTC_DCHECK_NE(denom, 0.f); + float pink_noise_adjustment = num / denom; + + // Constrain the estimated spectrum to be positive. 
+ pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f); + pink_noise_numerator_ += pink_noise_adjustment; + num = sum_log_i * sum_log_magn - + (kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn; + RTC_DCHECK_NE(denom, 0.f); + pink_noise_adjustment = num / denom; + + // Constrain the pink noise power to be in the interval [0, 1]. + pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f); + + pink_noise_exp_ += pink_noise_adjustment; + + const float one_by_num_analyzed_frames_plus_1 = + 1.f / (num_analyzed_frames + 1.f); + + // Calculate the frequency-independent parts of parametric noise estimate. + float parametric_exp = 0.f; + float parametric_num = 0.f; + if (pink_noise_exp_ > 0.f) { + // Use pink noise estimate. + parametric_num = ExpApproximation(pink_noise_numerator_ * + one_by_num_analyzed_frames_plus_1); + parametric_num *= num_analyzed_frames + 1.f; + parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1; + } + + constexpr float kOneByShortStartupPhaseBlocks = + 1.f / kShortStartupPhaseBlocks; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + // Estimate the background noise using the white and pink noise + // parameters. + if (pink_noise_exp_ == 0.f) { + // Use white noise estimate. + parametric_noise_spectrum_[i] = white_noise_level_; + } else { + // Use pink noise estimate. + float use_band = i < kStartBand ? kStartBand : i; + float denom = PowApproximation(use_band, parametric_exp); + RTC_DCHECK_NE(denom, 0.f); + parametric_noise_spectrum_[i] = parametric_num / denom; + } + } + + // Weight quantile noise with modeled noise. + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + noise_spectrum_[i] *= num_analyzed_frames; + float tmp = parametric_noise_spectrum_[i] * + (kShortStartupPhaseBlocks - num_analyzed_frames); + noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1; + noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks; + } + } +} + +void NoiseEstimator::PostUpdate( + rtc::ArrayView speech_probability, + rtc::ArrayView signal_spectrum) { + // Time-avg parameter for noise_spectrum update. + constexpr float kNoiseUpdate = 0.9f; + + float gamma = kNoiseUpdate; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + const float prob_speech = speech_probability[i]; + const float prob_non_speech = 1.f - prob_speech; + + // Temporary noise update used for speech frames if update value is less + // than previous. + float noise_update_tmp = + gamma * prev_noise_spectrum_[i] + + (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + + prob_speech * prev_noise_spectrum_[i]); + + // Time-constant based on speech/noise_spectrum state. + float gamma_old = gamma; + + // Increase gamma for frame likely to be seech. + constexpr float kProbRange = .2f; + gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate; + + // Conservative noise_spectrum update. + if (prob_speech < kProbRange) { + conservative_noise_spectrum_[i] += + 0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]); + } + + // Noise_spectrum update. + if (gamma == gamma_old) { + noise_spectrum_[i] = noise_update_tmp; + } else { + noise_spectrum_[i] = + gamma * prev_noise_spectrum_[i] + + (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + + prob_speech * prev_noise_spectrum_[i]); + // Allow for noise_spectrum update downwards: If noise_spectrum update + // decreases the noise_spectrum, it is safe, so allow it to happen. 
+ noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp); + } + } +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/noise_estimator.h b/VocieProcess/modules/audio_processing/ns/noise_estimator.h new file mode 100644 index 0000000..0c0466a --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/noise_estimator.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/quantile_noise_estimator.h" +#include "modules/audio_processing/ns/suppression_params.h" + +namespace webrtc { + +// Class for estimating the spectral characteristics of the noise in an incoming +// signal. +class NoiseEstimator { + public: + explicit NoiseEstimator(const SuppressionParams& suppression_params); + + // Prepare the estimator for analysis of a new frame. + void PrepareAnalysis(); + + // Performs the first step of the estimator update. + void PreUpdate(int32_t num_analyzed_frames, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum); + + // Performs the second step of the estimator update. + void PostUpdate( + rtc::ArrayView speech_probability, + rtc::ArrayView signal_spectrum); + + // Returns the noise spectral estimate. + rtc::ArrayView get_noise_spectrum() const { + return noise_spectrum_; + } + + // Returns the noise from the previous frame. + rtc::ArrayView get_prev_noise_spectrum() + const { + return prev_noise_spectrum_; + } + + // Returns a noise spectral estimate based on white and pink noise parameters. + rtc::ArrayView get_parametric_noise_spectrum() + const { + return parametric_noise_spectrum_; + } + rtc::ArrayView + get_conservative_noise_spectrum() const { + return conservative_noise_spectrum_; + } + + private: + const SuppressionParams& suppression_params_; + float white_noise_level_ = 0.f; + float pink_noise_numerator_ = 0.f; + float pink_noise_exp_ = 0.f; + std::array prev_noise_spectrum_; + std::array conservative_noise_spectrum_; + std::array parametric_noise_spectrum_; + std::array noise_spectrum_; + QuantileNoiseEstimator quantile_noise_estimator_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ diff --git a/VocieProcess/modules/audio_processing/ns/noise_suppressor.cc b/VocieProcess/modules/audio_processing/ns/noise_suppressor.cc new file mode 100644 index 0000000..7c524da --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/noise_suppressor.cc @@ -0,0 +1,556 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/noise_suppressor.h" + +#include +#include +#include + +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Maps sample rate to number of bands. +size_t NumBandsForRate(size_t sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000 || + sample_rate_hz == 48000); + return sample_rate_hz / 16000; +} + +// Maximum number of channels for which the channel data is stored on +// the stack. If the number of channels are larger than this, they are stored +// using scratch memory that is pre-allocated on the heap. The reason for this +// partitioning is not to waste heap space for handling the more common numbers +// of channels, while at the same time not limiting the support for higher +// numbers of channels by enforcing the channel data to be stored on the +// stack using a fixed maximum value. +constexpr size_t kMaxNumChannelsOnStack = 2; + +// Chooses the number of channels to store on the heap when that is required due +// to the number of channels being larger than the pre-defined number +// of channels to store on the stack. +size_t NumChannelsOnHeap(size_t num_channels) { + return num_channels > kMaxNumChannelsOnStack ? num_channels : 0; +} + +// Hybrib Hanning and flat window for the filterbank. +constexpr std::array kBlocks160w256FirstHalf = { + 0.00000000f, 0.01636173f, 0.03271908f, 0.04906767f, 0.06540313f, + 0.08172107f, 0.09801714f, 0.11428696f, 0.13052619f, 0.14673047f, + 0.16289547f, 0.17901686f, 0.19509032f, 0.21111155f, 0.22707626f, + 0.24298018f, 0.25881905f, 0.27458862f, 0.29028468f, 0.30590302f, + 0.32143947f, 0.33688985f, 0.35225005f, 0.36751594f, 0.38268343f, + 0.39774847f, 0.41270703f, 0.42755509f, 0.44228869f, 0.45690388f, + 0.47139674f, 0.48576339f, 0.50000000f, 0.51410274f, 0.52806785f, + 0.54189158f, 0.55557023f, 0.56910015f, 0.58247770f, 0.59569930f, + 0.60876143f, 0.62166057f, 0.63439328f, 0.64695615f, 0.65934582f, + 0.67155895f, 0.68359230f, 0.69544264f, 0.70710678f, 0.71858162f, + 0.72986407f, 0.74095113f, 0.75183981f, 0.76252720f, 0.77301045f, + 0.78328675f, 0.79335334f, 0.80320753f, 0.81284668f, 0.82226822f, + 0.83146961f, 0.84044840f, 0.84920218f, 0.85772861f, 0.86602540f, + 0.87409034f, 0.88192126f, 0.88951608f, 0.89687274f, 0.90398929f, + 0.91086382f, 0.91749450f, 0.92387953f, 0.93001722f, 0.93590593f, + 0.94154407f, 0.94693013f, 0.95206268f, 0.95694034f, 0.96156180f, + 0.96592583f, 0.97003125f, 0.97387698f, 0.97746197f, 0.98078528f, + 0.98384601f, 0.98664333f, 0.98917651f, 0.99144486f, 0.99344778f, + 0.99518473f, 0.99665524f, 0.99785892f, 0.99879546f, 0.99946459f, + 0.99986614f}; + +// Applies the filterbank window to a buffer. +void ApplyFilterBankWindow(rtc::ArrayView x) { + for (size_t i = 0; i < 96; ++i) { + x[i] = kBlocks160w256FirstHalf[i] * x[i]; + } + + for (size_t i = 161, k = 95; i < kFftSize; ++i, --k) { + RTC_DCHECK_NE(0, k); + x[i] = kBlocks160w256FirstHalf[k] * x[i]; + } +} + +// Extends a frame with previous data. +void FormExtendedFrame(rtc::ArrayView frame, + rtc::ArrayView old_data, + rtc::ArrayView extended_frame) { + std::copy(old_data.begin(), old_data.end(), extended_frame.begin()); + std::copy(frame.begin(), frame.end(), + extended_frame.begin() + old_data.size()); + std::copy(extended_frame.end() - old_data.size(), extended_frame.end(), + old_data.begin()); +} + +// Uses overlap-and-add to produce an output frame. 
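// Frame geometry sketch (constants come from ns_common.h, which is only
// partially visible in this diff): kFftSize = 256 samples per analysis window,
// kNsFrameSize = 160 samples per 10 ms frame of the 16 kHz lower band, and
// kOverlapSize = kFftSize - kNsFrameSize = 96 samples carried between frames.
// For the first kOverlapSize output samples, the head of the current synthesis
// window is summed with the tail saved from the previous call, roughly:
//   output_frame[i]   = overlap_memory[i] + extended_frame[i];        // i < 96
//   overlap_memory[i] = extended_frame[kNsFrameSize + i];             // new tail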
+void OverlapAndAdd(rtc::ArrayView extended_frame, + rtc::ArrayView overlap_memory, + rtc::ArrayView output_frame) { + for (size_t i = 0; i < kOverlapSize; ++i) { + output_frame[i] = overlap_memory[i] + extended_frame[i]; + } + std::copy(extended_frame.begin() + kOverlapSize, + extended_frame.begin() + kNsFrameSize, + output_frame.begin() + kOverlapSize); + std::copy(extended_frame.begin() + kNsFrameSize, extended_frame.end(), + overlap_memory.begin()); +} + +// Produces a delayed frame. +void DelaySignal(rtc::ArrayView frame, + rtc::ArrayView delay_buffer, + rtc::ArrayView delayed_frame) { + constexpr size_t kSamplesFromFrame = kNsFrameSize - (kFftSize - kNsFrameSize); + std::copy(delay_buffer.begin(), delay_buffer.end(), delayed_frame.begin()); + std::copy(frame.begin(), frame.begin() + kSamplesFromFrame, + delayed_frame.begin() + delay_buffer.size()); + + std::copy(frame.begin() + kSamplesFromFrame, frame.end(), + delay_buffer.begin()); +} + +// Computes the energy of an extended frame. +float ComputeEnergyOfExtendedFrame(rtc::ArrayView x) { + float energy = 0.f; + for (float x_k : x) { + energy += x_k * x_k; + } + + return energy; +} + +// Computes the energy of an extended frame based on its subcomponents. +float ComputeEnergyOfExtendedFrame( + rtc::ArrayView frame, + rtc::ArrayView old_data) { + float energy = 0.f; + for (float v : old_data) { + energy += v * v; + } + for (float v : frame) { + energy += v * v; + } + + return energy; +} + +// Computes the magnitude spectrum based on an FFT output. +void ComputeMagnitudeSpectrum( + rtc::ArrayView real, + rtc::ArrayView imag, + rtc::ArrayView signal_spectrum) { + signal_spectrum[0] = fabsf(real[0]) + 1.f; + signal_spectrum[kFftSizeBy2Plus1 - 1] = + fabsf(real[kFftSizeBy2Plus1 - 1]) + 1.f; + + for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) { + signal_spectrum[i] = + SqrtFastApproximation(real[i] * real[i] + imag[i] * imag[i]) + 1.f; + } +} + +// Compute prior and post SNR. +void ComputeSnr(rtc::ArrayView filter, + rtc::ArrayView prev_signal_spectrum, + rtc::ArrayView signal_spectrum, + rtc::ArrayView prev_noise_spectrum, + rtc::ArrayView noise_spectrum, + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr) { + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + // Previous post SNR. + // Previous estimate: based on previous frame with gain filter. + float prev_estimate = prev_signal_spectrum[i] / + (prev_noise_spectrum[i] + 0.0001f) * filter[i]; + // Post SNR. + if (signal_spectrum[i] > noise_spectrum[i]) { + post_snr[i] = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f; + } else { + post_snr[i] = 0.f; + } + // The directed decision estimate of the prior SNR is a sum the current and + // previous estimates. + prior_snr[i] = 0.98f * prev_estimate + (1.f - 0.98f) * post_snr[i]; + } +} + +// Computes the attenuating gain for the noise suppression of the upper bands. +float ComputeUpperBandsGain( + float minimum_attenuating_gain, + rtc::ArrayView filter, + rtc::ArrayView speech_probability, + rtc::ArrayView prev_analysis_signal_spectrum, + rtc::ArrayView signal_spectrum) { + // Average speech prob and filter gain for the end of the lowest band. 
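// Worked numbers (assuming kFftSizeBy2Plus1 = 129 from ns_common.h): the loop
// below runs over bins 129 - 32 - 1 = 96 through 127, i.e. roughly the top
// 2 kHz (about 6-8 kHz) of the 16 kHz lower band.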
+ constexpr int kNumAvgBins = 32; + constexpr float kOneByNumAvgBins = 1.f / kNumAvgBins; + + float avg_prob_speech = 0.f; + float avg_filter_gain = 0.f; + for (size_t i = kFftSizeBy2Plus1 - kNumAvgBins - 1; i < kFftSizeBy2Plus1 - 1; + i++) { + avg_prob_speech += speech_probability[i]; + avg_filter_gain += filter[i]; + } + avg_prob_speech = avg_prob_speech * kOneByNumAvgBins; + avg_filter_gain = avg_filter_gain * kOneByNumAvgBins; + + // If the speech was suppressed by a component between Analyze and Process, an + // example being by an AEC, it should not be considered speech for the purpose + // of high band suppression. To that end, the speech probability is scaled + // accordingly. + float sum_analysis_spectrum = 0.f; + float sum_processing_spectrum = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + sum_analysis_spectrum += prev_analysis_signal_spectrum[i]; + sum_processing_spectrum += signal_spectrum[i]; + } + + // The magnitude spectrum computation enforces the spectrum to be strictly + // positive. + RTC_DCHECK_GT(sum_analysis_spectrum, 0.f); + avg_prob_speech *= sum_processing_spectrum / sum_analysis_spectrum; + + // Compute gain based on speech probability. + float gain = + 0.5f * (1.f + static_cast(tanh(2.f * avg_prob_speech - 1.f))); + + // Combine gain with low band gain. + if (avg_prob_speech >= 0.5f) { + gain = 0.25f * gain + 0.75f * avg_filter_gain; + } else { + gain = 0.5f * gain + 0.5f * avg_filter_gain; + } + + // Make sure gain is within flooring range. + return std::min(std::max(gain, minimum_attenuating_gain), 1.f); +} + +} // namespace + +NoiseSuppressor::ChannelState::ChannelState( + const SuppressionParams& suppression_params, + size_t num_bands) + : wiener_filter(suppression_params), + noise_estimator(suppression_params), + process_delay_memory(num_bands > 1 ? num_bands - 1 : 0) { + analyze_analysis_memory.fill(0.f); + prev_analysis_signal_spectrum.fill(1.f); + process_analysis_memory.fill(0.f); + process_synthesis_memory.fill(0.f); + for (auto& d : process_delay_memory) { + d.fill(0.f); + } +} + +NoiseSuppressor::NoiseSuppressor(const NsConfig& config, + size_t sample_rate_hz, + size_t num_channels) + : num_bands_(NumBandsForRate(sample_rate_hz)), + num_channels_(num_channels), + suppression_params_(config.target_level), + filter_bank_states_heap_(NumChannelsOnHeap(num_channels_)), + upper_band_gains_heap_(NumChannelsOnHeap(num_channels_)), + energies_before_filtering_heap_(NumChannelsOnHeap(num_channels_)), + gain_adjustments_heap_(NumChannelsOnHeap(num_channels_)), + channels_(num_channels_) { + for (size_t ch = 0; ch < num_channels_; ++ch) { + channels_[ch] = + std::make_unique(suppression_params_, num_bands_); + } +} + +void NoiseSuppressor::AggregateWienerFilters( + rtc::ArrayView filter) const { + rtc::ArrayView filter0 = + channels_[0]->wiener_filter.get_filter(); + std::copy(filter0.begin(), filter0.end(), filter.begin()); + + for (size_t ch = 1; ch < num_channels_; ++ch) { + rtc::ArrayView filter_ch = + channels_[ch]->wiener_filter.get_filter(); + + for (size_t k = 0; k < kFftSizeBy2Plus1; ++k) { + filter[k] = std::min(filter[k], filter_ch[k]); + } + } +} + +void NoiseSuppressor::Analyze(const AudioBuffer& audio) { + // Prepare the noise estimator for the analysis stage. + for (size_t ch = 0; ch < num_channels_; ++ch) { + channels_[ch]->noise_estimator.PrepareAnalysis(); + } + + // Check for zero frames. 
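// Note: the energy below spans the stored analysis memory plus the current
// 160-sample band-0 frame, so the frame is only treated as zero when every
// channel's entire extended 256-sample window is silent.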
+ bool zero_frame = true; + for (size_t ch = 0; ch < num_channels_; ++ch) { + rtc::ArrayView y_band0( + &audio.split_bands_const(ch)[0][0], kNsFrameSize); + float energy = ComputeEnergyOfExtendedFrame( + y_band0, channels_[ch]->analyze_analysis_memory); + if (energy > 0.f) { + zero_frame = false; + break; + } + } + + if (zero_frame) { + // We want to avoid updating statistics in this case: + // Updating feature statistics when we have zeros only will cause + // thresholds to move towards zero signal situations. This in turn has the + // effect that once the signal is "turned on" (non-zero values) everything + // will be treated as speech and there is no noise suppression effect. + // Depending on the duration of the inactive signal it takes a + // considerable amount of time for the system to learn what is noise and + // what is speech. + return; + } + + // Only update analysis counter for frames that are properly analyzed. + if (++num_analyzed_frames_ < 0) { + num_analyzed_frames_ = 0; + } + + // Analyze all channels. + for (size_t ch = 0; ch < num_channels_; ++ch) { + std::unique_ptr& ch_p = channels_[ch]; + rtc::ArrayView y_band0( + &audio.split_bands_const(ch)[0][0], kNsFrameSize); + + // Form an extended frame and apply analysis filter bank windowing. + std::array extended_frame; + FormExtendedFrame(y_band0, ch_p->analyze_analysis_memory, extended_frame); + ApplyFilterBankWindow(extended_frame); + + // Compute the magnitude spectrum. + std::array real; + std::array imag; + fft_.Fft(extended_frame, real, imag); + + std::array signal_spectrum; + ComputeMagnitudeSpectrum(real, imag, signal_spectrum); + + // Compute energies. + float signal_energy = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + signal_energy += real[i] * real[i] + imag[i] * imag[i]; + } + signal_energy /= kFftSizeBy2Plus1; + + float signal_spectral_sum = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + signal_spectral_sum += signal_spectrum[i]; + } + + // Estimate the noise spectra and the probability estimates of speech + // presence. + ch_p->noise_estimator.PreUpdate(num_analyzed_frames_, signal_spectrum, + signal_spectral_sum); + + std::array post_snr; + std::array prior_snr; + ComputeSnr(ch_p->wiener_filter.get_filter(), + ch_p->prev_analysis_signal_spectrum, signal_spectrum, + ch_p->noise_estimator.get_prev_noise_spectrum(), + ch_p->noise_estimator.get_noise_spectrum(), prior_snr, post_snr); + + ch_p->speech_probability_estimator.Update( + num_analyzed_frames_, prior_snr, post_snr, + ch_p->noise_estimator.get_conservative_noise_spectrum(), + signal_spectrum, signal_spectral_sum, signal_energy); + + ch_p->noise_estimator.PostUpdate( + ch_p->speech_probability_estimator.get_probability(), signal_spectrum); + + // Store the magnitude spectrum to make it avalilable for the process + // method. + std::copy(signal_spectrum.begin(), signal_spectrum.end(), + ch_p->prev_analysis_signal_spectrum.begin()); + } +} + +void NoiseSuppressor::Process(AudioBuffer* audio) { + // Select the space for storing data during the processing. 
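// With kMaxNumChannelsOnStack = 2 (see above), mono and stereo captures use
// the fixed-size stack arrays declared below; only configurations with more
// channels fall back to the heap vectors pre-allocated in the constructor.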
+ std::array filter_bank_states_stack; + rtc::ArrayView filter_bank_states( + filter_bank_states_stack.data(), num_channels_); + std::array upper_band_gains_stack; + rtc::ArrayView upper_band_gains(upper_band_gains_stack.data(), + num_channels_); + std::array energies_before_filtering_stack; + rtc::ArrayView energies_before_filtering( + energies_before_filtering_stack.data(), num_channels_); + std::array gain_adjustments_stack; + rtc::ArrayView gain_adjustments(gain_adjustments_stack.data(), + num_channels_); + if (NumChannelsOnHeap(num_channels_) > 0) { + // If the stack-allocated space is too small, use the heap for storing the + // data. + filter_bank_states = rtc::ArrayView( + filter_bank_states_heap_.data(), num_channels_); + upper_band_gains = + rtc::ArrayView(upper_band_gains_heap_.data(), num_channels_); + energies_before_filtering = rtc::ArrayView( + energies_before_filtering_heap_.data(), num_channels_); + gain_adjustments = + rtc::ArrayView(gain_adjustments_heap_.data(), num_channels_); + } + + // Compute the suppression filters for all channels. + for (size_t ch = 0; ch < num_channels_; ++ch) { + // Form an extended frame and apply analysis filter bank windowing. + rtc::ArrayView y_band0(&audio->split_bands(ch)[0][0], + kNsFrameSize); + + FormExtendedFrame(y_band0, channels_[ch]->process_analysis_memory, + filter_bank_states[ch].extended_frame); + + ApplyFilterBankWindow(filter_bank_states[ch].extended_frame); + + energies_before_filtering[ch] = + ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame); + + // Perform filter bank analysis and compute the magnitude spectrum. + fft_.Fft(filter_bank_states[ch].extended_frame, filter_bank_states[ch].real, + filter_bank_states[ch].imag); + + std::array signal_spectrum; + ComputeMagnitudeSpectrum(filter_bank_states[ch].real, + filter_bank_states[ch].imag, signal_spectrum); + + // Compute the frequency domain gain filter for noise attenuation. + channels_[ch]->wiener_filter.Update( + num_analyzed_frames_, + channels_[ch]->noise_estimator.get_noise_spectrum(), + channels_[ch]->noise_estimator.get_prev_noise_spectrum(), + channels_[ch]->noise_estimator.get_parametric_noise_spectrum(), + signal_spectrum); + + if (num_bands_ > 1) { + // Compute the time-domain gain for attenuating the noise in the upper + // bands. + + upper_band_gains[ch] = ComputeUpperBandsGain( + suppression_params_.minimum_attenuating_gain, + channels_[ch]->wiener_filter.get_filter(), + channels_[ch]->speech_probability_estimator.get_probability(), + channels_[ch]->prev_analysis_signal_spectrum, signal_spectrum); + } + } + + // Only do the below processing if the output of the audio processing module + // is used. + if (!capture_output_used_) { + return; + } + + // Aggregate the Wiener filters for all channels. + std::array filter_data; + rtc::ArrayView filter = filter_data; + if (num_channels_ == 1) { + filter = channels_[0]->wiener_filter.get_filter(); + } else { + AggregateWienerFilters(filter_data); + } + + for (size_t ch = 0; ch < num_channels_; ++ch) { + // Apply the filter to the lower band. 
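// The aggregated Wiener gain is applied per bin to both the real and
// imaginary FFT coefficients; the Ifft and synthesis windowing further down
// then turn the attenuated spectrum back into the time-domain lower band.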
+ for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + filter_bank_states[ch].real[i] *= filter[i]; + filter_bank_states[ch].imag[i] *= filter[i]; + } + } + + // Perform filter bank synthesis + for (size_t ch = 0; ch < num_channels_; ++ch) { + fft_.Ifft(filter_bank_states[ch].real, filter_bank_states[ch].imag, + filter_bank_states[ch].extended_frame); + } + + for (size_t ch = 0; ch < num_channels_; ++ch) { + const float energy_after_filtering = + ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame); + + // Apply synthesis window. + ApplyFilterBankWindow(filter_bank_states[ch].extended_frame); + + // Compute the adjustment of the noise attenuation filter based on the + // effect of the attenuation. + gain_adjustments[ch] = + channels_[ch]->wiener_filter.ComputeOverallScalingFactor( + num_analyzed_frames_, + channels_[ch]->speech_probability_estimator.get_prior_probability(), + energies_before_filtering[ch], energy_after_filtering); + } + + // Select and apply adjustment of the noise attenuation filter based on the + // effect of the attenuation. + float gain_adjustment = gain_adjustments[0]; + for (size_t ch = 1; ch < num_channels_; ++ch) { + gain_adjustment = std::min(gain_adjustment, gain_adjustments[ch]); + } + for (size_t ch = 0; ch < num_channels_; ++ch) { + for (size_t i = 0; i < kFftSize; ++i) { + filter_bank_states[ch].extended_frame[i] = + gain_adjustment * filter_bank_states[ch].extended_frame[i]; + } + } + + // Use overlap-and-add to form the output frame of the lowest band. + for (size_t ch = 0; ch < num_channels_; ++ch) { + rtc::ArrayView y_band0(&audio->split_bands(ch)[0][0], + kNsFrameSize); + OverlapAndAdd(filter_bank_states[ch].extended_frame, + channels_[ch]->process_synthesis_memory, y_band0); + } + + if (num_bands_ > 1) { + // Select the noise attenuating gain to apply to the upper band. + float upper_band_gain = upper_band_gains[0]; + for (size_t ch = 1; ch < num_channels_; ++ch) { + upper_band_gain = std::min(upper_band_gain, upper_band_gains[ch]); + } + + // Process the upper bands. + for (size_t ch = 0; ch < num_channels_; ++ch) { + for (size_t b = 1; b < num_bands_; ++b) { + // Delay the upper bands to match the delay of the filterbank applied to + // the lowest band. + rtc::ArrayView y_band( + &audio->split_bands(ch)[b][0], kNsFrameSize); + std::array delayed_frame; + DelaySignal(y_band, channels_[ch]->process_delay_memory[b - 1], + delayed_frame); + + // Apply the time-domain noise-attenuating gain. + for (size_t j = 0; j < kNsFrameSize; j++) { + y_band[j] = upper_band_gain * delayed_frame[j]; + } + } + } + } + + // Limit the output the allowed range. + for (size_t ch = 0; ch < num_channels_; ++ch) { + for (size_t b = 0; b < num_bands_; ++b) { + rtc::ArrayView y_band(&audio->split_bands(ch)[b][0], + kNsFrameSize); + for (size_t j = 0; j < kNsFrameSize; j++) { + y_band[j] = std::min(std::max(y_band[j], -32768.f), 32767.f); + } + } + } +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/noise_suppressor.h b/VocieProcess/modules/audio_processing/ns/noise_suppressor.h new file mode 100644 index 0000000..1e321cf --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/noise_suppressor.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
+#define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/ns/noise_estimator.h"
+#include "modules/audio_processing/ns/ns_common.h"
+#include "modules/audio_processing/ns/ns_config.h"
+#include "modules/audio_processing/ns/ns_fft.h"
+#include "modules/audio_processing/ns/speech_probability_estimator.h"
+#include "modules/audio_processing/ns/wiener_filter.h"
+
+namespace webrtc {
+
+// Class for suppressing noise in a signal.
+class NoiseSuppressor {
+ public:
+  NoiseSuppressor(const NsConfig& config,
+                  size_t sample_rate_hz,
+                  size_t num_channels);
+  NoiseSuppressor(const NoiseSuppressor&) = delete;
+  NoiseSuppressor& operator=(const NoiseSuppressor&) = delete;
+
+  // Analyses the signal (typically applied before the AEC to avoid analyzing
+  // any comfort noise signal).
+  void Analyze(const AudioBuffer& audio);
+
+  // Applies noise suppression.
+  void Process(AudioBuffer* audio);
+
+  // Specifies whether the capture output will be used. The purpose of this is
+  // to allow the noise suppressor to deactivate some of the processing when
+  // the resulting output is not used anyway, for instance when the endpoint
+  // is muted.
+  void SetCaptureOutputUsage(bool capture_output_used) {
+    capture_output_used_ = capture_output_used;
+  }
+
+ private:
+  const size_t num_bands_;
+  const size_t num_channels_;
+  const SuppressionParams suppression_params_;
+  int32_t num_analyzed_frames_ = -1;
+  NrFft fft_;
+  bool capture_output_used_ = true;
+
+  struct ChannelState {
+    ChannelState(const SuppressionParams& suppression_params, size_t num_bands);
+
+    SpeechProbabilityEstimator speech_probability_estimator;
+    WienerFilter wiener_filter;
+    NoiseEstimator noise_estimator;
+    std::array<float, kFftSizeBy2Plus1> prev_analysis_signal_spectrum;
+    std::array<float, kFftSize - kNsFrameSize> analyze_analysis_memory;
+    std::array<float, kOverlapSize> process_analysis_memory;
+    std::array<float, kOverlapSize> process_synthesis_memory;
+    std::vector<std::array<float, kNsFrameSize>> process_delay_memory;
+  };
+
+  struct FilterBankState {
+    std::array<float, kFftSizeBy2Plus1> real;
+    std::array<float, kFftSizeBy2Plus1> imag;
+    std::array<float, kFftSize> extended_frame;
+  };
+
+  std::vector<FilterBankState> filter_bank_states_heap_;
+  std::vector<float> upper_band_gains_heap_;
+  std::vector<float> energies_before_filtering_heap_;
+  std::vector<float> gain_adjustments_heap_;
+  std::vector<std::unique_ptr<ChannelState>> channels_;
+
+  // Aggregates the Wiener filters into a single filter to use.
+  void AggregateWienerFilters(
+      rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
diff --git a/VocieProcess/modules/audio_processing/ns/ns_common.h b/VocieProcess/modules/audio_processing/ns/ns_common.h
new file mode 100644
index 0000000..d6149f7
--- /dev/null
+++ b/VocieProcess/modules/audio_processing/ns/ns_common.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ + +#include + +namespace webrtc { + +constexpr size_t kFftSize = 256; +constexpr size_t kFftSizeBy2Plus1 = kFftSize / 2 + 1; +constexpr size_t kNsFrameSize = 160; +constexpr size_t kOverlapSize = kFftSize - kNsFrameSize; + +constexpr int kShortStartupPhaseBlocks = 50; +constexpr int kLongStartupPhaseBlocks = 200; +constexpr int kFeatureUpdateWindowSize = 500; + +constexpr float kLtrFeatureThr = 0.5f; +constexpr float kBinSizeLrt = 0.1f; +constexpr float kBinSizeSpecFlat = 0.05f; +constexpr float kBinSizeSpecDiff = 0.1f; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ diff --git a/VocieProcess/modules/audio_processing/ns/ns_config.h b/VocieProcess/modules/audio_processing/ns/ns_config.h new file mode 100644 index 0000000..0a285e9 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/ns_config.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_ +#define MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_ + +namespace webrtc { + +// Config struct for the noise suppressor +struct NsConfig { + enum class SuppressionLevel { k6dB, k12dB, k18dB, k21dB }; + SuppressionLevel target_level = SuppressionLevel::k12dB; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_ diff --git a/VocieProcess/modules/audio_processing/ns/ns_fft.cc b/VocieProcess/modules/audio_processing/ns/ns_fft.cc new file mode 100644 index 0000000..264c469 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/ns_fft.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/ns_fft.h" + +#include "common_audio/third_party/ooura/fft_size_256/fft4g.h" + +namespace webrtc { + +NrFft::NrFft() : bit_reversal_state_(kFftSize / 2), tables_(kFftSize / 2) { + // Initialize WebRtc_rdt (setting (bit_reversal_state_[0] to 0 triggers + // initialization) + bit_reversal_state_[0] = 0.f; + std::array tmp_buffer; + tmp_buffer.fill(0.f); + WebRtc_rdft(kFftSize, 1, tmp_buffer.data(), bit_reversal_state_.data(), + tables_.data()); +} + +void NrFft::Fft(rtc::ArrayView time_data, + rtc::ArrayView real, + rtc::ArrayView imag) { + WebRtc_rdft(kFftSize, 1, time_data.data(), bit_reversal_state_.data(), + tables_.data()); + + imag[0] = 0; + real[0] = time_data[0]; + + imag[kFftSizeBy2Plus1 - 1] = 0; + real[kFftSizeBy2Plus1 - 1] = time_data[1]; + + for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) { + real[i] = time_data[2 * i]; + imag[i] = time_data[2 * i + 1]; + } +} + +void NrFft::Ifft(rtc::ArrayView real, + rtc::ArrayView imag, + rtc::ArrayView time_data) { + time_data[0] = real[0]; + time_data[1] = real[kFftSizeBy2Plus1 - 1]; + for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) { + time_data[2 * i] = real[i]; + time_data[2 * i + 1] = imag[i]; + } + WebRtc_rdft(kFftSize, -1, time_data.data(), bit_reversal_state_.data(), + tables_.data()); + + // Scale the output + constexpr float kScaling = 2.f / kFftSize; + for (float& d : time_data) { + d *= kScaling; + } +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/ns_fft.h b/VocieProcess/modules/audio_processing/ns/ns_fft.h new file mode 100644 index 0000000..539251e --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/ns_fft.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_ +#define MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" + +namespace webrtc { + +// Wrapper class providing 256 point FFT functionality. +class NrFft { + public: + NrFft(); + NrFft(const NrFft&) = delete; + NrFft& operator=(const NrFft&) = delete; + + // Transforms the signal from time to frequency domain. + void Fft(rtc::ArrayView time_data, + rtc::ArrayView real, + rtc::ArrayView imag); + + // Transforms the signal from frequency to time domain. + void Ifft(rtc::ArrayView real, + rtc::ArrayView imag, + rtc::ArrayView time_data); + + private: + std::vector bit_reversal_state_; + std::vector tables_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_ diff --git a/VocieProcess/modules/audio_processing/ns/prior_signal_model.cc b/VocieProcess/modules/audio_processing/ns/prior_signal_model.cc new file mode 100644 index 0000000..f25a1e2 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/prior_signal_model.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/prior_signal_model.h" + +namespace webrtc { + +PriorSignalModel::PriorSignalModel(float lrt_initial_value) + : lrt(lrt_initial_value) {} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/prior_signal_model.h b/VocieProcess/modules/audio_processing/ns/prior_signal_model.h new file mode 100644 index 0000000..dcfa7ea --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/prior_signal_model.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_ +#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_ + +namespace webrtc { + +// Struct for storing the prior signal model parameters. +struct PriorSignalModel { + explicit PriorSignalModel(float lrt_initial_value); + PriorSignalModel(const PriorSignalModel&) = delete; + PriorSignalModel& operator=(const PriorSignalModel&) = delete; + + float lrt; + float flatness_threshold = .5f; + float template_diff_threshold = .5f; + float lrt_weighting = 1.f; + float flatness_weighting = 0.f; + float difference_weighting = 0.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_ diff --git a/VocieProcess/modules/audio_processing/ns/prior_signal_model_estimator.cc b/VocieProcess/modules/audio_processing/ns/prior_signal_model_estimator.cc new file mode 100644 index 0000000..f77dcd6 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/prior_signal_model_estimator.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/prior_signal_model_estimator.h" + +#include + +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Identifies the first of the two largest peaks in the histogram. +void FindFirstOfTwoLargestPeaks( + float bin_size, + rtc::ArrayView spectral_flatness, + float* peak_position, + int* peak_weight) { + RTC_DCHECK(peak_position); + RTC_DCHECK(peak_weight); + + int peak_value = 0; + int secondary_peak_value = 0; + *peak_position = 0.f; + float secondary_peak_position = 0.f; + *peak_weight = 0; + int secondary_peak_weight = 0; + + // Identify the two largest peaks. + for (int i = 0; i < kHistogramSize; ++i) { + const float bin_mid = (i + 0.5f) * bin_size; + if (spectral_flatness[i] > peak_value) { + // Found new "first" peak candidate. 
+ secondary_peak_value = peak_value; + secondary_peak_weight = *peak_weight; + secondary_peak_position = *peak_position; + + peak_value = spectral_flatness[i]; + *peak_weight = spectral_flatness[i]; + *peak_position = bin_mid; + } else if (spectral_flatness[i] > secondary_peak_value) { + // Found new "second" peak candidate. + secondary_peak_value = spectral_flatness[i]; + secondary_peak_weight = spectral_flatness[i]; + secondary_peak_position = bin_mid; + } + } + + // Merge the peaks if they are close. + if ((fabs(secondary_peak_position - *peak_position) < 2 * bin_size) && + (secondary_peak_weight > 0.5f * (*peak_weight))) { + *peak_weight += secondary_peak_weight; + *peak_position = 0.5f * (*peak_position + secondary_peak_position); + } +} + +void UpdateLrt(rtc::ArrayView lrt_histogram, + float* prior_model_lrt, + bool* low_lrt_fluctuations) { + RTC_DCHECK(prior_model_lrt); + RTC_DCHECK(low_lrt_fluctuations); + + float average = 0.f; + float average_compl = 0.f; + float average_squared = 0.f; + int count = 0; + + for (int i = 0; i < 10; ++i) { + float bin_mid = (i + 0.5f) * kBinSizeLrt; + average += lrt_histogram[i] * bin_mid; + count += lrt_histogram[i]; + } + if (count > 0) { + average = average / count; + } + + for (int i = 0; i < kHistogramSize; ++i) { + float bin_mid = (i + 0.5f) * kBinSizeLrt; + average_squared += lrt_histogram[i] * bin_mid * bin_mid; + average_compl += lrt_histogram[i] * bin_mid; + } + constexpr float kOneFeatureUpdateWindowSize = 1.f / kFeatureUpdateWindowSize; + average_squared = average_squared * kOneFeatureUpdateWindowSize; + average_compl = average_compl * kOneFeatureUpdateWindowSize; + + // Fluctuation limit of LRT feature. + *low_lrt_fluctuations = average_squared - average * average_compl < 0.05f; + + // Get threshold for LRT feature. + constexpr float kMaxLrt = 1.f; + constexpr float kMinLrt = .2f; + if (*low_lrt_fluctuations) { + // Very low fluctuation, so likely noise. + *prior_model_lrt = kMaxLrt; + } else { + *prior_model_lrt = std::min(kMaxLrt, std::max(kMinLrt, 1.2f * average)); + } +} + +} // namespace + +PriorSignalModelEstimator::PriorSignalModelEstimator(float lrt_initial_value) + : prior_model_(lrt_initial_value) {} + +// Extract thresholds for feature parameters and computes the threshold/weights. +void PriorSignalModelEstimator::Update(const Histograms& histograms) { + bool low_lrt_fluctuations; + UpdateLrt(histograms.get_lrt(), &prior_model_.lrt, &low_lrt_fluctuations); + + // For spectral flatness and spectral difference: compute the main peaks of + // the histograms. + float spectral_flatness_peak_position; + int spectral_flatness_peak_weight; + FindFirstOfTwoLargestPeaks( + kBinSizeSpecFlat, histograms.get_spectral_flatness(), + &spectral_flatness_peak_position, &spectral_flatness_peak_weight); + + float spectral_diff_peak_position = 0.f; + int spectral_diff_peak_weight = 0; + FindFirstOfTwoLargestPeaks(kBinSizeSpecDiff, histograms.get_spectral_diff(), + &spectral_diff_peak_position, + &spectral_diff_peak_weight); + + // Reject if weight of peaks is not large enough, or peak value too small. + // Peak limit for spectral flatness (varies between 0 and 1). + const int use_spec_flat = spectral_flatness_peak_weight < 0.3f * 500 || + spectral_flatness_peak_position < 0.6f + ? 0 + : 1; + + // Reject if weight of peaks is not large enough or if fluctuation of the LRT + // feature are very low, indicating a noise state. + const int use_spec_diff = + spectral_diff_peak_weight < 0.3f * 500 || low_lrt_fluctuations ? 0 : 1; + + // Update the model. 
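In the update that follows, each accepted feature gets its threshold from the (scaled and clamped) histogram peak, and the feature weights are simply an equal split among the accepted features; with use_spec_flat and use_spec_diff in {0, 1}:

    weight = 1 / (1 + use_spec_flat + use_spec_diff)

assigned to lrt_weighting and, when the corresponding feature is accepted, to flatness_weighting and difference_weighting (0 otherwise).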
+ prior_model_.template_diff_threshold = 1.2f * spectral_diff_peak_position; + prior_model_.template_diff_threshold = + std::min(1.f, std::max(0.16f, prior_model_.template_diff_threshold)); + + float one_by_feature_sum = 1.f / (1.f + use_spec_flat + use_spec_diff); + prior_model_.lrt_weighting = one_by_feature_sum; + + if (use_spec_flat == 1) { + prior_model_.flatness_threshold = 0.9f * spectral_flatness_peak_position; + prior_model_.flatness_threshold = + std::min(.95f, std::max(0.1f, prior_model_.flatness_threshold)); + prior_model_.flatness_weighting = one_by_feature_sum; + } else { + prior_model_.flatness_weighting = 0.f; + } + + if (use_spec_diff == 1) { + prior_model_.difference_weighting = one_by_feature_sum; + } else { + prior_model_.difference_weighting = 0.f; + } +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/prior_signal_model_estimator.h b/VocieProcess/modules/audio_processing/ns/prior_signal_model_estimator.h new file mode 100644 index 0000000..d178323 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/prior_signal_model_estimator.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ + +#include "modules/audio_processing/ns/histograms.h" +#include "modules/audio_processing/ns/prior_signal_model.h" + +namespace webrtc { + +// Estimator of the prior signal model parameters. +class PriorSignalModelEstimator { + public: + explicit PriorSignalModelEstimator(float lrt_initial_value); + PriorSignalModelEstimator(const PriorSignalModelEstimator&) = delete; + PriorSignalModelEstimator& operator=(const PriorSignalModelEstimator&) = + delete; + + // Updates the model estimate. + void Update(const Histograms& h); + + // Returns the estimated model. + const PriorSignalModel& get_prior_model() const { return prior_model_; } + + private: + PriorSignalModel prior_model_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ diff --git a/VocieProcess/modules/audio_processing/ns/quantile_noise_estimator.cc b/VocieProcess/modules/audio_processing/ns/quantile_noise_estimator.cc new file mode 100644 index 0000000..bab494f --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/quantile_noise_estimator.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/quantile_noise_estimator.h" + +#include + +#include "modules/audio_processing/ns/fast_math.h" + +namespace webrtc { + +QuantileNoiseEstimator::QuantileNoiseEstimator() { + quantile_.fill(0.f); + density_.fill(0.3f); + log_quantile_.fill(8.f); + + constexpr float kOneBySimult = 1.f / kSimult; + for (size_t i = 0; i < kSimult; ++i) { + counter_[i] = floor(kLongStartupPhaseBlocks * (i + 1.f) * kOneBySimult); + } +} + +void QuantileNoiseEstimator::Estimate( + rtc::ArrayView signal_spectrum, + rtc::ArrayView noise_spectrum) { + std::array log_spectrum; + LogApproximation(signal_spectrum, log_spectrum); + + int quantile_index_to_return = -1; + // Loop over simultaneous estimates. + for (int s = 0, k = 0; s < kSimult; + ++s, k += static_cast(kFftSizeBy2Plus1)) { + const float one_by_counter_plus_1 = 1.f / (counter_[s] + 1.f); + for (int i = 0, j = k; i < static_cast(kFftSizeBy2Plus1); ++i, ++j) { + // Update log quantile estimate. + const float delta = density_[j] > 1.f ? 40.f / density_[j] : 40.f; + + const float multiplier = delta * one_by_counter_plus_1; + if (log_spectrum[i] > log_quantile_[j]) { + log_quantile_[j] += 0.25f * multiplier; + } else { + log_quantile_[j] -= 0.75f * multiplier; + } + + // Update density estimate. + constexpr float kWidth = 0.01f; + constexpr float kOneByWidthPlus2 = 1.f / (2.f * kWidth); + if (fabs(log_spectrum[i] - log_quantile_[j]) < kWidth) { + density_[j] = (counter_[s] * density_[j] + kOneByWidthPlus2) * + one_by_counter_plus_1; + } + } + + if (counter_[s] >= kLongStartupPhaseBlocks) { + counter_[s] = 0; + if (num_updates_ >= kLongStartupPhaseBlocks) { + quantile_index_to_return = k; + } + } + + ++counter_[s]; + } + + // Sequentially update the noise during startup. + if (num_updates_ < kLongStartupPhaseBlocks) { + // Use the last "s" to get noise during startup that differ from zero. + quantile_index_to_return = kFftSizeBy2Plus1 * (kSimult - 1); + ++num_updates_; + } + + if (quantile_index_to_return >= 0) { + ExpApproximation( + rtc::ArrayView(&log_quantile_[quantile_index_to_return], + kFftSizeBy2Plus1), + quantile_); + } + + std::copy(quantile_.begin(), quantile_.end(), noise_spectrum.begin()); +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/quantile_noise_estimator.h b/VocieProcess/modules/audio_processing/ns/quantile_noise_estimator.h new file mode 100644 index 0000000..55b0bfa --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/quantile_noise_estimator.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ + +#include + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" + +namespace webrtc { + +constexpr int kSimult = 3; + +// For quantile noise estimation. +class QuantileNoiseEstimator { + public: + QuantileNoiseEstimator(); + QuantileNoiseEstimator(const QuantileNoiseEstimator&) = delete; + QuantileNoiseEstimator& operator=(const QuantileNoiseEstimator&) = delete; + + // Estimate noise. 
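As context for Estimate(): the per-bin recursion in quantile_noise_estimator.cc above moves the log-domain estimate up by 0.25 * step when the observed log power exceeds it and down by 0.75 * step otherwise, so at equilibrium roughly 75% of observations lie above the tracked value; the noise estimate therefore follows a low (roughly 25th-percentile) quantile of the spectrum, which makes it robust to intermittent speech. A single-bin toy version of the same recursion (illustrative only, not an API in this patch):

    struct OneBinQuantileSketch {
      float quantile = 8.f;  // log-domain state
      float density = 0.3f;
      void Update(float log_power, float counter) {
        const float delta = density > 1.f ? 40.f / density : 40.f;
        const float step = delta / (counter + 1.f);
        if (log_power > quantile) {
          quantile += 0.25f * step;
        } else {
          quantile -= 0.75f * step;
        }
      }
    };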
+ void Estimate(rtc::ArrayView signal_spectrum, + rtc::ArrayView noise_spectrum); + + private: + std::array density_; + std::array log_quantile_; + std::array quantile_; + std::array counter_; + int num_updates_ = 1; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ diff --git a/VocieProcess/modules/audio_processing/ns/signal_model.cc b/VocieProcess/modules/audio_processing/ns/signal_model.cc new file mode 100644 index 0000000..364bfd0 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/signal_model.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/signal_model.h" + +namespace webrtc { + +SignalModel::SignalModel() { + constexpr float kSfFeatureThr = 0.5f; + + lrt = kLtrFeatureThr; + spectral_flatness = kSfFeatureThr; + spectral_diff = kSfFeatureThr; + avg_log_lrt.fill(kLtrFeatureThr); +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/signal_model.h b/VocieProcess/modules/audio_processing/ns/signal_model.h new file mode 100644 index 0000000..6614d38 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/signal_model.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ +#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ + +#include + +#include "modules/audio_processing/ns/ns_common.h" + +namespace webrtc { + +struct SignalModel { + SignalModel(); + SignalModel(const SignalModel&) = delete; + SignalModel& operator=(const SignalModel&) = delete; + + float lrt; + float spectral_diff; + float spectral_flatness; + // Log LRT factor with time-smoothing. + std::array avg_log_lrt; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ diff --git a/VocieProcess/modules/audio_processing/ns/signal_model_estimator.cc b/VocieProcess/modules/audio_processing/ns/signal_model_estimator.cc new file mode 100644 index 0000000..67dd3bb --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/signal_model_estimator.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/ns/signal_model_estimator.h" + +#include "modules/audio_processing/ns/fast_math.h" + +namespace webrtc { + +namespace { + +constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1; + +// Computes the difference measure between input spectrum and a template/learned +// noise spectrum. +float ComputeSpectralDiff( + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float diff_normalization) { + // spectral_diff = var(signal_spectrum) - cov(signal_spectrum, magnAvgPause)^2 + // / var(magnAvgPause) + + // Compute average quantities. + float noise_average = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + // Conservative smooth noise spectrum from pause frames. + noise_average += conservative_noise_spectrum[i]; + } + noise_average = noise_average * kOneByFftSizeBy2Plus1; + float signal_average = signal_spectral_sum * kOneByFftSizeBy2Plus1; + + // Compute variance and covariance quantities. + float covariance = 0.f; + float noise_variance = 0.f; + float signal_variance = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + float signal_diff = signal_spectrum[i] - signal_average; + float noise_diff = conservative_noise_spectrum[i] - noise_average; + covariance += signal_diff * noise_diff; + noise_variance += noise_diff * noise_diff; + signal_variance += signal_diff * signal_diff; + } + covariance *= kOneByFftSizeBy2Plus1; + noise_variance *= kOneByFftSizeBy2Plus1; + signal_variance *= kOneByFftSizeBy2Plus1; + + // Update of average magnitude spectrum. + float spectral_diff = + signal_variance - (covariance * covariance) / (noise_variance + 0.0001f); + // Normalize. + return spectral_diff / (diff_normalization + 0.0001f); +} + +// Updates the spectral flatness based on the input spectrum. +void UpdateSpectralFlatness( + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float* spectral_flatness) { + RTC_DCHECK(spectral_flatness); + + // Compute log of ratio of the geometric to arithmetic mean (handle the log(0) + // separately). + constexpr float kAveraging = 0.3f; + float avg_spect_flatness_num = 0.f; + for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) { + if (signal_spectrum[i] == 0.f) { + *spectral_flatness -= kAveraging * (*spectral_flatness); + return; + } + } + + for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) { + avg_spect_flatness_num += LogApproximation(signal_spectrum[i]); + } + + float avg_spect_flatness_denom = signal_spectral_sum - signal_spectrum[0]; + + avg_spect_flatness_denom = avg_spect_flatness_denom * kOneByFftSizeBy2Plus1; + avg_spect_flatness_num = avg_spect_flatness_num * kOneByFftSizeBy2Plus1; + + float spectral_tmp = + ExpApproximation(avg_spect_flatness_num) / avg_spect_flatness_denom; + + // Time-avg update of spectral flatness feature. + *spectral_flatness += kAveraging * (spectral_tmp - *spectral_flatness); +} + +// Updates the log LRT measures. 
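The function below maintains a per-bin, time-smoothed log likelihood ratio; with prior SNR xi_k = prior_snr[k] and posterior SNR gamma_k = post_snr[k], each loop iteration is (up to the small regularization terms)

    avg_log_lrt[k] += 0.5 * ((gamma_k + 1) * 2 * xi_k / (1 + 2 * xi_k) - log(1 + 2 * xi_k) - avg_log_lrt[k])

and the scalar LRT feature is the mean of avg_log_lrt over the kFftSizeBy2Plus1 bins.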
+void UpdateSpectralLrt(rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView avg_log_lrt, + float* lrt) { + RTC_DCHECK(lrt); + + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + float tmp1 = 1.f + 2.f * prior_snr[i]; + float tmp2 = 2.f * prior_snr[i] / (tmp1 + 0.0001f); + float bessel_tmp = (post_snr[i] + 1.f) * tmp2; + avg_log_lrt[i] += + .5f * (bessel_tmp - LogApproximation(tmp1) - avg_log_lrt[i]); + } + + float log_lrt_time_avg_k_sum = 0.f; + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + log_lrt_time_avg_k_sum += avg_log_lrt[i]; + } + *lrt = log_lrt_time_avg_k_sum * kOneByFftSizeBy2Plus1; +} + +} // namespace + +SignalModelEstimator::SignalModelEstimator() + : prior_model_estimator_(kLtrFeatureThr) {} + +void SignalModelEstimator::AdjustNormalization(int32_t num_analyzed_frames, + float signal_energy) { + diff_normalization_ *= num_analyzed_frames; + diff_normalization_ += signal_energy; + diff_normalization_ /= (num_analyzed_frames + 1); +} + +// Update the noise features. +void SignalModelEstimator::Update( + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float signal_energy) { + // Compute spectral flatness on input spectrum. + UpdateSpectralFlatness(signal_spectrum, signal_spectral_sum, + &features_.spectral_flatness); + + // Compute difference of input spectrum with learned/estimated noise spectrum. + float spectral_diff = + ComputeSpectralDiff(conservative_noise_spectrum, signal_spectrum, + signal_spectral_sum, diff_normalization_); + // Compute time-avg update of difference feature. + features_.spectral_diff += 0.3f * (spectral_diff - features_.spectral_diff); + + signal_energy_sum_ += signal_energy; + + // Compute histograms for parameter decisions (thresholds and weights for + // features). Parameters are extracted periodically. + if (--histogram_analysis_counter_ > 0) { + histograms_.Update(features_); + } else { + // Compute model parameters. + prior_model_estimator_.Update(histograms_); + + // Clear histograms for next update. + histograms_.Clear(); + + histogram_analysis_counter_ = kFeatureUpdateWindowSize; + + // Update every window: + // Compute normalization for the spectral difference for next estimation. + signal_energy_sum_ = signal_energy_sum_ / kFeatureUpdateWindowSize; + diff_normalization_ = 0.5f * (signal_energy_sum_ + diff_normalization_); + signal_energy_sum_ = 0.f; + } + + // Compute the LRT. + UpdateSpectralLrt(prior_snr, post_snr, features_.avg_log_lrt, &features_.lrt); +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/signal_model_estimator.h b/VocieProcess/modules/audio_processing/ns/signal_model_estimator.h new file mode 100644 index 0000000..58ce00a --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/signal_model_estimator.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/histograms.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/prior_signal_model.h" +#include "modules/audio_processing/ns/prior_signal_model_estimator.h" +#include "modules/audio_processing/ns/signal_model.h" + +namespace webrtc { + +class SignalModelEstimator { + public: + SignalModelEstimator(); + SignalModelEstimator(const SignalModelEstimator&) = delete; + SignalModelEstimator& operator=(const SignalModelEstimator&) = delete; + + // Compute signal normalization during the initial startup phase. + void AdjustNormalization(int32_t num_analyzed_frames, float signal_energy); + + void Update( + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float signal_energy); + + const PriorSignalModel& get_prior_model() const { + return prior_model_estimator_.get_prior_model(); + } + const SignalModel& get_model() { return features_; } + + private: + float diff_normalization_ = 0.f; + float signal_energy_sum_ = 0.f; + Histograms histograms_; + int histogram_analysis_counter_ = 500; + PriorSignalModelEstimator prior_model_estimator_; + SignalModel features_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ diff --git a/VocieProcess/modules/audio_processing/ns/speech_probability_estimator.cc b/VocieProcess/modules/audio_processing/ns/speech_probability_estimator.cc new file mode 100644 index 0000000..65f17f4 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/speech_probability_estimator.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/speech_probability_estimator.h" + +#include + +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +SpeechProbabilityEstimator::SpeechProbabilityEstimator() { + speech_probability_.fill(0.f); +} + +void SpeechProbabilityEstimator::Update( + int32_t num_analyzed_frames, + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float signal_energy) { + // Update models. + if (num_analyzed_frames < kLongStartupPhaseBlocks) { + signal_model_estimator_.AdjustNormalization(num_analyzed_frames, + signal_energy); + } + signal_model_estimator_.Update(prior_snr, post_snr, + conservative_noise_spectrum, signal_spectrum, + signal_spectral_sum, signal_energy); + + const SignalModel& model = signal_model_estimator_.get_model(); + const PriorSignalModel& prior_model = + signal_model_estimator_.get_prior_model(); + + // Width parameter in sigmoid map for prior model. + constexpr float kWidthPrior0 = 4.f; + // Width for pause region: lower range, so increase width in tanh map. 
+ constexpr float kWidthPrior1 = 2.f * kWidthPrior0; + + // Average LRT feature: use larger width in tanh map for pause regions. + float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0; + + // Compute indicator function: sigmoid map. + float indicator0 = + 0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f); + + // Spectral flatness feature: use larger width in tanh map for pause regions. + width_prior = model.spectral_flatness > prior_model.flatness_threshold + ? kWidthPrior1 + : kWidthPrior0; + + // Compute indicator function: sigmoid map. + float indicator1 = + 0.5f * (tanh(1.f * width_prior * + (prior_model.flatness_threshold - model.spectral_flatness)) + + 1.f); + + // For template spectrum-difference : use larger width in tanh map for pause + // regions. + width_prior = model.spectral_diff < prior_model.template_diff_threshold + ? kWidthPrior1 + : kWidthPrior0; + + // Compute indicator function: sigmoid map. + float indicator2 = + 0.5f * (tanh(width_prior * (model.spectral_diff - + prior_model.template_diff_threshold)) + + 1.f); + + // Combine the indicator function with the feature weights. + float ind_prior = prior_model.lrt_weighting * indicator0 + + prior_model.flatness_weighting * indicator1 + + prior_model.difference_weighting * indicator2; + + // Compute the prior probability. + prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_); + + // Make sure probabilities are within range: keep floor to 0.01. + prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f); + + // Final speech probability: combine prior model with LR factor:. + float gain_prior = + (1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f); + + std::array inv_lrt; + ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt); + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]); + } +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/speech_probability_estimator.h b/VocieProcess/modules/audio_processing/ns/speech_probability_estimator.h new file mode 100644 index 0000000..259c3b6 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/speech_probability_estimator.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/signal_model_estimator.h" + +namespace webrtc { + +// Class for estimating the probability of speech. +class SpeechProbabilityEstimator { + public: + SpeechProbabilityEstimator(); + SpeechProbabilityEstimator(const SpeechProbabilityEstimator&) = delete; + SpeechProbabilityEstimator& operator=(const SpeechProbabilityEstimator&) = + delete; + + // Compute speech probability. 
+ void Update( + int32_t num_analyzed_frames, + rtc::ArrayView prior_snr, + rtc::ArrayView post_snr, + rtc::ArrayView conservative_noise_spectrum, + rtc::ArrayView signal_spectrum, + float signal_spectral_sum, + float signal_energy); + + float get_prior_probability() const { return prior_speech_prob_; } + rtc::ArrayView get_probability() { return speech_probability_; } + + private: + SignalModelEstimator signal_model_estimator_; + float prior_speech_prob_ = .5f; + std::array speech_probability_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ diff --git a/VocieProcess/modules/audio_processing/ns/suppression_params.cc b/VocieProcess/modules/audio_processing/ns/suppression_params.cc new file mode 100644 index 0000000..7bf1834 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/suppression_params.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/suppression_params.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +SuppressionParams::SuppressionParams( + NsConfig::SuppressionLevel suppression_level) { + switch (suppression_level) { + case NsConfig::SuppressionLevel::k6dB: + over_subtraction_factor = 1.f; + // 6 dB attenuation. + minimum_attenuating_gain = 0.5f; + use_attenuation_adjustment = false; + break; + case NsConfig::SuppressionLevel::k12dB: + over_subtraction_factor = 1.f; + // 12 dB attenuation. + minimum_attenuating_gain = 0.25f; + use_attenuation_adjustment = true; + break; + case NsConfig::SuppressionLevel::k18dB: + over_subtraction_factor = 1.1f; + // 18 dB attenuation. + minimum_attenuating_gain = 0.125f; + use_attenuation_adjustment = true; + break; + case NsConfig::SuppressionLevel::k21dB: + over_subtraction_factor = 1.25f; + // 20.9 dB attenuation. + minimum_attenuating_gain = 0.09f; + use_attenuation_adjustment = true; + break; + default: + RTC_DCHECK_NOTREACHED(); + } +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/suppression_params.h b/VocieProcess/modules/audio_processing/ns/suppression_params.h new file mode 100644 index 0000000..ad11977 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/suppression_params.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ +#define MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ + +#include "modules/audio_processing/ns/ns_config.h" + +namespace webrtc { + +struct SuppressionParams { + explicit SuppressionParams(NsConfig::SuppressionLevel suppression_level); + SuppressionParams(const SuppressionParams&) = delete; + SuppressionParams& operator=(const SuppressionParams&) = delete; + + float over_subtraction_factor; + float minimum_attenuating_gain; + bool use_attenuation_adjustment; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ diff --git a/VocieProcess/modules/audio_processing/ns/wiener_filter.cc b/VocieProcess/modules/audio_processing/ns/wiener_filter.cc new file mode 100644 index 0000000..1eb50a7 --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/wiener_filter.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/wiener_filter.h" + +#include +#include +#include + +#include + +#include "modules/audio_processing/ns/fast_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +WienerFilter::WienerFilter(const SuppressionParams& suppression_params) + : suppression_params_(suppression_params) { + filter_.fill(1.f); + initial_spectral_estimate_.fill(0.f); + spectrum_prev_process_.fill(0.f); +} + +void WienerFilter::Update( + int32_t num_analyzed_frames, + rtc::ArrayView noise_spectrum, + rtc::ArrayView prev_noise_spectrum, + rtc::ArrayView parametric_noise_spectrum, + rtc::ArrayView signal_spectrum) { + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + // Previous estimate based on previous frame with gain filter. + float prev_tsa = spectrum_prev_process_[i] / + (prev_noise_spectrum[i] + 0.0001f) * filter_[i]; + + // Current estimate. + float current_tsa; + if (signal_spectrum[i] > noise_spectrum[i]) { + current_tsa = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f; + } else { + current_tsa = 0.f; + } + + // Directed decision estimate is sum of two terms: current estimate and + // previous estimate. + float snr_prior = 0.98f * prev_tsa + (1.f - 0.98f) * current_tsa; + filter_[i] = + snr_prior / (suppression_params_.over_subtraction_factor + snr_prior); + filter_[i] = std::max(std::min(filter_[i], 1.f), + suppression_params_.minimum_attenuating_gain); + } + + if (num_analyzed_frames < kShortStartupPhaseBlocks) { + for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { + initial_spectral_estimate_[i] += signal_spectrum[i]; + float filter_initial = initial_spectral_estimate_[i] - + suppression_params_.over_subtraction_factor * + parametric_noise_spectrum[i]; + filter_initial /= initial_spectral_estimate_[i] + 0.0001f; + + filter_initial = std::max(std::min(filter_initial, 1.f), + suppression_params_.minimum_attenuating_gain); + + // Weight the two suppression filters. 
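During the first kShortStartupPhaseBlocks frames, the lines below cross-fade linearly between the parametric startup filter and the decision-directed Wiener gain; with n = num_analyzed_frames and N = kShortStartupPhaseBlocks:

    filter[i] = ((N - n) * filter_initial + n * filter[i]) / N

so the parametric estimate dominates initially and is phased out as real noise statistics accumulate.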
+ constexpr float kOnyByShortStartupPhaseBlocks = + 1.f / kShortStartupPhaseBlocks; + filter_initial *= kShortStartupPhaseBlocks - num_analyzed_frames; + filter_[i] *= num_analyzed_frames; + filter_[i] += filter_initial; + filter_[i] *= kOnyByShortStartupPhaseBlocks; + } + } + + std::copy(signal_spectrum.begin(), signal_spectrum.end(), + spectrum_prev_process_.begin()); +} + +float WienerFilter::ComputeOverallScalingFactor( + int32_t num_analyzed_frames, + float prior_speech_probability, + float energy_before_filtering, + float energy_after_filtering) const { + if (!suppression_params_.use_attenuation_adjustment || + num_analyzed_frames <= kLongStartupPhaseBlocks) { + return 1.f; + } + + float gain = SqrtFastApproximation(energy_after_filtering / + (energy_before_filtering + 1.f)); + + // Scaling for new version. Threshold in final energy gain factor calculation. + constexpr float kBLim = 0.5f; + float scale_factor1 = 1.f; + if (gain > kBLim) { + scale_factor1 = 1.f + 1.3f * (gain - kBLim); + if (gain * scale_factor1 > 1.f) { + scale_factor1 = 1.f / gain; + } + } + + float scale_factor2 = 1.f; + if (gain < kBLim) { + // Do not reduce scale too much for pause regions: attenuation here should + // be controlled by flooring. + gain = std::max(gain, suppression_params_.minimum_attenuating_gain); + scale_factor2 = 1.f - 0.3f * (kBLim - gain); + } + + // Combine both scales with speech/noise prob: note prior + // (prior_speech_probability) is not frequency dependent. + return prior_speech_probability * scale_factor1 + + (1.f - prior_speech_probability) * scale_factor2; +} + +} // namespace webrtc diff --git a/VocieProcess/modules/audio_processing/ns/wiener_filter.h b/VocieProcess/modules/audio_processing/ns/wiener_filter.h new file mode 100644 index 0000000..b55c5dc --- /dev/null +++ b/VocieProcess/modules/audio_processing/ns/wiener_filter.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/ns/ns_common.h" +#include "modules/audio_processing/ns/suppression_params.h" + +namespace webrtc { + +// Estimates a Wiener-filter based frequency domain noise reduction filter. +class WienerFilter { + public: + explicit WienerFilter(const SuppressionParams& suppression_params); + WienerFilter(const WienerFilter&) = delete; + WienerFilter& operator=(const WienerFilter&) = delete; + + // Updates the filter estimate. + void Update( + int32_t num_analyzed_frames, + rtc::ArrayView noise_spectrum, + rtc::ArrayView prev_noise_spectrum, + rtc::ArrayView parametric_noise_spectrum, + rtc::ArrayView signal_spectrum); + + // Compute an overall gain scaling factor. + float ComputeOverallScalingFactor(int32_t num_analyzed_frames, + float prior_speech_probability, + float energy_before_filtering, + float energy_after_filtering) const; + + // Returns the filter. 
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> get_filter() const {
+    return filter_;
+  }
+
+ private:
+  const SuppressionParams& suppression_params_;
+  std::array<float, kFftSizeBy2Plus1> spectrum_prev_process_;
+  std::array<float, kFftSizeBy2Plus1> initial_spectral_estimate_;
+  std::array<float, kFftSizeBy2Plus1> filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
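For orientation, a minimal usage sketch of the suppressor added in this patch; the constructor and the Analyze()/Process() calls follow the declarations in noise_suppressor.h above, while the 48 kHz stereo values and the wrapper function are assumptions made purely for illustration:

    #include "modules/audio_processing/audio_buffer.h"
    #include "modules/audio_processing/ns/noise_suppressor.h"
    #include "modules/audio_processing/ns/ns_config.h"

    // Construction, once per capture stream (illustrative values):
    //   webrtc::NsConfig cfg;
    //   cfg.target_level = webrtc::NsConfig::SuppressionLevel::k12dB;
    //   webrtc::NoiseSuppressor ns(cfg, /*sample_rate_hz=*/48000,
    //                              /*num_channels=*/2);

    // Per 10 ms frame, on an AudioBuffer that has already been band-split:
    void SuppressNoise(webrtc::NoiseSuppressor& ns, webrtc::AudioBuffer& capture) {
      ns.Analyze(capture);   // update noise/feature statistics (typically pre-AEC)
      ns.Process(&capture);  // apply the suppression gains in place
    }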