add ns code.

This commit is contained in:
luocai 2024-09-06 16:46:55 +08:00
parent 43166b5474
commit 35bf68338f
32 changed files with 3315 additions and 3 deletions

View File

@ -1,5 +1,5 @@
#include "IoContext.h"
#include "RKAP_3A.h"
// #include "RKAP_3A.h"
#include "SpeexDsp.h"
#include "WebRtcAecm.h"
#include "api/audio/echo_canceller3_config.h"
@ -18,7 +18,7 @@ public:
farendBuffer = std::make_unique<webrtc::AudioBuffer>(sampleRate, channels, sampleRate, channels, sampleRate, channels);
linearOutputBuffer = std::make_unique<webrtc::AudioBuffer>(sampleRate, channels, sampleRate, channels, sampleRate, channels);
RKAP_3A_Init(&m_vqe, AEC_TX_TYPE);
// RKAP_3A_Init(&m_vqe, AEC_TX_TYPE);
}
std::unique_ptr<webrtc::EchoControl> echoCanceller;
@ -26,7 +26,7 @@ public:
std::unique_ptr<webrtc::AudioBuffer> farendBuffer;
std::unique_ptr<webrtc::AudioBuffer> linearOutputBuffer;
RKAP_AEC_State m_vqe;
// RKAP_AEC_State m_vqe;
};
ProcessFileTask::ProcessFileTask() : m_d{new ProcessFileTaskPrivate()} {

View File

@ -55,6 +55,7 @@ add_library(VocieProcess
common_audio/third_party/ooura/fft_size_128/ooura_fft.h common_audio/third_party/ooura/fft_size_128/ooura_fft_neon.cc
common_audio/third_party/ooura/fft_size_128/ooura_fft.cc
common_audio/third_party/ooura/fft_size_256/fft4g.h common_audio/third_party/ooura/fft_size_256/fft4g.cc
common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
rtc_base/checks.h rtc_base/checks.cc
@ -147,6 +148,20 @@ add_library(VocieProcess
modules/audio_processing/logging/apm_data_dumper.h modules/audio_processing/logging/apm_data_dumper.cc
modules/audio_processing/ns/fast_math.h modules/audio_processing/ns/fast_math.cc
modules/audio_processing/ns/histograms.h modules/audio_processing/ns/histograms.cc
modules/audio_processing/ns/noise_estimator.h modules/audio_processing/ns/noise_estimator.cc
modules/audio_processing/ns/noise_suppressor.h modules/audio_processing/ns/noise_suppressor.cc
modules/audio_processing/ns/ns_fft.h modules/audio_processing/ns/ns_fft.cc
modules/audio_processing/ns/prior_signal_model_estimator.h modules/audio_processing/ns/prior_signal_model_estimator.cc
modules/audio_processing/ns/prior_signal_model.h modules/audio_processing/ns/prior_signal_model.cc
modules/audio_processing/ns/quantile_noise_estimator.h modules/audio_processing/ns/quantile_noise_estimator.cc
modules/audio_processing/ns/signal_model_estimator.h modules/audio_processing/ns/signal_model_estimator.cc
modules/audio_processing/ns/signal_model.h modules/audio_processing/ns/signal_model.cc
modules/audio_processing/ns/speech_probability_estimator.h modules/audio_processing/ns/speech_probability_estimator.cc
modules/audio_processing/ns/suppression_params.h modules/audio_processing/ns/suppression_params.cc
modules/audio_processing/ns/wiener_filter.h modules/audio_processing/ns/wiener_filter.cc
modules/audio_processing/utility/cascaded_biquad_filter.h modules/audio_processing/utility/cascaded_biquad_filter.cc
modules/audio_processing/utility/delay_estimator_wrapper.h modules/audio_processing/utility/delay_estimator_wrapper.cc
modules/audio_processing/utility/delay_estimator.h modules/audio_processing/utility/delay_estimator.cc

View File

@ -0,0 +1,866 @@
/*
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
* Copyright Takuya OOURA, 1996-2001
*
* You may use, copy, modify and distribute this code for any purpose (include
* commercial use) and without fee. Please refer to this package when you modify
* this code.
*
* Changes:
* Trivial type modifications by the WebRTC authors.
*/
/*
Fast Fourier/Cosine/Sine Transform
dimension :one
data length :power of 2
decimation :frequency
radix :4, 2
data :inplace
table :use
functions
cdft: Complex Discrete Fourier Transform
rdft: Real Discrete Fourier Transform
ddct: Discrete Cosine Transform
ddst: Discrete Sine Transform
dfct: Cosine Transform of RDFT (Real Symmetric DFT)
dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
function prototypes
void cdft(int, int, float *, int *, float *);
void rdft(size_t, int, float *, size_t *, float *);
void ddct(int, int, float *, int *, float *);
void ddst(int, int, float *, int *, float *);
void dfct(int, float *, float *, int *, float *);
void dfst(int, float *, float *, int *, float *);
-------- Complex DFT (Discrete Fourier Transform) --------
[definition]
<case1>
X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
<case2>
X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
(notes: sum_j=0^n-1 is a summation from j=0 to n-1)
[usage]
<case1>
ip[0] = 0; // first time only
cdft(2*n, 1, a, ip, w);
<case2>
ip[0] = 0; // first time only
cdft(2*n, -1, a, ip, w);
[parameters]
2*n :data length (int)
n >= 1, n = power of 2
a[0...2*n-1] :input/output data (float *)
input data
a[2*j] = Re(x[j]),
a[2*j+1] = Im(x[j]), 0<=j<n
output data
a[2*k] = Re(X[k]),
a[2*k+1] = Im(X[k]), 0<=k<n
ip[0...*] :work area for bit reversal (int *)
length of ip >= 2+sqrt(n)
strictly,
length of ip >=
2+(1<<(int)(log(n+0.5)/log(2))/2).
ip[0],ip[1] are pointers of the cos/sin table.
w[0...n/2-1] :cos/sin table (float *)
w[],ip[] are initialized if ip[0] == 0.
[remark]
Inverse of
cdft(2*n, -1, a, ip, w);
is
cdft(2*n, 1, a, ip, w);
for (j = 0; j <= 2 * n - 1; j++) {
a[j] *= 1.0 / n;
}
.
-------- Real DFT / Inverse of Real DFT --------
[definition]
<case1> RDFT
R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
<case2> IRDFT (excluding scale)
a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
[usage]
<case1>
ip[0] = 0; // first time only
rdft(n, 1, a, ip, w);
<case2>
ip[0] = 0; // first time only
rdft(n, -1, a, ip, w);
[parameters]
n :data length (size_t)
n >= 2, n = power of 2
a[0...n-1] :input/output data (float *)
<case1>
output data
a[2*k] = R[k], 0<=k<n/2
a[2*k+1] = I[k], 0<k<n/2
a[1] = R[n/2]
<case2>
input data
a[2*j] = R[j], 0<=j<n/2
a[2*j+1] = I[j], 0<j<n/2
a[1] = R[n/2]
ip[0...*] :work area for bit reversal (size_t *)
length of ip >= 2+sqrt(n/2)
strictly,
length of ip >=
2+(1<<(int)(log(n/2+0.5)/log(2))/2).
ip[0],ip[1] are pointers of the cos/sin table.
w[0...n/2-1] :cos/sin table (float *)
w[],ip[] are initialized if ip[0] == 0.
[remark]
Inverse of
rdft(n, 1, a, ip, w);
is
rdft(n, -1, a, ip, w);
for (j = 0; j <= n - 1; j++) {
a[j] *= 2.0 / n;
}
.
-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
[definition]
<case1> IDCT (excluding scale)
C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
<case2> DCT
C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
[usage]
<case1>
ip[0] = 0; // first time only
ddct(n, 1, a, ip, w);
<case2>
ip[0] = 0; // first time only
ddct(n, -1, a, ip, w);
[parameters]
n :data length (int)
n >= 2, n = power of 2
a[0...n-1] :input/output data (float *)
output data
a[k] = C[k], 0<=k<n
ip[0...*] :work area for bit reversal (int *)
length of ip >= 2+sqrt(n/2)
strictly,
length of ip >=
2+(1<<(int)(log(n/2+0.5)/log(2))/2).
ip[0],ip[1] are pointers of the cos/sin table.
w[0...n*5/4-1] :cos/sin table (float *)
w[],ip[] are initialized if ip[0] == 0.
[remark]
Inverse of
ddct(n, -1, a, ip, w);
is
a[0] *= 0.5;
ddct(n, 1, a, ip, w);
for (j = 0; j <= n - 1; j++) {
a[j] *= 2.0 / n;
}
.
-------- DST (Discrete Sine Transform) / Inverse of DST --------
[definition]
<case1> IDST (excluding scale)
S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
<case2> DST
S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
[usage]
<case1>
ip[0] = 0; // first time only
ddst(n, 1, a, ip, w);
<case2>
ip[0] = 0; // first time only
ddst(n, -1, a, ip, w);
[parameters]
n :data length (int)
n >= 2, n = power of 2
a[0...n-1] :input/output data (float *)
<case1>
input data
a[j] = A[j], 0<j<n
a[0] = A[n]
output data
a[k] = S[k], 0<=k<n
<case2>
output data
a[k] = S[k], 0<k<n
a[0] = S[n]
ip[0...*] :work area for bit reversal (int *)
length of ip >= 2+sqrt(n/2)
strictly,
length of ip >=
2+(1<<(int)(log(n/2+0.5)/log(2))/2).
ip[0],ip[1] are pointers of the cos/sin table.
w[0...n*5/4-1] :cos/sin table (float *)
w[],ip[] are initialized if ip[0] == 0.
[remark]
Inverse of
ddst(n, -1, a, ip, w);
is
a[0] *= 0.5;
ddst(n, 1, a, ip, w);
for (j = 0; j <= n - 1; j++) {
a[j] *= 2.0 / n;
}
.
-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
[definition]
C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
[usage]
ip[0] = 0; // first time only
dfct(n, a, t, ip, w);
[parameters]
n :data length - 1 (int)
n >= 2, n = power of 2
a[0...n] :input/output data (float *)
output data
a[k] = C[k], 0<=k<=n
t[0...n/2] :work area (float *)
ip[0...*] :work area for bit reversal (int *)
length of ip >= 2+sqrt(n/4)
strictly,
length of ip >=
2+(1<<(int)(log(n/4+0.5)/log(2))/2).
ip[0],ip[1] are pointers of the cos/sin table.
w[0...n*5/8-1] :cos/sin table (float *)
w[],ip[] are initialized if ip[0] == 0.
[remark]
Inverse of
a[0] *= 0.5;
a[n] *= 0.5;
dfct(n, a, t, ip, w);
is
a[0] *= 0.5;
a[n] *= 0.5;
dfct(n, a, t, ip, w);
for (j = 0; j <= n; j++) {
a[j] *= 2.0 / n;
}
.
-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
[definition]
S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
[usage]
ip[0] = 0; // first time only
dfst(n, a, t, ip, w);
[parameters]
n :data length + 1 (int)
n >= 2, n = power of 2
a[0...n-1] :input/output data (float *)
output data
a[k] = S[k], 0<k<n
(a[0] is used for work area)
t[0...n/2-1] :work area (float *)
ip[0...*] :work area for bit reversal (int *)
length of ip >= 2+sqrt(n/4)
strictly,
length of ip >=
2+(1<<(int)(log(n/4+0.5)/log(2))/2).
ip[0],ip[1] are pointers of the cos/sin table.
w[0...n*5/8-1] :cos/sin table (float *)
w[],ip[] are initialized if ip[0] == 0.
[remark]
Inverse of
dfst(n, a, t, ip, w);
is
dfst(n, a, t, ip, w);
for (j = 1; j <= n - 1; j++) {
a[j] *= 2.0 / n;
}
.
Appendix :
The cos/sin table is recalculated when the larger table required.
w[] and ip[] are compatible with all routines.
*/
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
#include <math.h>
#include <stddef.h>
namespace webrtc {
namespace {
void makewt(size_t nw, size_t* ip, float* w);
void makect(size_t nc, size_t* ip, float* c);
void bitrv2(size_t n, size_t* ip, float* a);
void cftfsub(size_t n, float* a, float* w);
void cftbsub(size_t n, float* a, float* w);
void cft1st(size_t n, float* a, float* w);
void cftmdl(size_t n, size_t l, float* a, float* w);
void rftfsub(size_t n, float* a, size_t nc, float* c);
void rftbsub(size_t n, float* a, size_t nc, float* c);
/* -------- initializing routines -------- */
// Initializes the cos/sin table `w` (nw entries) used by the complex FFT
// stages and records its size in the work area: ip[0] = nw, ip[1] = 1.
// For nw <= 2 only the two bookkeeping entries of `ip` are written. When
// nw/2 > 2 the table is filled pairwise and then permuted by bitrv2(), which
// uses ip + 2 as its scratch area.
void makewt(size_t nw, size_t* ip, float* w) {
  ip[0] = nw;
  ip[1] = 1;
  if (nw <= 2) {
    return;
  }

  const size_t half = nw >> 1;
  // atan(1) == pi/4, so `step * half` spans an eighth of a full turn.
  const float step = atanf(1.0f) / half;
  w[0] = 1;
  w[1] = 0;
  w[half] = static_cast<float>(cos(step * half));
  w[half + 1] = w[half];
  if (half <= 2) {
    return;
  }

  for (size_t idx = 2; idx < half; idx += 2) {
    const float cos_val = static_cast<float>(cos(step * idx));
    const float sin_val = static_cast<float>(sin(step * idx));
    w[idx] = cos_val;
    w[idx + 1] = sin_val;
    // Mirror entry with cos/sin exchanged.
    w[nw - idx] = sin_val;
    w[nw - idx + 1] = cos_val;
  }
  bitrv2(nw, ip + 2, w);
}
// Initializes the table `c` (nc entries) consumed by rftfsub()/rftbsub() and
// records its size in ip[1]. For nc <= 1 only ip[1] is written.
// Layout for nc > 1, with step = (pi/4) / (nc/2):
//   c[0]      = cos(step * nc/2)
//   c[nc/2]   = 0.5 * c[0]
//   c[j]      = 0.5 * cos(step * j),  0 < j < nc/2
//   c[nc - j] = 0.5 * sin(step * j),  0 < j < nc/2
void makect(size_t nc, size_t* ip, float* c) {
  ip[1] = nc;
  if (nc <= 1) {
    return;
  }

  const size_t half = nc >> 1;
  const float step = atanf(1.0f) / half;
  c[0] = static_cast<float>(cos(step * half));
  c[half] = 0.5f * c[0];
  for (size_t idx = 1; idx < half; ++idx) {
    c[idx] = 0.5f * static_cast<float>(cos(step * idx));
    c[nc - idx] = 0.5f * static_cast<float>(sin(step * idx));
  }
}
/* -------- child routines -------- */
// Reorders the n floats of `a`, viewed as n/2 interleaved (re, im) pairs,
// by exchanging pairs in place. `ip` is scratch space: ip[0] is reset to 0
// and the following entries are filled with the offset table that drives
// the exchanges.
void bitrv2(size_t n, size_t* ip, float* a) {
  // Exchanges the (re, im) pair stored at float offset p with the one at q.
  const auto swap_pair = [a](size_t p, size_t q) {
    const float p_re = a[p];
    const float p_im = a[p + 1];
    const float q_re = a[q];
    const float q_im = a[q + 1];
    a[p] = q_re;
    a[p + 1] = q_im;
    a[q] = p_re;
    a[q + 1] = p_im;
  };

  // Build the offset table, doubling its length on every pass.
  ip[0] = 0;
  size_t span = n;
  size_t count = 1;
  while ((count << 3) < span) {
    span >>= 1;
    for (size_t i = 0; i < count; ++i) {
      ip[count + i] = ip[i] + span;
    }
    count <<= 1;
  }
  const size_t stride = 2 * count;

  if ((count << 3) == span) {
    for (size_t k = 0; k < count; ++k) {
      for (size_t j = 0; j < k; ++j) {
        size_t p = 2 * j + ip[k];
        size_t q = 2 * k + ip[j];
        swap_pair(p, q);
        p += stride;
        q += 2 * stride;
        swap_pair(p, q);
        p += stride;
        q -= stride;
        swap_pair(p, q);
        p += stride;
        q += 2 * stride;
        swap_pair(p, q);
      }
      // Extra exchange for the j == k position.
      const size_t p = 2 * k + stride + ip[k];
      swap_pair(p, p + stride);
    }
  } else {
    for (size_t k = 1; k < count; ++k) {
      for (size_t j = 0; j < k; ++j) {
        const size_t p = 2 * j + ip[k];
        const size_t q = 2 * k + ip[j];
        swap_pair(p, q);
        swap_pair(p + stride, q + stride);
      }
    }
  }
}
// Forward complex FFT stages, performed in place on the n floats of `a`
// (interleaved re/im pairs) using the cos/sin table `w`.
// For n > 8 the work is split into cft1st() followed by cftmdl() passes with
// the stride l multiplied by 4 each time; a final pass over the whole array
// is done here, combining either four or two inputs per butterfly depending
// on whether 4 * l equals n.
void cftfsub(size_t n, float* a, float* w) {
size_t j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
l = 2;
if (n > 8) {
cft1st(n, a, w);
l = 8;
while ((l << 2) < n) {
cftmdl(n, l, a, w);
l <<= 2;
}
}
if ((l << 2) == n) {
// Last pass combines four inputs spaced l floats apart.
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
} else {
// Last pass combines two inputs spaced l floats apart (sum / difference).
for (j = 0; j < l; j += 2) {
j1 = j + l;
x0r = a[j] - a[j1];
x0i = a[j + 1] - a[j1 + 1];
a[j] += a[j1];
a[j + 1] += a[j1 + 1];
a[j1] = x0r;
a[j1 + 1] = x0i;
}
}
}
// Backward counterpart of cftfsub(): runs the same cft1st()/cftmdl() passes
// for n > 8, but the final pass here negates the imaginary parts it reads
// from the first half of each butterfly (note the leading minus signs on
// a[j + 1]), so the output differs from cftfsub() by those sign changes.
// Operates in place on the n floats of `a` using the cos/sin table `w`.
void cftbsub(size_t n, float* a, float* w) {
size_t j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
l = 2;
if (n > 8) {
cft1st(n, a, w);
l = 8;
while ((l << 2) < n) {
cftmdl(n, l, a, w);
l <<= 2;
}
}
if ((l << 2) == n) {
// Last pass combines four inputs spaced l floats apart.
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = -a[j + 1] - a[j1 + 1];
x1r = a[j] - a[j1];
x1i = -a[j + 1] + a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i - x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i + x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i - x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i + x3r;
}
} else {
// Last pass combines two inputs spaced l floats apart.
for (j = 0; j < l; j += 2) {
j1 = j + l;
x0r = a[j] - a[j1];
x0i = -a[j + 1] + a[j1 + 1];
a[j] += a[j1];
a[j + 1] = -a[j + 1] - a[j1 + 1];
a[j1] = x0r;
a[j1 + 1] = x0i;
}
}
}
// First FFT pass over the n floats of `a` (interleaved re/im pairs), done in
// place on consecutive groups of 16 floats. a[0..7] is combined without any
// table lookup, a[8..15] uses only w[2], and every later group of 16 reads
// its factors from `w` at indices k1 and k2 = 2 * k1, where k1 advances by 2
// per group. Reads a[0..15] unconditionally, so the caller must pass
// n >= 16 (cftfsub()/cftbsub() only call this for n > 8).
void cft1st(size_t n, float* a, float* w) {
size_t j, k1, k2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
// Group a[0..7]: no multiplications needed.
x0r = a[0] + a[2];
x0i = a[1] + a[3];
x1r = a[0] - a[2];
x1i = a[1] - a[3];
x2r = a[4] + a[6];
x2i = a[5] + a[7];
x3r = a[4] - a[6];
x3i = a[5] - a[7];
a[0] = x0r + x2r;
a[1] = x0i + x2i;
a[4] = x0r - x2r;
a[5] = x0i - x2i;
a[2] = x1r - x3i;
a[3] = x1i + x3r;
a[6] = x1r + x3i;
a[7] = x1i - x3r;
// Group a[8..15]: the only table value used is w[2].
wk1r = w[2];
x0r = a[8] + a[10];
x0i = a[9] + a[11];
x1r = a[8] - a[10];
x1i = a[9] - a[11];
x2r = a[12] + a[14];
x2i = a[13] + a[15];
x3r = a[12] - a[14];
x3i = a[13] - a[15];
a[8] = x0r + x2r;
a[9] = x0i + x2i;
a[12] = x2i - x0i;
a[13] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[10] = wk1r * (x0r - x0i);
a[11] = wk1r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[14] = wk1r * (x0i - x0r);
a[15] = wk1r * (x0i + x0r);
// Remaining groups of 16 floats, each with its own factors from `w`.
k1 = 0;
for (j = 16; j < n; j += 16) {
k1 += 2;
k2 = 2 * k1;
wk2r = w[k1];
wk2i = w[k1 + 1];
wk1r = w[k2];
wk1i = w[k2 + 1];
// Third factor is derived from the other two rather than read from `w`.
wk3r = wk1r - 2 * wk2i * wk1i;
wk3i = 2 * wk2i * wk1r - wk1i;
// First half of the group: a[j .. j + 7].
x0r = a[j] + a[j + 2];
x0i = a[j + 1] + a[j + 3];
x1r = a[j] - a[j + 2];
x1i = a[j + 1] - a[j + 3];
x2r = a[j + 4] + a[j + 6];
x2i = a[j + 5] + a[j + 7];
x3r = a[j + 4] - a[j + 6];
x3i = a[j + 5] - a[j + 7];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 4] = wk2r * x0r - wk2i * x0i;
a[j + 5] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 2] = wk1r * x0r - wk1i * x0i;
a[j + 3] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 6] = wk3r * x0r - wk3i * x0i;
a[j + 7] = wk3r * x0i + wk3i * x0r;
// Second half of the group: a[j + 8 .. j + 15], using the next table pair.
wk1r = w[k2 + 2];
wk1i = w[k2 + 3];
wk3r = wk1r - 2 * wk2r * wk1i;
wk3i = 2 * wk2r * wk1r - wk1i;
x0r = a[j + 8] + a[j + 10];
x0i = a[j + 9] + a[j + 11];
x1r = a[j + 8] - a[j + 10];
x1i = a[j + 9] - a[j + 11];
x2r = a[j + 12] + a[j + 14];
x2i = a[j + 13] + a[j + 15];
x3r = a[j + 12] - a[j + 14];
x3i = a[j + 13] - a[j + 15];
a[j + 8] = x0r + x2r;
a[j + 9] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 12] = -wk2i * x0r - wk2r * x0i;
a[j + 13] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 10] = wk1r * x0r - wk1i * x0i;
a[j + 11] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 14] = wk3r * x0r - wk3i * x0i;
a[j + 15] = wk3r * x0i + wk3i * x0r;
}
}
// Middle FFT pass with input spacing `l` floats, done in place on the n
// floats of `a` (interleaved re/im pairs). Each butterfly combines the four
// pairs at j, j + l, j + 2l and j + 3l. With m = 4 * l the array is walked
// in blocks of m floats: block 0 needs no table lookup, block 1 (starting at
// m) uses only w[2], and the remaining blocks are processed two at a time
// (starting at k and k + m, k stepping by 2 * m) with factors read from `w`
// at indices k1 and k2 = 2 * k1.
void cftmdl(size_t n, size_t l, float* a, float* w) {
size_t j, j1, j2, j3, k, k1, k2, m, m2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
m = l << 2;
// Block 0: no multiplications needed.
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
// Block 1 (offset m): the only table value used is w[2].
wk1r = w[2];
for (j = m; j < l + m; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
a[j2] = x2i - x0i;
a[j2 + 1] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1] = wk1r * (x0r - x0i);
a[j1 + 1] = wk1r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[j3] = wk1r * (x0i - x0r);
a[j3 + 1] = wk1r * (x0i + x0r);
}
// Remaining blocks, handled in pairs (offsets k and k + m).
k1 = 0;
m2 = 2 * m;
for (k = m2; k < n; k += m2) {
k1 += 2;
k2 = 2 * k1;
wk2r = w[k1];
wk2i = w[k1 + 1];
wk1r = w[k2];
wk1i = w[k2 + 1];
// Third factor is derived from the other two rather than read from `w`.
wk3r = wk1r - 2 * wk2i * wk1i;
wk3i = 2 * wk2i * wk1r - wk1i;
// First block of the pair.
for (j = k; j < l + k; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2] = wk2r * x0r - wk2i * x0i;
a[j2 + 1] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
// Second block of the pair, using the next table pair.
wk1r = w[k2 + 2];
wk1i = w[k2 + 3];
wk3r = wk1r - 2 * wk2r * wk1i;
wk3i = 2 * wk2r * wk1r - wk1i;
for (j = k + m; j < l + (k + m); j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2] = -wk2i * x0r - wk2r * x0i;
a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
}
}
// Forward real-FFT post-processing step, done in place on the n floats of
// `a`. Walks pairs (lo, hi = n - lo) for lo = 2, 4, ... < n/2 and adjusts
// both using factors from the table `c` (nc entries, as built by makect()).
// The table is sampled with a step of 2 * nc / (n / 2).
void rftfsub(size_t n, float* a, size_t nc, float* c) {
  const size_t half = n >> 1;
  const size_t table_step = 2 * nc / half;
  size_t table_pos = 0;
  for (size_t lo = 2; lo < half; lo += 2) {
    const size_t hi = n - lo;
    table_pos += table_step;
    const float wkr = 0.5f - c[nc - table_pos];
    const float wki = c[table_pos];
    const float xr = a[lo] - a[hi];
    const float xi = a[lo + 1] + a[hi + 1];
    const float yr = wkr * xr - wki * xi;
    const float yi = wkr * xi + wki * xr;
    a[lo] -= yr;
    a[lo + 1] -= yi;
    a[hi] += yr;
    a[hi + 1] -= yi;
  }
}
// Backward real-FFT pre-processing step, done in place on the n floats of
// `a`. Negates a[1] and a[n/2 + 1], and for every pair (lo, hi = n - lo) with
// lo = 2, 4, ... < n/2 adjusts both entries using factors from the table `c`
// (nc entries, as built by makect()), sampled with a step of
// 2 * nc / (n / 2).
void rftbsub(size_t n, float* a, size_t nc, float* c) {
  a[1] = -a[1];
  const size_t half = n >> 1;
  const size_t table_step = 2 * nc / half;
  size_t table_pos = 0;
  for (size_t lo = 2; lo < half; lo += 2) {
    const size_t hi = n - lo;
    table_pos += table_step;
    const float wkr = 0.5f - c[nc - table_pos];
    const float wki = c[table_pos];
    const float xr = a[lo] - a[hi];
    const float xi = a[lo + 1] + a[hi + 1];
    const float yr = wkr * xr + wki * xi;
    const float yi = wkr * xi - wki * xr;
    a[lo] -= yr;
    a[lo + 1] = yi - a[lo + 1];
    a[hi] += yr;
    a[hi + 1] = yi - a[hi + 1];
  }
  a[half + 1] = -a[half + 1];
}
} // namespace
// Real DFT (isgn >= 0) or its unscaled inverse (isgn < 0), computed in place
// on the n floats of `a`.
//   ip: integer work area. ip[0] and ip[1] hold the sizes of the two tables
//       currently stored in `w`; the rest is scratch for bitrv2(). Set
//       ip[0] = 0 before the first call so the tables get built.
//   w:  cos/sin tables. The first ip[0] entries belong to the complex FFT
//       stages, the following ip[1] entries to rftfsub()/rftbsub().
// A table is (re)built whenever n exceeds four times its recorded size.
void WebRtc_rdft(size_t n, int isgn, float* a, size_t* ip, float* w) {
  size_t nw = ip[0];
  if (n > (nw << 2)) {
    nw = n >> 2;
    makewt(nw, ip, w);
  }
  // Read after makewt(), which resets ip[1].
  size_t nc = ip[1];
  if (n > (nc << 2)) {
    nc = n >> 2;
    makect(nc, ip, w + nw);
  }
  float* const c = w + nw;

  if (isgn < 0) {
    // Inverse transform.
    a[1] = 0.5f * (a[0] - a[1]);
    a[0] -= a[1];
    if (n > 4) {
      rftbsub(n, a, nc, c);
      bitrv2(n, ip + 2, a);
      cftbsub(n, a, w);
    } else if (n == 4) {
      cftfsub(n, a, w);
    }
    return;
  }

  // Forward transform.
  if (n > 4) {
    bitrv2(n, ip + 2, a);
    cftfsub(n, a, w);
    rftfsub(n, a, nc, c);
  } else if (n == 4) {
    cftfsub(n, a, w);
  }
  const float diff = a[0] - a[1];
  a[0] += a[1];
  a[1] = diff;
}
} // namespace webrtc

View File

@ -0,0 +1,23 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the ../../../LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_
#define COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_
#include <stddef.h>
namespace webrtc {
// Real discrete Fourier transform, computed in place.
//   n:    number of floats in `a`; the accompanying documentation requires
//         n >= 2 and n to be a power of 2.
//   isgn: >= 0 selects the forward transform, < 0 the inverse (unscaled; the
//         caller multiplies by 2.0 / n to recover the original data).
//   a:    input/output data, a[0...n-1].
//   ip:   integer work area; set ip[0] = 0 before the first call so the
//         tables in `w` get initialized.
//   w:    cos/sin table storage.
// Refer to the comment block at the top of fft4g.cc for the full
// documentation, including the output layout and required buffer sizes.
void WebRtc_rdft(size_t n, int isgn, float* a, size_t* ip, float* w);
} // namespace webrtc
#endif // COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_

View File

@ -0,0 +1,84 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/fast_math.h"
#include <math.h>
#include <stdint.h>
#include <string.h>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Fast approximation of log2(in) for in > 0, derived from the IEEE-754 bit
// pattern of the float.
float FastLog2f(float in) {
  RTC_DCHECK_GT(in, .0f);
  // Read and interpret float as uint32_t and then cast to float.
  // This is done to extract the exponent (bits 30 - 23).
  // "Right shift" of the exponent is then performed by multiplying
  // with the constant (1/2^23). Finally, we subtract a constant to
  // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias).
  //
  // The bits are copied with memcpy rather than read through a union:
  // reading a union member other than the one last written is undefined
  // behavior in C++.
  static_assert(sizeof(uint32_t) == sizeof(float),
                "float must be 32 bits wide");
  uint32_t bits;
  memcpy(&bits, &in, sizeof(bits));
  float out = static_cast<float>(bits);
  out *= 1.1920929e-7f;  // 1/2^23
  out -= 126.942695f;    // Remove bias.
  return out;
}
} // namespace
// Square root of `f`. Currently forwards to sqrtf(), so the result is not an
// approximation yet.
float SqrtFastApproximation(float f) {
  // TODO(peah): Add fast approximate implementation.
  const float root = sqrtf(f);
  return root;
}
// Returns 2 raised to the power `p`. Currently forwards to powf(), so the
// result is not an approximation yet.
float Pow2Approximation(float p) {
  // TODO(peah): Add fast approximate implementation.
  const float power_of_two = powf(2.f, p);
  return power_of_two;
}
// Approximates x^p as 2^(p * log2(x)), using the fast log2 approximation.
// `x` must be positive (checked in debug builds by FastLog2f()).
float PowApproximation(float x, float p) {
  const float log2_of_x = FastLog2f(x);
  return Pow2Approximation(p * log2_of_x);
}
// Approximates the natural logarithm of `x` as log2(x) * ln(2).
// `x` must be positive (checked in debug builds by FastLog2f()).
float LogApproximation(float x) {
  constexpr float kNaturalLogOf2 = 0.69314718056f;
  const float log2_of_x = FastLog2f(x);
  return log2_of_x * kNaturalLogOf2;
}
// Element-wise natural-log approximation: y[i] = LogApproximation(x[i]) for
// every element of `x`. `y` must hold at least x.size() elements.
void LogApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y) {
  const size_t count = x.size();
  for (size_t i = 0; i != count; ++i) {
    y[i] = LogApproximation(x[i]);
  }
}
// Approximates e^x as 10^(x * log10(e)).
float ExpApproximation(float x) {
  constexpr float kLog10OfE = 0.4342944819f;
  const float base10_exponent = x * kLog10OfE;
  return PowApproximation(10.f, base10_exponent);
}
// Element-wise e^x approximation: y[i] = ExpApproximation(x[i]) for every
// element of `x`. `y` must hold at least x.size() elements.
void ExpApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y) {
  const size_t count = x.size();
  for (size_t i = 0; i != count; ++i) {
    y[i] = ExpApproximation(x[i]);
  }
}
// Element-wise e^(-x) approximation: y[i] = ExpApproximation(-x[i]) for
// every element of `x`. `y` must hold at least x.size() elements.
void ExpApproximationSignFlip(rtc::ArrayView<const float> x,
                              rtc::ArrayView<float> y) {
  const size_t count = x.size();
  for (size_t i = 0; i != count; ++i) {
    const float negated = -x[i];
    y[i] = ExpApproximation(negated);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,38 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_
#define MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_
#include "api/array_view.h"
namespace webrtc {
// Square root of `f`. The current implementation calls sqrtf() directly.
float SqrtFastApproximation(float f);
// Natural-log approximation, computed as log2(x) * ln(2) from a fast log2
// estimate. `x` must be positive. The array overload writes
// y[k] = LogApproximation(x[k]) for every element of `x`.
float LogApproximation(float x);
void LogApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
// 2^p. The current implementation calls powf() directly.
float Pow2Approximation(float p);
// x^p approximation, computed as 2^(p * log2(x)). `x` must be positive.
float PowApproximation(float x, float p);
// e^x approximation, computed as 10^(x * log10(e)). The array overload
// writes y[k] = ExpApproximation(x[k]); the SignFlip variant writes
// y[k] = ExpApproximation(-x[k]).
float ExpApproximation(float x);
void ExpApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
void ExpApproximationSignFlip(rtc::ArrayView<const float> x,
rtc::ArrayView<float> y);
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/histograms.h"
namespace webrtc {
// Creates the histograms with every bin set to zero.
Histograms::Histograms() {
Clear();
}
void Histograms::Clear() {
lrt_.fill(0);
spectral_flatness_.fill(0);
spectral_diff_.fill(0);
}
void Histograms::Update(const SignalModel& features_) {
// Update the histogram for the LRT.
constexpr float kOneByBinSizeLrt = 1.f / kBinSizeLrt;
if (features_.lrt < kHistogramSize * kBinSizeLrt && features_.lrt >= 0.f) {
++lrt_[kOneByBinSizeLrt * features_.lrt];
}
// Update histogram for the spectral flatness.
constexpr float kOneByBinSizeSpecFlat = 1.f / kBinSizeSpecFlat;
if (features_.spectral_flatness < kHistogramSize * kBinSizeSpecFlat &&
features_.spectral_flatness >= 0.f) {
++spectral_flatness_[features_.spectral_flatness * kOneByBinSizeSpecFlat];
}
// Update histogram for the spectral difference.
constexpr float kOneByBinSizeSpecDiff = 1.f / kBinSizeSpecDiff;
if (features_.spectral_diff < kHistogramSize * kBinSizeSpecDiff &&
features_.spectral_diff >= 0.f) {
++spectral_diff_[features_.spectral_diff * kOneByBinSizeSpecDiff];
}
}
} // namespace webrtc

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_
#define MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/ns/ns_common.h"
#include "modules/audio_processing/ns/signal_model.h"
namespace webrtc {
// Number of bins in each histogram kept by Histograms.
constexpr int kHistogramSize = 1000;
// Maintains three fixed-size histograms of signal-model features: the LRT,
// the spectral flatness and the spectral difference. Not copyable.
class Histograms {
public:
// Creates the histograms with all bins cleared.
Histograms();
Histograms(const Histograms&) = delete;
Histograms& operator=(const Histograms&) = delete;
// Clears the histograms (sets every bin to zero).
void Clear();
// Adds the feature values in `features_` to the corresponding histograms.
// Each value increments the bin it falls into; values that are negative or
// beyond the last bin are ignored.
void Update(const SignalModel& features_);
// Methods for accessing the histograms. The returned views refer to the
// storage inside this object.
rtc::ArrayView<const int, kHistogramSize> get_lrt() const { return lrt_; }
rtc::ArrayView<const int, kHistogramSize> get_spectral_flatness() const {
return spectral_flatness_;
}
rtc::ArrayView<const int, kHistogramSize> get_spectral_diff() const {
return spectral_diff_;
}
private:
// Bin counts, one array per feature.
std::array<int, kHistogramSize> lrt_;
std::array<int, kHistogramSize> spectral_flatness_;
std::array<int, kHistogramSize> spectral_diff_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_

View File

@ -0,0 +1,195 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/noise_estimator.h"
#include <algorithm>
#include "modules/audio_processing/ns/fast_math.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Natural logarithm lookup: log_table[i] == log(i) for 5 <= i <= 128.
// Entries 0 through 4 are zero placeholders rather than real logarithms;
// NoiseEstimator::PreUpdate() only reads the table from index 5 (its
// kStartBand) upwards.
constexpr std::array<float, 129> log_table = {
0.f, 0.f, 0.f, 0.f, 0.f, 1.609438f, 1.791759f,
1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f,
2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f,
3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f,
3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f,
3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f,
3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f,
3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f,
4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f,
4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f,
4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f,
4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f,
4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f,
4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f,
4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f,
4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f,
4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f,
4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f,
4.836282f, 4.844187f, 4.852030f};
} // namespace
// Stores the suppression parameters and starts with all four noise spectra
// (current, previous, conservative and parametric) set to zero.
NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params)
: suppression_params_(suppression_params) {
noise_spectrum_.fill(0.f);
prev_noise_spectrum_.fill(0.f);
conservative_noise_spectrum_.fill(0.f);
parametric_noise_spectrum_.fill(0.f);
}
// Saves the current noise spectrum as the previous one by copying
// noise_spectrum_ into prev_noise_spectrum_.
// NOTE(review): presumably called before each new frame is analyzed; confirm
// against the caller.
void NoiseEstimator::PrepareAnalysis() {
std::copy(noise_spectrum_.begin(), noise_spectrum_.end(),
prev_noise_spectrum_.begin());
}
// Performs the first step of the noise estimate update for one frame.
//
// The quantile-based estimate is always refreshed into noise_spectrum_. While
// num_analyzed_frames is below kShortStartupPhaseBlocks, a parametric
// (white/pink) noise model is additionally fitted to signal_spectrum, stored in
// parametric_noise_spectrum_, and blended into noise_spectrum_ with a weight
// that shrinks as more frames are analyzed.
//
// num_analyzed_frames: frame counter supplied by the caller.
// signal_spectrum:     magnitude spectrum of the current frame.
// signal_spectral_sum: sum of the entries of signal_spectrum, as computed by
//                      the caller.
void NoiseEstimator::PreUpdate(
int32_t num_analyzed_frames,
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
float signal_spectral_sum) {
quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_);
if (num_analyzed_frames < kShortStartupPhaseBlocks) {
// Compute simplified noise model during startup.
// Bins below kStartBand are excluded from the fit.
const size_t kStartBand = 5;
// Accumulators for a least-squares straight-line fit of the log magnitude
// against log_table[i] (the tabulated log of the bin index).
float sum_log_i_log_magn = 0.f;
float sum_log_i = 0.f;
float sum_log_i_square = 0.f;
float sum_log_magn = 0.f;
for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) {
float log_i = log_table[i];
sum_log_i += log_i;
sum_log_i_square += log_i * log_i;
float log_signal = LogApproximation(signal_spectrum[i]);
sum_log_magn += log_signal;
sum_log_i_log_magn += log_i * log_signal;
}
// Estimate the parameter for the level of the white noise.
// The level is accumulated across startup frames (mean magnitude scaled by
// the over-subtraction factor is added on every call).
constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 *
suppression_params_.over_subtraction_factor;
// Estimate pink noise parameters.
// denom is the common denominator of the least-squares solution; the first
// num/denom below is the fitted intercept.
float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) -
sum_log_i * sum_log_i;
float num =
sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn;
RTC_DCHECK_NE(denom, 0.f);
float pink_noise_adjustment = num / denom;
// Constrain the estimated spectrum to be positive.
pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f);
pink_noise_numerator_ += pink_noise_adjustment;
// The second num/denom is the negated slope of the fitted line, i.e. the
// decay exponent of the pink noise model.
num = sum_log_i * sum_log_magn -
(kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn;
RTC_DCHECK_NE(denom, 0.f);
pink_noise_adjustment = num / denom;
// Constrain the pink noise power to be in the interval [0, 1].
pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f);
pink_noise_exp_ += pink_noise_adjustment;
// The accumulated parameters are turned into per-frame averages by dividing
// by the number of frames seen so far (num_analyzed_frames + 1).
const float one_by_num_analyzed_frames_plus_1 =
1.f / (num_analyzed_frames + 1.f);
// Calculate the frequency-independent parts of parametric noise estimate.
float parametric_exp = 0.f;
float parametric_num = 0.f;
if (pink_noise_exp_ > 0.f) {
// Use pink noise estimate.
parametric_num = ExpApproximation(pink_noise_numerator_ *
one_by_num_analyzed_frames_plus_1);
// Scaled up by the frame count here; the blending loop below multiplies the
// parametric term by one_by_num_analyzed_frames_plus_1 again.
parametric_num *= num_analyzed_frames + 1.f;
parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1;
}
constexpr float kOneByShortStartupPhaseBlocks =
1.f / kShortStartupPhaseBlocks;
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
// Estimate the background noise using the white and pink noise
// parameters.
if (pink_noise_exp_ == 0.f) {
// Use white noise estimate.
parametric_noise_spectrum_[i] = white_noise_level_;
} else {
// Use pink noise estimate.
// Bins below kStartBand reuse the value computed for kStartBand.
float use_band = i < kStartBand ? kStartBand : i;
float denom = PowApproximation(use_band, parametric_exp);
RTC_DCHECK_NE(denom, 0.f);
parametric_noise_spectrum_[i] = parametric_num / denom;
}
}
// Weight quantile noise with modeled noise.
// Result per bin:
//   (n * quantile + (kShortStartupPhaseBlocks - n) * parametric / (n + 1))
//       / kShortStartupPhaseBlocks
// where n is num_analyzed_frames.
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
noise_spectrum_[i] *= num_analyzed_frames;
float tmp = parametric_noise_spectrum_[i] *
(kShortStartupPhaseBlocks - num_analyzed_frames);
noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1;
noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks;
}
}
}
// Performs the second step of the estimator update: recursively smooths the
// noise spectrum towards the current signal spectrum, weighting each bin by its
// speech probability, and refreshes the conservative noise estimate for bins
// that are unlikely to contain speech.
void NoiseEstimator::PostUpdate(
    rtc::ArrayView<const float> speech_probability,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
  // Time-averaging factor used when a bin is regarded as noise.
  constexpr float kNoiseUpdate = 0.9f;
  // Time-averaging factor used when a bin is likely to be speech.
  constexpr float kSpeechUpdate = .99f;
  // Speech probability separating the "noise" and "speech" regimes.
  constexpr float kProbRange = .2f;
  // Step size of the conservative noise estimate.
  constexpr float kConservativeStep = 0.05f;

  // Note: the factor selected for one bin is carried into the next iteration,
  // where it is used for that bin's provisional update.
  float smoothing = kNoiseUpdate;
  for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
    const float p_speech = speech_probability[bin];
    const float p_noise = 1.f - p_speech;

    // Recursive average of the previous noise and a probability-weighted mix
    // of the current signal and the previous noise.
    const auto smoothed_noise = [&](float factor) {
      return factor * prev_noise_spectrum_[bin] +
             (1.f - factor) * (p_noise * signal_spectrum[bin] +
                               p_speech * prev_noise_spectrum_[bin]);
    };

    // Provisional update computed with the factor inherited from the previous
    // bin; it also acts as an upper bound when the factor changes below.
    const float provisional = smoothed_noise(smoothing);

    // Pick the factor matching this bin's speech/noise state.
    const float inherited_smoothing = smoothing;
    smoothing = p_speech > kProbRange ? kSpeechUpdate : kNoiseUpdate;

    // Conservative noise spectrum update, only for bins unlikely to be speech.
    if (p_speech < kProbRange) {
      conservative_noise_spectrum_[bin] +=
          kConservativeStep *
          (signal_spectrum[bin] - conservative_noise_spectrum_[bin]);
    }

    // Noise spectrum update.
    if (smoothing == inherited_smoothing) {
      noise_spectrum_[bin] = provisional;
      continue;
    }
    // The factor changed: recompute with the new factor, but still allow the
    // estimate to move downwards, since a decrease of the noise estimate is
    // considered safe.
    noise_spectrum_[bin] = std::min(smoothed_noise(smoothing), provisional);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/ns/ns_common.h"
#include "modules/audio_processing/ns/quantile_noise_estimator.h"
#include "modules/audio_processing/ns/suppression_params.h"
namespace webrtc {
// Class for estimating the spectral characteristics of the noise in an incoming
// signal.
//
// Per frame, the update is split into PrepareAnalysis(), PreUpdate() and
// PostUpdate(); the getters expose views onto the internal spectra, each
// holding kFftSizeBy2Plus1 bins.
class NoiseEstimator {
public:
// Stores a reference to `suppression_params` (no copy is made), so the
// referenced object must outlive this estimator.
explicit NoiseEstimator(const SuppressionParams& suppression_params);
// Prepare the estimator for analysis of a new frame.
// Saves the current noise spectrum as the previous-frame noise spectrum.
void PrepareAnalysis();
// Performs the first step of the estimator update.
// Refreshes the noise spectrum from the quantile estimator and, during the
// first kShortStartupPhaseBlocks frames, blends in a parametric noise model.
void PreUpdate(int32_t num_analyzed_frames,
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
float signal_spectral_sum);
// Performs the second step of the estimator update.
// `speech_probability` is indexed per bin for kFftSizeBy2Plus1 bins; its size
// is not checked by the type.
void PostUpdate(
rtc::ArrayView<const float> speech_probability,
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum);
// Returns the noise spectral estimate.
rtc::ArrayView<const float, kFftSizeBy2Plus1> get_noise_spectrum() const {
return noise_spectrum_;
}
// Returns the noise from the previous frame.
rtc::ArrayView<const float, kFftSizeBy2Plus1> get_prev_noise_spectrum()
const {
return prev_noise_spectrum_;
}
// Returns a noise spectral estimate based on white and pink noise parameters.
rtc::ArrayView<const float, kFftSizeBy2Plus1> get_parametric_noise_spectrum()
const {
return parametric_noise_spectrum_;
}
// Returns the conservative noise spectral estimate, which PostUpdate() only
// adapts for bins with a low speech probability.
rtc::ArrayView<const float, kFftSizeBy2Plus1>
get_conservative_noise_spectrum() const {
return conservative_noise_spectrum_;
}
private:
// Non-owning reference to the suppression parameters.
const SuppressionParams& suppression_params_;
// Parameters of the parametric (white/pink) noise model, accumulated over the
// startup frames by PreUpdate().
float white_noise_level_ = 0.f;
float pink_noise_numerator_ = 0.f;
float pink_noise_exp_ = 0.f;
// Spectra maintained by the estimator; all are zero-filled by the constructor.
std::array<float, kFftSizeBy2Plus1> prev_noise_spectrum_;
std::array<float, kFftSizeBy2Plus1> conservative_noise_spectrum_;
std::array<float, kFftSizeBy2Plus1> parametric_noise_spectrum_;
std::array<float, kFftSizeBy2Plus1> noise_spectrum_;
// Produces the quantile-based noise estimate written into noise_spectrum_.
QuantileNoiseEstimator quantile_noise_estimator_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_

View File

@ -0,0 +1,556 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/noise_suppressor.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include "modules/audio_processing/ns/fast_math.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Maps a sample rate to the number of bands, one band per 16 kHz. Only 16000,
// 32000 and 48000 Hz are accepted (checked in debug builds).
size_t NumBandsForRate(size_t sample_rate_hz) {
  constexpr size_t kBandRateHz = 16000;
  RTC_DCHECK(sample_rate_hz == kBandRateHz ||
             sample_rate_hz == 2 * kBandRateHz ||
             sample_rate_hz == 3 * kBandRateHz);
  const size_t num_bands = sample_rate_hz / kBandRateHz;
  return num_bands;
}
// Largest channel count whose per-channel processing data is kept on the
// stack. Higher channel counts use scratch memory that is pre-allocated on the
// heap instead. This split avoids spending heap space on the common, small
// channel counts while still supporting larger ones without imposing a fixed
// stack-based maximum.
constexpr size_t kMaxNumChannelsOnStack = 2;

// Returns how many channels need heap-allocated storage: zero when the stack
// storage suffices, otherwise the full channel count.
size_t NumChannelsOnHeap(size_t num_channels) {
  if (num_channels <= kMaxNumChannelsOnStack) {
    return 0;
  }
  return num_channels;
}
// Hybrid Hanning and flat window for the filterbank.
// Holds the 96-sample rising ramp of the window; the values follow
// sin(pi * i / 192) for i = 0..95 (e.g. entry 32 is 0.5, entry 64 is
// 0.86602540). The falling ramp is obtained by reading this table in reverse.
constexpr std::array<float, 96> kBlocks160w256FirstHalf = {
0.00000000f, 0.01636173f, 0.03271908f, 0.04906767f, 0.06540313f,
0.08172107f, 0.09801714f, 0.11428696f, 0.13052619f, 0.14673047f,
0.16289547f, 0.17901686f, 0.19509032f, 0.21111155f, 0.22707626f,
0.24298018f, 0.25881905f, 0.27458862f, 0.29028468f, 0.30590302f,
0.32143947f, 0.33688985f, 0.35225005f, 0.36751594f, 0.38268343f,
0.39774847f, 0.41270703f, 0.42755509f, 0.44228869f, 0.45690388f,
0.47139674f, 0.48576339f, 0.50000000f, 0.51410274f, 0.52806785f,
0.54189158f, 0.55557023f, 0.56910015f, 0.58247770f, 0.59569930f,
0.60876143f, 0.62166057f, 0.63439328f, 0.64695615f, 0.65934582f,
0.67155895f, 0.68359230f, 0.69544264f, 0.70710678f, 0.71858162f,
0.72986407f, 0.74095113f, 0.75183981f, 0.76252720f, 0.77301045f,
0.78328675f, 0.79335334f, 0.80320753f, 0.81284668f, 0.82226822f,
0.83146961f, 0.84044840f, 0.84920218f, 0.85772861f, 0.86602540f,
0.87409034f, 0.88192126f, 0.88951608f, 0.89687274f, 0.90398929f,
0.91086382f, 0.91749450f, 0.92387953f, 0.93001722f, 0.93590593f,
0.94154407f, 0.94693013f, 0.95206268f, 0.95694034f, 0.96156180f,
0.96592583f, 0.97003125f, 0.97387698f, 0.97746197f, 0.98078528f,
0.98384601f, 0.98664333f, 0.98917651f, 0.99144486f, 0.99344778f,
0.99518473f, 0.99665524f, 0.99785892f, 0.99879546f, 0.99946459f,
0.99986614f};
// Applies the filterbank window to a buffer, in place: samples 0..95 are
// scaled by the rising ramp, samples 96..160 are left untouched (flat part),
// and samples 161..255 are scaled by the ramp read backwards.
void ApplyFilterBankWindow(rtc::ArrayView<float, kFftSize> x) {
  constexpr size_t kRampLength = 96;
  constexpr size_t kFallingRampStart = 161;

  // Rising ramp at the start of the buffer.
  for (size_t n = 0; n < kRampLength; ++n) {
    x[n] *= kBlocks160w256FirstHalf[n];
  }

  // Falling ramp at the end of the buffer: x[161] is paired with table entry
  // 95 and the table index decreases from there. Entry 0 is never used.
  size_t table_index = kRampLength - 1;
  for (size_t n = kFallingRampStart; n < kFftSize; ++n) {
    RTC_DCHECK_NE(0, table_index);
    x[n] *= kBlocks160w256FirstHalf[table_index];
    --table_index;
  }
}
// Builds `extended_frame` as the stored history (`old_data`) followed by the
// new `frame`, then refreshes `old_data` with the tail of the extended frame so
// that it is ready for the next call.
void FormExtendedFrame(rtc::ArrayView<const float, kNsFrameSize> frame,
                       rtc::ArrayView<float, kFftSize - kNsFrameSize> old_data,
                       rtc::ArrayView<float, kFftSize> extended_frame) {
  const size_t history_length = old_data.size();

  // History first; std::copy returns the position right after it, which is
  // where the new frame has to go.
  auto frame_destination =
      std::copy(old_data.begin(), old_data.end(), extended_frame.begin());
  std::copy(frame.begin(), frame.end(), frame_destination);

  // Keep the most recent `history_length` samples for the next frame.
  auto new_history_begin = extended_frame.end() - history_length;
  std::copy(new_history_begin, extended_frame.end(), old_data.begin());
}
// Uses overlap-and-add to produce an output frame: the first kOverlapSize
// output samples are the stored overlap plus the start of `extended_frame`, the
// rest of the output is copied straight from `extended_frame`, and the samples
// beyond kNsFrameSize become the overlap for the next call.
void OverlapAndAdd(rtc::ArrayView<const float, kFftSize> extended_frame,
                   rtc::ArrayView<float, kOverlapSize> overlap_memory,
                   rtc::ArrayView<float, kNsFrameSize> output_frame) {
  const auto frame_begin = extended_frame.begin();
  const auto overlap_end = frame_begin + kOverlapSize;
  const auto output_end = frame_begin + kNsFrameSize;

  // Overlapping region: previous tail + current head.
  std::transform(overlap_memory.begin(), overlap_memory.end(), frame_begin,
                 output_frame.begin(),
                 [](float previous_tail, float current_head) {
                   return previous_tail + current_head;
                 });

  // Non-overlapping region of the output.
  std::copy(overlap_end, output_end, output_frame.begin() + kOverlapSize);

  // Save the tail for the next frame.
  std::copy(output_end, extended_frame.end(), overlap_memory.begin());
}
// Produces a delayed frame: `delayed_frame` is the content of `delay_buffer`
// followed by the leading part of `frame`; the remaining trailing samples of
// `frame` are stored in `delay_buffer` for the next call.
void DelaySignal(rtc::ArrayView<const float, kNsFrameSize> frame,
                 rtc::ArrayView<float, kFftSize - kNsFrameSize> delay_buffer,
                 rtc::ArrayView<float, kNsFrameSize> delayed_frame) {
  // Number of samples of the current frame that still fit into the output
  // after the delayed samples.
  constexpr size_t kSamplesFromFrame = kNsFrameSize - (kFftSize - kNsFrameSize);
  const auto frame_split = frame.begin() + kSamplesFromFrame;

  // Delayed samples first, then the head of the current frame.
  auto head_destination = delayed_frame.begin() + delay_buffer.size();
  std::copy(delay_buffer.begin(), delay_buffer.end(), delayed_frame.begin());
  std::copy(frame.begin(), frame_split, head_destination);

  // The tail of the current frame is what gets delayed into the next call.
  std::copy(frame_split, frame.end(), delay_buffer.begin());
}
// Computes the energy (sum of squared samples) of an extended frame.
float ComputeEnergyOfExtendedFrame(rtc::ArrayView<const float, kFftSize> x) {
  float sum_of_squares = 0.f;
  for (size_t n = 0; n < kFftSize; ++n) {
    const float sample = x[n];
    sum_of_squares += sample * sample;
  }
  return sum_of_squares;
}
// Computes the energy of an extended frame from its two parts, without forming
// the extended frame: the stored history (`old_data`) is accumulated first,
// followed by the new `frame`.
float ComputeEnergyOfExtendedFrame(
    rtc::ArrayView<const float, kNsFrameSize> frame,
    rtc::ArrayView<float, kFftSize - kNsFrameSize> old_data) {
  float sum_of_squares = 0.f;
  for (size_t n = 0; n < old_data.size(); ++n) {
    const float sample = old_data[n];
    sum_of_squares += sample * sample;
  }
  for (size_t n = 0; n < frame.size(); ++n) {
    const float sample = frame[n];
    sum_of_squares += sample * sample;
  }
  return sum_of_squares;
}
// Computes the magnitude spectrum from an FFT output. Every bin gets an offset
// of 1 added, which keeps the spectrum strictly positive. The first and last
// bins are computed from the real part only.
void ComputeMagnitudeSpectrum(
    rtc::ArrayView<const float, kFftSize> real,
    rtc::ArrayView<const float, kFftSize> imag,
    rtc::ArrayView<float, kFftSizeBy2Plus1> signal_spectrum) {
  constexpr size_t kLastBin = kFftSizeBy2Plus1 - 1;
  constexpr float kMagnitudeOffset = 1.f;

  // Interior bins: approximate complex magnitude.
  for (size_t bin = 1; bin < kLastBin; ++bin) {
    const float power = real[bin] * real[bin] + imag[bin] * imag[bin];
    signal_spectrum[bin] = SqrtFastApproximation(power) + kMagnitudeOffset;
  }

  // Edge bins: absolute value of the real part.
  signal_spectrum[0] = fabsf(real[0]) + kMagnitudeOffset;
  signal_spectrum[kLastBin] = fabsf(real[kLastBin]) + kMagnitudeOffset;
}
// Computes the prior and post SNR for every bin.
//
// The post SNR is the signal-to-noise ratio of the current frame minus one,
// floored at zero. The prior SNR is a decision-directed estimate: a weighted
// sum of the previous frame's filtered SNR and the current post SNR.
void ComputeSnr(rtc::ArrayView<const float, kFftSizeBy2Plus1> filter,
                rtc::ArrayView<const float> prev_signal_spectrum,
                rtc::ArrayView<const float> signal_spectrum,
                rtc::ArrayView<const float> prev_noise_spectrum,
                rtc::ArrayView<const float> noise_spectrum,
                rtc::ArrayView<float> prior_snr,
                rtc::ArrayView<float> post_snr) {
  // Guards the divisions against a zero noise estimate.
  constexpr float kNoiseFloor = 0.0001f;
  // Weight of the previous-frame estimate in the prior SNR.
  constexpr float kPrevWeight = 0.98f;

  for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
    // Previous estimate: based on previous frame with gain filter.
    const float prev_estimate = prev_signal_spectrum[bin] /
                                (prev_noise_spectrum[bin] + kNoiseFloor) *
                                filter[bin];

    // Post SNR of the current frame.
    const float current_post_snr =
        signal_spectrum[bin] > noise_spectrum[bin]
            ? signal_spectrum[bin] / (noise_spectrum[bin] + kNoiseFloor) - 1.f
            : 0.f;
    post_snr[bin] = current_post_snr;

    // Decision-directed combination of previous and current estimates.
    prior_snr[bin] =
        kPrevWeight * prev_estimate + (1.f - kPrevWeight) * current_post_snr;
  }
}
// Computes the attenuating gain for the noise suppression of the upper bands.
//
// The gain is derived from the average speech probability and the average
// filter gain over the last kNumAvgBins bins below the final bin of the lowest
// band, and is limited to [minimum_attenuating_gain, 1].
float ComputeUpperBandsGain(
    float minimum_attenuating_gain,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> filter,
    rtc::ArrayView<const float> speech_probability,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_analysis_signal_spectrum,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
  // Average speech prob and filter gain for the end of the lowest band.
  constexpr int kNumAvgBins = 32;
  constexpr float kOneByNumAvgBins = 1.f / kNumAvgBins;
  constexpr size_t kAvgEnd = kFftSizeBy2Plus1 - 1;
  constexpr size_t kAvgBegin = kAvgEnd - kNumAvgBins;

  float avg_prob_speech = 0.f;
  float avg_filter_gain = 0.f;
  for (size_t bin = kAvgBegin; bin < kAvgEnd; ++bin) {
    avg_prob_speech += speech_probability[bin];
    avg_filter_gain += filter[bin];
  }
  avg_prob_speech *= kOneByNumAvgBins;
  avg_filter_gain *= kOneByNumAvgBins;

  // Speech that was removed by a component running between Analyze and
  // Process (an AEC, for example) must not count as speech for the upper band
  // suppression, so the speech probability is scaled by the ratio of the
  // spectral sums seen at processing and at analysis time.
  float analysis_sum = 0.f;
  float processing_sum = 0.f;
  for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
    analysis_sum += prev_analysis_signal_spectrum[bin];
    processing_sum += signal_spectrum[bin];
  }
  // The magnitude spectrum computation enforces the spectrum to be strictly
  // positive.
  RTC_DCHECK_GT(analysis_sum, 0.f);
  avg_prob_speech *= processing_sum / analysis_sum;

  // Gain based on the speech probability alone.
  const float probability_gain =
      0.5f * (1.f + static_cast<float>(tanh(2.f * avg_prob_speech - 1.f)));

  // Combine with the low band gain; the filter gain gets more weight when
  // speech is likely.
  const bool speech_likely = avg_prob_speech >= 0.5f;
  const float probability_weight = speech_likely ? 0.25f : 0.5f;
  const float filter_weight = speech_likely ? 0.75f : 0.5f;
  const float gain =
      probability_weight * probability_gain + filter_weight * avg_filter_gain;

  // Make sure gain is within flooring range.
  return std::min(std::max(gain, minimum_attenuating_gain), 1.f);
}
} // namespace
// Sets up the per-channel state. One delay buffer is allocated per upper band
// (num_bands - 1, or none for a single band). All memories start zeroed except
// the previous analysis spectrum, which starts at one in every bin.
NoiseSuppressor::ChannelState::ChannelState(
    const SuppressionParams& suppression_params,
    size_t num_bands)
    : wiener_filter(suppression_params),
      noise_estimator(suppression_params),
      process_delay_memory(num_bands > 1 ? num_bands - 1 : 0) {
  prev_analysis_signal_spectrum.fill(1.f);

  analyze_analysis_memory.fill(0.f);
  process_analysis_memory.fill(0.f);
  process_synthesis_memory.fill(0.f);
  for (size_t band = 0; band < process_delay_memory.size(); ++band) {
    process_delay_memory[band].fill(0.f);
  }
}
// Creates a noise suppressor for `num_channels` channels at `sample_rate_hz`.
// The heap scratch buffers are only sized when the channel count exceeds what
// Process() can keep on the stack; otherwise they stay empty.
NoiseSuppressor::NoiseSuppressor(const NsConfig& config,
                                 size_t sample_rate_hz,
                                 size_t num_channels)
    : num_bands_(NumBandsForRate(sample_rate_hz)),
      num_channels_(num_channels),
      suppression_params_(config.target_level),
      filter_bank_states_heap_(NumChannelsOnHeap(num_channels_)),
      upper_band_gains_heap_(NumChannelsOnHeap(num_channels_)),
      energies_before_filtering_heap_(NumChannelsOnHeap(num_channels_)),
      gain_adjustments_heap_(NumChannelsOnHeap(num_channels_)),
      channels_(num_channels_) {
  // channels_ was sized to num_channels_ above; populate every slot with its
  // own state object.
  for (auto& channel : channels_) {
    channel = std::make_unique<ChannelState>(suppression_params_, num_bands_);
  }
}
// Aggregates the Wiener filters of all channels into `filter` by taking, for
// every bin, the smallest gain found across the channels.
void NoiseSuppressor::AggregateWienerFilters(
    rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const {
  // Seed the result with the filter of the first channel.
  const rtc::ArrayView<const float, kFftSizeBy2Plus1> first_filter =
      channels_[0]->wiener_filter.get_filter();
  std::copy(first_filter.begin(), first_filter.end(), filter.begin());

  // Lower each bin whenever another channel has a smaller gain.
  for (size_t ch = 1; ch < num_channels_; ++ch) {
    const rtc::ArrayView<const float, kFftSizeBy2Plus1> channel_filter =
        channels_[ch]->wiener_filter.get_filter();
    for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
      if (channel_filter[bin] < filter[bin]) {
        filter[bin] = channel_filter[bin];
      }
    }
  }
}
// Analyzes the lowest band of `audio` for every channel and updates the noise
// and speech probability estimates that Process() relies on.
//
// Frames in which the extended frame of every channel has zero energy are
// skipped entirely (apart from the PrepareAnalysis() call) so that the
// statistics do not adapt to silence.
void NoiseSuppressor::Analyze(const AudioBuffer& audio) {
  // Prepare the noise estimator for the analysis stage.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    channels_[ch]->noise_estimator.PrepareAnalysis();
  }

  // Check for zero frames.
  bool zero_frame = true;
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    rtc::ArrayView<const float, kNsFrameSize> y_band0(
        &audio.split_bands_const(ch)[0][0], kNsFrameSize);
    float energy = ComputeEnergyOfExtendedFrame(
        y_band0, channels_[ch]->analyze_analysis_memory);
    if (energy > 0.f) {
      zero_frame = false;
      break;
    }
  }

  if (zero_frame) {
    // We want to avoid updating statistics in this case:
    // Updating feature statistics when we have zeros only will cause
    // thresholds to move towards zero signal situations. This in turn has the
    // effect that once the signal is "turned on" (non-zero values) everything
    // will be treated as speech and there is no noise suppression effect.
    // Depending on the duration of the inactive signal it takes a
    // considerable amount of time for the system to learn what is noise and
    // what is speech.
    return;
  }

  // Only update analysis counter for frames that are properly analyzed.
  // The counter restarts from zero once it reaches the largest int32_t value.
  // The limit is tested before incrementing because overflowing a signed
  // integer is undefined behaviour, so a "++x < 0" test cannot be relied on.
  constexpr int32_t kMaxNumAnalyzedFrames = 0x7fffffff;
  if (num_analyzed_frames_ < 0 ||
      num_analyzed_frames_ == kMaxNumAnalyzedFrames) {
    num_analyzed_frames_ = 0;
  } else {
    ++num_analyzed_frames_;
  }

  // Analyze all channels.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    std::unique_ptr<ChannelState>& ch_p = channels_[ch];
    rtc::ArrayView<const float, kNsFrameSize> y_band0(
        &audio.split_bands_const(ch)[0][0], kNsFrameSize);

    // Form an extended frame and apply analysis filter bank windowing.
    std::array<float, kFftSize> extended_frame;
    FormExtendedFrame(y_band0, ch_p->analyze_analysis_memory, extended_frame);
    ApplyFilterBankWindow(extended_frame);

    // Compute the magnitude spectrum.
    std::array<float, kFftSize> real;
    std::array<float, kFftSize> imag;
    fft_.Fft(extended_frame, real, imag);

    std::array<float, kFftSizeBy2Plus1> signal_spectrum;
    ComputeMagnitudeSpectrum(real, imag, signal_spectrum);

    // Compute energies.
    float signal_energy = 0.f;
    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
      signal_energy += real[i] * real[i] + imag[i] * imag[i];
    }
    signal_energy /= kFftSizeBy2Plus1;

    float signal_spectral_sum = 0.f;
    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
      signal_spectral_sum += signal_spectrum[i];
    }

    // Estimate the noise spectra and the probability estimates of speech
    // presence.
    ch_p->noise_estimator.PreUpdate(num_analyzed_frames_, signal_spectrum,
                                    signal_spectral_sum);

    std::array<float, kFftSizeBy2Plus1> post_snr;
    std::array<float, kFftSizeBy2Plus1> prior_snr;
    ComputeSnr(ch_p->wiener_filter.get_filter(),
               ch_p->prev_analysis_signal_spectrum, signal_spectrum,
               ch_p->noise_estimator.get_prev_noise_spectrum(),
               ch_p->noise_estimator.get_noise_spectrum(), prior_snr, post_snr);

    ch_p->speech_probability_estimator.Update(
        num_analyzed_frames_, prior_snr, post_snr,
        ch_p->noise_estimator.get_conservative_noise_spectrum(),
        signal_spectrum, signal_spectral_sum, signal_energy);

    ch_p->noise_estimator.PostUpdate(
        ch_p->speech_probability_estimator.get_probability(), signal_spectrum);

    // Store the magnitude spectrum to make it available for the process
    // method.
    std::copy(signal_spectrum.begin(), signal_spectrum.end(),
              ch_p->prev_analysis_signal_spectrum.begin());
  }
}
// Applies noise suppression to `audio` in place.
//
// Steps, in order:
//  1. For every channel, window and transform the lowest band, update the
//     Wiener filter and (for multi-band audio) compute an upper-band gain.
//  2. Return early if the capture output is not used; the filter updates
//     above have already been performed.
//  3. Apply a single filter to the lowest band of all channels (the filter of
//     the only channel, or the per-bin aggregate across channels), transform
//     back, window, rescale with a shared gain adjustment and overlap-add
//     into the lowest band.
//  4. Delay the upper bands and scale them with the smallest upper-band gain
//     across channels.
//  5. Clamp all bands to [-32768, 32767].
void NoiseSuppressor::Process(AudioBuffer* audio) {
// Select the space for storing data during the processing.
// The views initially point at the stack arrays; they are redirected to the
// heap buffers below when there are more channels than the stack arrays hold.
std::array<FilterBankState, kMaxNumChannelsOnStack> filter_bank_states_stack;
rtc::ArrayView<FilterBankState> filter_bank_states(
filter_bank_states_stack.data(), num_channels_);
std::array<float, kMaxNumChannelsOnStack> upper_band_gains_stack;
rtc::ArrayView<float> upper_band_gains(upper_band_gains_stack.data(),
num_channels_);
std::array<float, kMaxNumChannelsOnStack> energies_before_filtering_stack;
rtc::ArrayView<float> energies_before_filtering(
energies_before_filtering_stack.data(), num_channels_);
std::array<float, kMaxNumChannelsOnStack> gain_adjustments_stack;
rtc::ArrayView<float> gain_adjustments(gain_adjustments_stack.data(),
num_channels_);
if (NumChannelsOnHeap(num_channels_) > 0) {
// If the stack-allocated space is too small, use the heap for storing the
// data.
filter_bank_states = rtc::ArrayView<FilterBankState>(
filter_bank_states_heap_.data(), num_channels_);
upper_band_gains =
rtc::ArrayView<float>(upper_band_gains_heap_.data(), num_channels_);
energies_before_filtering = rtc::ArrayView<float>(
energies_before_filtering_heap_.data(), num_channels_);
gain_adjustments =
rtc::ArrayView<float>(gain_adjustments_heap_.data(), num_channels_);
}
// Compute the suppression filters for all channels.
for (size_t ch = 0; ch < num_channels_; ++ch) {
// Form an extended frame and apply analysis filter bank windowing.
rtc::ArrayView<float, kNsFrameSize> y_band0(&audio->split_bands(ch)[0][0],
kNsFrameSize);
FormExtendedFrame(y_band0, channels_[ch]->process_analysis_memory,
filter_bank_states[ch].extended_frame);
ApplyFilterBankWindow(filter_bank_states[ch].extended_frame);
// Energy of the windowed frame before filtering; compared against the
// post-filtering energy further down.
energies_before_filtering[ch] =
ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame);
// Perform filter bank analysis and compute the magnitude spectrum.
fft_.Fft(filter_bank_states[ch].extended_frame, filter_bank_states[ch].real,
filter_bank_states[ch].imag);
std::array<float, kFftSizeBy2Plus1> signal_spectrum;
ComputeMagnitudeSpectrum(filter_bank_states[ch].real,
filter_bank_states[ch].imag, signal_spectrum);
// Compute the frequency domain gain filter for noise attenuation.
channels_[ch]->wiener_filter.Update(
num_analyzed_frames_,
channels_[ch]->noise_estimator.get_noise_spectrum(),
channels_[ch]->noise_estimator.get_prev_noise_spectrum(),
channels_[ch]->noise_estimator.get_parametric_noise_spectrum(),
signal_spectrum);
if (num_bands_ > 1) {
// Compute the time-domain gain for attenuating the noise in the upper
// bands.
upper_band_gains[ch] = ComputeUpperBandsGain(
suppression_params_.minimum_attenuating_gain,
channels_[ch]->wiener_filter.get_filter(),
channels_[ch]->speech_probability_estimator.get_probability(),
channels_[ch]->prev_analysis_signal_spectrum, signal_spectrum);
}
}
// Only do the below processing if the output of the audio processing module
// is used.
if (!capture_output_used_) {
return;
}
// Aggregate the Wiener filters for all channels.
// With a single channel the view is re-pointed at that channel's filter and
// filter_data is left unused.
std::array<float, kFftSizeBy2Plus1> filter_data;
rtc::ArrayView<const float, kFftSizeBy2Plus1> filter = filter_data;
if (num_channels_ == 1) {
filter = channels_[0]->wiener_filter.get_filter();
} else {
AggregateWienerFilters(filter_data);
}
for (size_t ch = 0; ch < num_channels_; ++ch) {
// Apply the filter to the lower band.
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
filter_bank_states[ch].real[i] *= filter[i];
filter_bank_states[ch].imag[i] *= filter[i];
}
}
// Perform filter bank synthesis
for (size_t ch = 0; ch < num_channels_; ++ch) {
fft_.Ifft(filter_bank_states[ch].real, filter_bank_states[ch].imag,
filter_bank_states[ch].extended_frame);
}
for (size_t ch = 0; ch < num_channels_; ++ch) {
// The post-filtering energy is measured before the synthesis window is
// applied.
const float energy_after_filtering =
ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame);
// Apply synthesis window.
ApplyFilterBankWindow(filter_bank_states[ch].extended_frame);
// Compute the adjustment of the noise attenuation filter based on the
// effect of the attenuation.
gain_adjustments[ch] =
channels_[ch]->wiener_filter.ComputeOverallScalingFactor(
num_analyzed_frames_,
channels_[ch]->speech_probability_estimator.get_prior_probability(),
energies_before_filtering[ch], energy_after_filtering);
}
// Select and apply adjustment of the noise attenuation filter based on the
// effect of the attenuation.
// The smallest adjustment across channels is applied to every channel.
float gain_adjustment = gain_adjustments[0];
for (size_t ch = 1; ch < num_channels_; ++ch) {
gain_adjustment = std::min(gain_adjustment, gain_adjustments[ch]);
}
for (size_t ch = 0; ch < num_channels_; ++ch) {
for (size_t i = 0; i < kFftSize; ++i) {
filter_bank_states[ch].extended_frame[i] =
gain_adjustment * filter_bank_states[ch].extended_frame[i];
}
}
// Use overlap-and-add to form the output frame of the lowest band.
for (size_t ch = 0; ch < num_channels_; ++ch) {
rtc::ArrayView<float, kNsFrameSize> y_band0(&audio->split_bands(ch)[0][0],
kNsFrameSize);
OverlapAndAdd(filter_bank_states[ch].extended_frame,
channels_[ch]->process_synthesis_memory, y_band0);
}
if (num_bands_ > 1) {
// Select the noise attenuating gain to apply to the upper band.
// The smallest gain across channels is applied to every channel.
float upper_band_gain = upper_band_gains[0];
for (size_t ch = 1; ch < num_channels_; ++ch) {
upper_band_gain = std::min(upper_band_gain, upper_band_gains[ch]);
}
// Process the upper bands.
for (size_t ch = 0; ch < num_channels_; ++ch) {
for (size_t b = 1; b < num_bands_; ++b) {
// Delay the upper bands to match the delay of the filterbank applied to
// the lowest band.
rtc::ArrayView<float, kNsFrameSize> y_band(
&audio->split_bands(ch)[b][0], kNsFrameSize);
std::array<float, kNsFrameSize> delayed_frame;
// Delay memory is indexed from zero for the first upper band, hence b - 1.
DelaySignal(y_band, channels_[ch]->process_delay_memory[b - 1],
delayed_frame);
// Apply the time-domain noise-attenuating gain.
for (size_t j = 0; j < kNsFrameSize; j++) {
y_band[j] = upper_band_gain * delayed_frame[j];
}
}
}
}
// Limit the output to the allowed range.
for (size_t ch = 0; ch < num_channels_; ++ch) {
for (size_t b = 0; b < num_bands_; ++b) {
rtc::ArrayView<float, kNsFrameSize> y_band(&audio->split_bands(ch)[b][0],
kNsFrameSize);
for (size_t j = 0; j < kNsFrameSize; j++) {
y_band[j] = std::min(std::max(y_band[j], -32768.f), 32767.f);
}
}
}
}
} // namespace webrtc

View File

@ -0,0 +1,92 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
#define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/ns/noise_estimator.h"
#include "modules/audio_processing/ns/ns_common.h"
#include "modules/audio_processing/ns/ns_config.h"
#include "modules/audio_processing/ns/ns_fft.h"
#include "modules/audio_processing/ns/speech_probability_estimator.h"
#include "modules/audio_processing/ns/wiener_filter.h"
namespace webrtc {
// Class for suppressing noise in a signal.
//
// Analyze() updates the noise and speech estimates from a frame; Process()
// applies the suppression to a frame in place. The class is not copyable.
class NoiseSuppressor {
public:
// `sample_rate_hz` determines the number of bands; `num_channels` determines
// how many per-channel states are created.
NoiseSuppressor(const NsConfig& config,
size_t sample_rate_hz,
size_t num_channels);
NoiseSuppressor(const NoiseSuppressor&) = delete;
NoiseSuppressor& operator=(const NoiseSuppressor&) = delete;
// Analyses the signal (typically applied before the AEC to avoid analyzing
// any comfort noise signal).
void Analyze(const AudioBuffer& audio);
// Applies noise suppression.
void Process(AudioBuffer* audio);
// Specifies whether the capture output will be used. The purpose of this is
// to allow the noise suppressor to deactivate some of the processing when the
// resulting output is anyway not used, for instance when the endpoint is
// muted.
void SetCaptureOutputUsage(bool capture_output_used) {
capture_output_used_ = capture_output_used;
}
private:
const size_t num_bands_;
const size_t num_channels_;
// Owned copy of the suppression parameters; the per-channel states are
// constructed with a reference to it.
const SuppressionParams suppression_params_;
// Counter of analyzed frames. Starts at -1 and is advanced by Analyze()
// before use, so the first analyzed frame is numbered 0.
int32_t num_analyzed_frames_ = -1;
NrFft fft_;
bool capture_output_used_ = true;
// State kept separately for every channel.
struct ChannelState {
ChannelState(const SuppressionParams& suppression_params, size_t num_bands);
SpeechProbabilityEstimator speech_probability_estimator;
WienerFilter wiener_filter;
NoiseEstimator noise_estimator;
// Magnitude spectrum stored by the latest Analyze() call.
std::array<float, kFftSizeBy2Plus1> prev_analysis_signal_spectrum;
// History samples used to extend frames in Analyze() and Process().
std::array<float, kFftSize - kNsFrameSize> analyze_analysis_memory;
std::array<float, kOverlapSize> process_analysis_memory;
// Overlap-add memory for the synthesis in Process().
std::array<float, kOverlapSize> process_synthesis_memory;
// One delay buffer per upper band (num_bands - 1 entries).
std::vector<std::array<float, kOverlapSize>> process_delay_memory;
};
// Per-channel working buffers used by Process().
struct FilterBankState {
std::array<float, kFftSize> real;
std::array<float, kFftSize> imag;
std::array<float, kFftSize> extended_frame;
};
// Scratch storage used by Process() instead of stack arrays when the channel
// count is too large for the stack arrays; empty otherwise.
std::vector<FilterBankState> filter_bank_states_heap_;
std::vector<float> upper_band_gains_heap_;
std::vector<float> energies_before_filtering_heap_;
std::vector<float> gain_adjustments_heap_;
std::vector<std::unique_ptr<ChannelState>> channels_;
// Aggregates the Wiener filters into a single filter to use.
void AggregateWienerFilters(
rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_
#define MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_
#include <cstddef>
namespace webrtc {
// Number of points of the FFT used by the noise suppressor.
constexpr size_t kFftSize = 256;
// Number of spectral bins (129).
constexpr size_t kFftSizeBy2Plus1 = kFftSize / 2 + 1;
// Number of samples per band in one processed frame.
constexpr size_t kNsFrameSize = 160;
// Number of samples by which consecutive extended frames overlap (96).
constexpr size_t kOverlapSize = kFftSize - kNsFrameSize;
// Number of initial frames during which the noise estimator blends in its
// parametric noise model.
constexpr int kShortStartupPhaseBlocks = 50;
// NOTE(review): presumably the length in frames of a longer startup phase used
// by other components; its users are not visible here - confirm.
constexpr int kLongStartupPhaseBlocks = 200;
// NOTE(review): the constants below appear to configure the speech/noise
// feature statistics (update window, threshold and histogram bin sizes); their
// users are not visible here - confirm against the code that reads them.
constexpr int kFeatureUpdateWindowSize = 500;
constexpr float kLtrFeatureThr = 0.5f;
constexpr float kBinSizeLrt = 0.1f;
constexpr float kBinSizeSpecFlat = 0.05f;
constexpr float kBinSizeSpecDiff = 0.1f;
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_

View File

@ -0,0 +1,24 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_
#define MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_
namespace webrtc {
// Config struct for the noise suppressor
struct NsConfig {
// Selectable suppression levels.
// NOTE(review): the names suggest the targeted attenuation in dB - confirm
// against SuppressionParams, which consumes this value.
enum class SuppressionLevel { k6dB, k12dB, k18dB, k21dB };
// Level used when none is specified explicitly.
SuppressionLevel target_level = SuppressionLevel::k12dB;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/ns_fft.h"
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
namespace webrtc {
// Allocates the work areas needed by WebRtc_rdft and primes them by running
// one transform on an all-zero buffer.
NrFft::NrFft() : bit_reversal_state_(kFftSize / 2), tables_(kFftSize / 2) {
  // Initialize WebRtc_rdft (setting bit_reversal_state_[0] to 0 triggers
  // the initialization).
  bit_reversal_state_[0] = 0;

  // Zero-filled scratch input; only the side effect on the work areas matters.
  std::array<float, kFftSize> scratch{};
  WebRtc_rdft(kFftSize, 1, scratch.data(), bit_reversal_state_.data(),
              tables_.data());
}
// Transforms `time_data` to the frequency domain. The transform runs in place
// on `time_data` (which is therefore overwritten), and the packed result is
// then unpacked into the first kFftSizeBy2Plus1 entries of `real` and `imag`.
void NrFft::Fft(rtc::ArrayView<float, kFftSize> time_data,
                rtc::ArrayView<float, kFftSize> real,
                rtc::ArrayView<float, kFftSize> imag) {
  WebRtc_rdft(kFftSize, 1, time_data.data(), bit_reversal_state_.data(),
              tables_.data());

  constexpr size_t kLastBin = kFftSizeBy2Plus1 - 1;

  // The first and last bins are purely real and are stored in the first two
  // slots of the packed output.
  imag[0] = 0;
  real[0] = time_data[0];
  imag[kLastBin] = 0;
  real[kLastBin] = time_data[1];

  // The remaining bins are stored as interleaved (real, imag) pairs.
  for (size_t bin = 1, packed = 2; bin < kLastBin; ++bin, packed += 2) {
    real[bin] = time_data[packed];
    imag[bin] = time_data[packed + 1];
  }
}
// Inverse transform. Packs `real` / `imag` into `time_data` using the layout
// WebRtc_rdft() expects, runs the inverse transform in place, and rescales
// the result by 2 / kFftSize.
void NrFft::Ifft(rtc::ArrayView<const float> real,
                 rtc::ArrayView<const float> imag,
                 rtc::ArrayView<float> time_data) {
  // Slots 0 and 1 carry the real parts of the first and the last bin.
  constexpr size_t kLastBin = kFftSizeBy2Plus1 - 1;
  time_data[0] = real[0];
  time_data[1] = real[kLastBin];

  // Remaining bins are interleaved as (real, imaginary) pairs.
  for (size_t bin = 1; bin < kLastBin; ++bin) {
    const size_t packed = 2 * bin;
    time_data[packed] = real[bin];
    time_data[packed + 1] = imag[bin];
  }

  WebRtc_rdft(kFftSize, -1, time_data.data(), bit_reversal_state_.data(),
              tables_.data());

  // Scale the output.
  constexpr float kScaling = 2.f / kFftSize;
  for (size_t n = 0; n < time_data.size(); ++n) {
    time_data[n] *= kScaling;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
#define MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/ns/ns_common.h"
namespace webrtc {
// Wrapper class providing 256 point FFT functionality.
// Owns the work areas that are handed to WebRtc_rdft() on every call; the
// class is non-copyable.
class NrFft {
public:
// Allocates the work areas and lets WebRtc_rdft() initialize them.
NrFft();
NrFft(const NrFft&) = delete;
NrFft& operator=(const NrFft&) = delete;
// Transforms the signal from time to frequency domain.
// `time_data` is used as the in-place working buffer and is overwritten.
// Only the first kFftSizeBy2Plus1 entries of `real` and `imag` are written.
void Fft(rtc::ArrayView<float, kFftSize> time_data,
rtc::ArrayView<float, kFftSize> real,
rtc::ArrayView<float, kFftSize> imag);
// Transforms the signal from frequency to time domain.
// Reads the first kFftSizeBy2Plus1 entries of `real` and `imag`, and writes
// the scaled time-domain result into `time_data`. The views are dynamically
// sized and the implementation does not check their lengths itself.
void Ifft(rtc::ArrayView<const float> real,
rtc::ArrayView<const float> imag,
rtc::ArrayView<float> time_data);
private:
// Work areas passed to WebRtc_rdft(); both hold kFftSize / 2 entries.
std::vector<size_t> bit_reversal_state_;
std::vector<float> tables_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_

View File

@ -0,0 +1,18 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/prior_signal_model.h"
namespace webrtc {
PriorSignalModel::PriorSignalModel(float lrt_initial_value)
: lrt(lrt_initial_value) {}
} // namespace webrtc

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_
#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_
namespace webrtc {
// Struct for storing the prior signal model parameters.
// Holds the thresholds and weights that the speech probability estimator
// applies to the three signal features (LRT, spectral flatness, spectral
// difference). Non-copyable.
struct PriorSignalModel {
// Only the LRT threshold is supplied by the caller; the remaining fields
// start from the defaults below.
explicit PriorSignalModel(float lrt_initial_value);
PriorSignalModel(const PriorSignalModel&) = delete;
PriorSignalModel& operator=(const PriorSignalModel&) = delete;
// Threshold for the averaged LRT feature.
float lrt;
// Threshold for the spectral flatness feature.
float flatness_threshold = .5f;
// Threshold for the spectral (template) difference feature.
float template_diff_threshold = .5f;
// Weights applied to the per-feature indicators. With the defaults only the
// LRT feature contributes.
float lrt_weighting = 1.f;
float flatness_weighting = 0.f;
float difference_weighting = 0.f;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_

View File

@ -0,0 +1,171 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/prior_signal_model_estimator.h"
#include <math.h>
#include <algorithm>
#include "modules/audio_processing/ns/fast_math.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Scans a feature histogram for its two highest bins and reports the highest
// one through `peak_position` (bin midpoint) and `peak_weight` (bin count).
// On equal counts the earlier bin wins. When the runner-up lies within two bin
// widths of the winner and carries more than half of its weight, the two are
// merged: the weights are added and the position becomes their midpoint.
void FindFirstOfTwoLargestPeaks(
    float bin_size,
    rtc::ArrayView<const int, kHistogramSize> spectral_flatness,
    float* peak_position,
    int* peak_weight) {
  RTC_DCHECK(peak_position);
  RTC_DCHECK(peak_weight);

  int best_count = 0;
  float best_position = 0.f;
  int runner_up_count = 0;
  float runner_up_position = 0.f;

  // Single pass that keeps the two largest bins seen so far.
  for (int bin = 0; bin < kHistogramSize; ++bin) {
    const int count = spectral_flatness[bin];
    const float bin_mid = (bin + 0.5f) * bin_size;
    if (count > best_count) {
      // New leader: the previous leader is demoted to runner-up.
      runner_up_count = best_count;
      runner_up_position = best_position;
      best_count = count;
      best_position = bin_mid;
    } else if (count > runner_up_count) {
      runner_up_count = count;
      runner_up_position = bin_mid;
    }
  }

  // Merge the peaks if they are close and of comparable weight.
  const bool close_together =
      fabs(runner_up_position - best_position) < 2 * bin_size;
  const bool comparable_weight = runner_up_count > 0.5f * best_count;
  if (close_together && comparable_weight) {
    best_count += runner_up_count;
    best_position = 0.5f * (best_position + runner_up_position);
  }

  *peak_position = best_position;
  *peak_weight = best_count;
}
// Derives the LRT threshold of the prior model from the LRT histogram.
// `low_lrt_fluctuations` is set when the fluctuation measure computed from the
// histogram is below 0.05; in that case the threshold is pinned to its
// maximum, otherwise it is 1.2 times the low-bin average clamped to
// [0.2, 1.0].
void UpdateLrt(rtc::ArrayView<const int, kHistogramSize> lrt_histogram,
               float* prior_model_lrt,
               bool* low_lrt_fluctuations) {
  RTC_DCHECK(prior_model_lrt);
  RTC_DCHECK(low_lrt_fluctuations);

  // Weighted average of the bin midpoints over the first ten bins only.
  constexpr int kNumLowBins = 10;
  float average = 0.f;
  int count = 0;
  for (int bin = 0; bin < kNumLowBins; ++bin) {
    const float bin_mid = (bin + 0.5f) * kBinSizeLrt;
    average += lrt_histogram[bin] * bin_mid;
    count += lrt_histogram[bin];
  }
  if (count > 0) {
    average /= count;
  }

  // First and second moments over the complete histogram, normalized by the
  // feature update window size.
  float average_compl = 0.f;
  float average_squared = 0.f;
  for (int bin = 0; bin < kHistogramSize; ++bin) {
    const float bin_mid = (bin + 0.5f) * kBinSizeLrt;
    average_squared += lrt_histogram[bin] * bin_mid * bin_mid;
    average_compl += lrt_histogram[bin] * bin_mid;
  }
  constexpr float kOneFeatureUpdateWindowSize = 1.f / kFeatureUpdateWindowSize;
  average_squared *= kOneFeatureUpdateWindowSize;
  average_compl *= kOneFeatureUpdateWindowSize;

  // Fluctuation limit of LRT feature.
  *low_lrt_fluctuations = average_squared - average * average_compl < 0.05f;

  // Threshold for the LRT feature. Very low fluctuation is treated as likely
  // noise and selects the maximum threshold.
  constexpr float kMaxLrt = 1.f;
  constexpr float kMinLrt = .2f;
  *prior_model_lrt =
      *low_lrt_fluctuations
          ? kMaxLrt
          : std::min(kMaxLrt, std::max(kMinLrt, 1.2f * average));
}
} // namespace
PriorSignalModelEstimator::PriorSignalModelEstimator(float lrt_initial_value)
: prior_model_(lrt_initial_value) {}
// Re-derives the prior model from the feature histograms: the LRT threshold,
// the thresholds for spectral flatness and spectral difference, and the
// weights given to each of the three features.
void PriorSignalModelEstimator::Update(const Histograms& histograms) {
  bool low_lrt_fluctuations;
  UpdateLrt(histograms.get_lrt(), &prior_model_.lrt, &low_lrt_fluctuations);

  // Locate the main peak of the spectral flatness histogram.
  float flatness_peak_position;
  int flatness_peak_weight;
  FindFirstOfTwoLargestPeaks(kBinSizeSpecFlat,
                             histograms.get_spectral_flatness(),
                             &flatness_peak_position, &flatness_peak_weight);

  // Locate the main peak of the spectral difference histogram.
  float diff_peak_position = 0.f;
  int diff_peak_weight = 0;
  FindFirstOfTwoLargestPeaks(kBinSizeSpecDiff, histograms.get_spectral_diff(),
                             &diff_peak_position, &diff_peak_weight);

  // The flatness feature is rejected when its peak is too light or sits below
  // 0.6 (flatness varies between 0 and 1).
  const bool use_spec_flat = !(flatness_peak_weight < 0.3f * 500 ||
                               flatness_peak_position < 0.6f);

  // The difference feature is rejected when its peak is too light or when the
  // LRT barely fluctuates, which indicates a noise state.
  const bool use_spec_diff =
      !(diff_peak_weight < 0.3f * 500 || low_lrt_fluctuations);

  // The difference threshold is refreshed regardless of whether the feature
  // is in use.
  prior_model_.template_diff_threshold =
      std::min(1.f, std::max(0.16f, 1.2f * diff_peak_position));

  // The LRT feature is always active; every active feature gets equal weight.
  const float one_by_feature_sum =
      1.f / (1.f + (use_spec_flat ? 1 : 0) + (use_spec_diff ? 1 : 0));
  prior_model_.lrt_weighting = one_by_feature_sum;
  prior_model_.flatness_weighting = use_spec_flat ? one_by_feature_sum : 0.f;
  prior_model_.difference_weighting = use_spec_diff ? one_by_feature_sum : 0.f;

  // The flatness threshold is only refreshed while the feature is in use.
  if (use_spec_flat) {
    prior_model_.flatness_threshold =
        std::min(.95f, std::max(0.1f, 0.9f * flatness_peak_position));
  }
}
} // namespace webrtc

View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_
#include "modules/audio_processing/ns/histograms.h"
#include "modules/audio_processing/ns/prior_signal_model.h"
namespace webrtc {
// Estimator of the prior signal model parameters.
// Owns a PriorSignalModel and recomputes its thresholds and weights from
// feature histograms on request. Non-copyable.
class PriorSignalModelEstimator {
public:
// `lrt_initial_value` is the initial LRT threshold of the owned model.
explicit PriorSignalModelEstimator(float lrt_initial_value);
PriorSignalModelEstimator(const PriorSignalModelEstimator&) = delete;
PriorSignalModelEstimator& operator=(const PriorSignalModelEstimator&) =
delete;
// Updates the model estimate.
// Reads the LRT, spectral flatness and spectral difference histograms of `h`.
void Update(const Histograms& h);
// Returns the estimated model.
const PriorSignalModel& get_prior_model() const { return prior_model_; }
private:
// The model that Update() rewrites.
PriorSignalModel prior_model_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/quantile_noise_estimator.h"
#include <algorithm>
#include "modules/audio_processing/ns/fast_math.h"
namespace webrtc {
// Sets the initial estimator state and staggers the per-estimate counters so
// that the simultaneous estimates reach kLongStartupPhaseBlocks at different
// times.
QuantileNoiseEstimator::QuantileNoiseEstimator() {
  density_.fill(0.3f);
  log_quantile_.fill(8.f);
  quantile_.fill(0.f);

  // Estimate number `index` starts at floor(kLongStartupPhaseBlocks *
  // (index + 1) / kSimult).
  constexpr float kOneBySimult = 1.f / kSimult;
  int index = 0;
  for (int& counter : counter_) {
    counter = floor(kLongStartupPhaseBlocks * (index + 1.f) * kOneBySimult);
    ++index;
  }
}
// Feeds one signal spectrum into the estimator and writes the current noise
// estimate to `noise_spectrum`.
//
// kSimult log-domain quantile trackers are kept side by side in
// `log_quantile_` / `density_` (tracker s occupies the kFftSizeBy2Plus1
// entries starting at s * kFftSizeBy2Plus1). All trackers are updated on every
// call. `quantile_` is refreshed from one of them only when a tracker's
// counter wraps, or on every call during the startup phase; otherwise the
// previously stored `quantile_` is returned again.
void QuantileNoiseEstimator::Estimate(
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
rtc::ArrayView<float, kFftSizeBy2Plus1> noise_spectrum) {
std::array<float, kFftSizeBy2Plus1> log_spectrum;
LogApproximation(signal_spectrum, log_spectrum);
// Offset into log_quantile_ of the tracker to export; -1 means that
// quantile_ is left unchanged by this call.
int quantile_index_to_return = -1;
// Loop over simultaneous estimates.
// `s` is the tracker index and `k` the offset of its first entry.
for (int s = 0, k = 0; s < kSimult;
++s, k += static_cast<int>(kFftSizeBy2Plus1)) {
// The step size shrinks as this tracker's counter grows.
const float one_by_counter_plus_1 = 1.f / (counter_[s] + 1.f);
// `i` indexes the spectrum bin, `j` the matching tracker entry.
for (int i = 0, j = k; i < static_cast<int>(kFftSizeBy2Plus1); ++i, ++j) {
// Update log quantile estimate.
// The step is divided by the density only when the density exceeds 1.
const float delta = density_[j] > 1.f ? 40.f / density_[j] : 40.f;
const float multiplier = delta * one_by_counter_plus_1;
// Asymmetric step: up by 0.25 * multiplier when the observation is above
// the estimate, down by 0.75 * multiplier otherwise.
if (log_spectrum[i] > log_quantile_[j]) {
log_quantile_[j] += 0.25f * multiplier;
} else {
log_quantile_[j] -= 0.75f * multiplier;
}
// Update density estimate.
// Only observations within kWidth of the estimate contribute. Despite its
// name, kOneByWidthPlus2 equals 1 / (2 * kWidth).
constexpr float kWidth = 0.01f;
constexpr float kOneByWidthPlus2 = 1.f / (2.f * kWidth);
if (fabs(log_spectrum[i] - log_quantile_[j]) < kWidth) {
density_[j] = (counter_[s] * density_[j] + kOneByWidthPlus2) *
one_by_counter_plus_1;
}
}
// When a tracker's counter reaches kLongStartupPhaseBlocks it restarts from
// zero, and once the startup phase is over that tracker is selected for
// export.
if (counter_[s] >= kLongStartupPhaseBlocks) {
counter_[s] = 0;
if (num_updates_ >= kLongStartupPhaseBlocks) {
quantile_index_to_return = k;
}
}
++counter_[s];
}
// Sequentially update the noise during startup.
if (num_updates_ < kLongStartupPhaseBlocks) {
// Use the last "s" to get noise during startup that differ from zero.
quantile_index_to_return = kFftSizeBy2Plus1 * (kSimult - 1);
++num_updates_;
}
// Convert the selected tracker from the log domain into quantile_.
if (quantile_index_to_return >= 0) {
ExpApproximation(
rtc::ArrayView<const float>(&log_quantile_[quantile_index_to_return],
kFftSizeBy2Plus1),
quantile_);
}
std::copy(quantile_.begin(), quantile_.end(), noise_spectrum.begin());
}
} // namespace webrtc

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_
#include <math.h>
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/ns/ns_common.h"
namespace webrtc {
// Number of quantile estimates that are maintained simultaneously.
constexpr int kSimult = 3;
// For quantile noise estimation.
// Keeps kSimult estimates side by side and exposes one noise spectrum per
// call to Estimate(). Non-copyable.
class QuantileNoiseEstimator {
public:
QuantileNoiseEstimator();
QuantileNoiseEstimator(const QuantileNoiseEstimator&) = delete;
QuantileNoiseEstimator& operator=(const QuantileNoiseEstimator&) = delete;
// Estimate noise.
// Updates the internal state from `signal_spectrum` and copies the current
// noise estimate into `noise_spectrum`.
void Estimate(rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
rtc::ArrayView<float, kFftSizeBy2Plus1> noise_spectrum);
private:
// Per-estimate state, stored back to back: estimate s uses the
// kFftSizeBy2Plus1 entries starting at s * kFftSizeBy2Plus1.
std::array<float, kSimult * kFftSizeBy2Plus1> density_;
std::array<float, kSimult * kFftSizeBy2Plus1> log_quantile_;
// Most recently exported noise estimate (linear domain).
std::array<float, kFftSizeBy2Plus1> quantile_;
// One update counter per estimate.
std::array<int, kSimult> counter_;
// Counts calls to Estimate() during the startup phase; starts at 1.
int num_updates_ = 1;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_

View File

@ -0,0 +1,24 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/signal_model.h"
namespace webrtc {
// Starts every feature at its initial threshold value: the LRT-related fields
// at kLtrFeatureThr and both spectral features at 0.5.
SignalModel::SignalModel() : lrt(kLtrFeatureThr) {
  // Spectral flatness and spectral difference share one starting value.
  constexpr float kInitialSpectralFeature = 0.5f;
  spectral_diff = kInitialSpectralFeature;
  spectral_flatness = kInitialSpectralFeature;

  for (float& bin_lrt : avg_log_lrt) {
    bin_lrt = kLtrFeatureThr;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_
#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_
#include <array>
#include "modules/audio_processing/ns/ns_common.h"
namespace webrtc {
// Current values of the signal features used for speech/noise classification.
// Non-copyable.
struct SignalModel {
// Initializes all features to their starting values.
SignalModel();
SignalModel(const SignalModel&) = delete;
SignalModel& operator=(const SignalModel&) = delete;
// Average of avg_log_lrt over all frequency bins.
float lrt;
// Time-smoothed spectral difference feature.
float spectral_diff;
// Time-smoothed spectral flatness feature.
float spectral_flatness;
// Log LRT factor with time-smoothing.
std::array<float, kFftSizeBy2Plus1> avg_log_lrt;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_

View File

@ -0,0 +1,175 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/signal_model_estimator.h"
#include "modules/audio_processing/ns/fast_math.h"
namespace webrtc {
namespace {
// Reciprocal of the spectrum length; multiplying a sum over the spectrum by it
// replaces a division by kFftSizeBy2Plus1.
constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
// Computes the difference measure between the input spectrum and a
// template/learned noise spectrum:
//   var(signal) - cov(signal, noise)^2 / var(noise)
// and returns it divided by `diff_normalization`. A small constant is added to
// both denominators. `signal_spectral_sum` is used as the sum of
// `signal_spectrum` when forming its mean.
float ComputeSpectralDiff(
    rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
    float signal_spectral_sum,
    float diff_normalization) {
  // Means of the two spectra.
  float noise_average = 0.f;
  for (const float noise_bin : conservative_noise_spectrum) {
    noise_average += noise_bin;
  }
  noise_average *= kOneByFftSizeBy2Plus1;
  const float signal_average = signal_spectral_sum * kOneByFftSizeBy2Plus1;

  // Variances and covariance around those means.
  float covariance = 0.f;
  float noise_variance = 0.f;
  float signal_variance = 0.f;
  for (size_t k = 0; k < kFftSizeBy2Plus1; ++k) {
    const float signal_dev = signal_spectrum[k] - signal_average;
    const float noise_dev = conservative_noise_spectrum[k] - noise_average;
    covariance += signal_dev * noise_dev;
    noise_variance += noise_dev * noise_dev;
    signal_variance += signal_dev * signal_dev;
  }
  covariance *= kOneByFftSizeBy2Plus1;
  noise_variance *= kOneByFftSizeBy2Plus1;
  signal_variance *= kOneByFftSizeBy2Plus1;

  // Part of the signal variance that the noise template does not account for.
  const float spectral_diff =
      signal_variance - (covariance * covariance) / (noise_variance + 0.0001f);

  // Normalize.
  return spectral_diff / (diff_normalization + 0.0001f);
}
// Updates the time-smoothed spectral flatness from the input spectrum.
// The instantaneous value is exp(mean of log spectrum) divided by the mean of
// the spectrum, taken over bins 1 and up (bin 0 is excluded). Both means are
// scaled by 1 / kFftSizeBy2Plus1. If any of those bins is exactly zero the log
// is not evaluated and the smoothed value is decayed towards zero instead.
void UpdateSpectralFlatness(
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
    float signal_spectral_sum,
    float* spectral_flatness) {
  RTC_DCHECK(spectral_flatness);
  constexpr float kAveraging = 0.3f;

  // Handle the log(0) case separately.
  bool has_zero_bin = false;
  for (size_t k = 1; k < kFftSizeBy2Plus1 && !has_zero_bin; ++k) {
    has_zero_bin = (signal_spectrum[k] == 0.f);
  }
  if (has_zero_bin) {
    *spectral_flatness -= kAveraging * (*spectral_flatness);
    return;
  }

  // Numerator: exponential of the averaged log spectrum.
  float log_sum = 0.f;
  for (size_t k = 1; k < kFftSizeBy2Plus1; ++k) {
    log_sum += LogApproximation(signal_spectrum[k]);
  }
  const float log_mean = log_sum * kOneByFftSizeBy2Plus1;

  // Denominator: averaged spectrum without bin 0.
  const float arithmetic_mean =
      (signal_spectral_sum - signal_spectrum[0]) * kOneByFftSizeBy2Plus1;

  const float spectral_tmp = ExpApproximation(log_mean) / arithmetic_mean;

  // Time-avg update of spectral flatness feature.
  *spectral_flatness += kAveraging * (spectral_tmp - *spectral_flatness);
}
// Updates the per-bin time-smoothed log LRT values in `avg_log_lrt` from the
// prior and post SNR, and stores their average over all bins in `lrt`.
void UpdateSpectralLrt(rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
                       rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
                       rtc::ArrayView<float, kFftSizeBy2Plus1> avg_log_lrt,
                       float* lrt) {
  RTC_DCHECK(lrt);

  float smoothed_lrt_sum = 0.f;
  for (size_t k = 0; k < kFftSizeBy2Plus1; ++k) {
    const float one_plus_twice_prior = 1.f + 2.f * prior_snr[k];
    const float prior_ratio =
        2.f * prior_snr[k] / (one_plus_twice_prior + 0.0001f);
    const float bessel_tmp = (post_snr[k] + 1.f) * prior_ratio;

    // Move the stored value halfway towards the new instantaneous value.
    avg_log_lrt[k] +=
        .5f * (bessel_tmp - LogApproximation(one_plus_twice_prior) -
               avg_log_lrt[k]);

    // Accumulate the updated value for the average over bins.
    smoothed_lrt_sum += avg_log_lrt[k];
  }

  *lrt = smoothed_lrt_sum * kOneByFftSizeBy2Plus1;
}
} // namespace
SignalModelEstimator::SignalModelEstimator()
: prior_model_estimator_(kLtrFeatureThr) {}
void SignalModelEstimator::AdjustNormalization(int32_t num_analyzed_frames,
float signal_energy) {
diff_normalization_ *= num_analyzed_frames;
diff_normalization_ += signal_energy;
diff_normalization_ /= (num_analyzed_frames + 1);
}
// Updates the signal features (spectral flatness, spectral difference, LRT)
// for the current frame. While a feature window is running the features are
// added to the histograms; when the window ends the prior model is
// re-estimated from them.
void SignalModelEstimator::Update(
    rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
    float signal_spectral_sum,
    float signal_energy) {
  // Spectral flatness of the input spectrum.
  UpdateSpectralFlatness(signal_spectrum, signal_spectral_sum,
                         &features_.spectral_flatness);

  // Difference of the input spectrum to the learned/estimated noise spectrum,
  // followed by its time-averaged update.
  const float instantaneous_diff =
      ComputeSpectralDiff(conservative_noise_spectrum, signal_spectrum,
                          signal_spectral_sum, diff_normalization_);
  features_.spectral_diff +=
      0.3f * (instantaneous_diff - features_.spectral_diff);

  signal_energy_sum_ += signal_energy;

  // Histograms drive the parameter decisions (thresholds and weights for the
  // features). Parameters are extracted once per window.
  --histogram_analysis_counter_;
  if (histogram_analysis_counter_ <= 0) {
    // End of window: compute the model parameters, then clear the histograms
    // and restart the countdown.
    prior_model_estimator_.Update(histograms_);
    histograms_.Clear();
    histogram_analysis_counter_ = kFeatureUpdateWindowSize;

    // Normalization of the spectral difference for the next window: the
    // midpoint of the window's mean energy and the previous normalization.
    const float window_mean_energy =
        signal_energy_sum_ / kFeatureUpdateWindowSize;
    diff_normalization_ = 0.5f * (window_mean_energy + diff_normalization_);
    signal_energy_sum_ = 0.f;
  } else {
    histograms_.Update(features_);
  }

  // The LRT is computed last, after the histogram update above.
  UpdateSpectralLrt(prior_snr, post_snr, features_.avg_log_lrt, &features_.lrt);
}
} // namespace webrtc

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/ns/histograms.h"
#include "modules/audio_processing/ns/ns_common.h"
#include "modules/audio_processing/ns/prior_signal_model.h"
#include "modules/audio_processing/ns/prior_signal_model_estimator.h"
#include "modules/audio_processing/ns/signal_model.h"
namespace webrtc {
class SignalModelEstimator {
public:
SignalModelEstimator();
SignalModelEstimator(const SignalModelEstimator&) = delete;
SignalModelEstimator& operator=(const SignalModelEstimator&) = delete;
// Compute signal normalization during the initial startup phase.
void AdjustNormalization(int32_t num_analyzed_frames, float signal_energy);
void Update(
rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
float signal_spectral_sum,
float signal_energy);
const PriorSignalModel& get_prior_model() const {
return prior_model_estimator_.get_prior_model();
}
const SignalModel& get_model() { return features_; }
private:
float diff_normalization_ = 0.f;
float signal_energy_sum_ = 0.f;
Histograms histograms_;
int histogram_analysis_counter_ = 500;
PriorSignalModelEstimator prior_model_estimator_;
SignalModel features_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/speech_probability_estimator.h"
#include <math.h>
#include <algorithm>
#include "modules/audio_processing/ns/fast_math.h"
#include "rtc_base/checks.h"
namespace webrtc {
// All per-bin speech probabilities start at zero.
SpeechProbabilityEstimator::SpeechProbabilityEstimator() {
  std::fill(speech_probability_.begin(), speech_probability_.end(), 0.f);
}
// Updates the signal model with the current frame and recomputes the per-bin
// speech probability. Each of the three features is mapped through a
// tanh-based sigmoid around its prior-model threshold, the results are
// combined using the prior-model weights into a smoothed prior speech
// probability, and that prior is combined with the per-bin LRT.
void SpeechProbabilityEstimator::Update(
    int32_t num_analyzed_frames,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
    float signal_spectral_sum,
    float signal_energy) {
  // Update models. The normalization is only adjusted during startup.
  if (num_analyzed_frames < kLongStartupPhaseBlocks) {
    signal_model_estimator_.AdjustNormalization(num_analyzed_frames,
                                                signal_energy);
  }
  signal_model_estimator_.Update(prior_snr, post_snr,
                                 conservative_noise_spectrum, signal_spectrum,
                                 signal_spectral_sum, signal_energy);

  const SignalModel& model = signal_model_estimator_.get_model();
  const PriorSignalModel& prior_model =
      signal_model_estimator_.get_prior_model();

  // Width parameter in sigmoid map for prior model.
  constexpr float kWidthPrior0 = 4.f;
  // Width for pause region: lower range, so increase width in tanh map.
  constexpr float kWidthPrior1 = 2.f * kWidthPrior0;

  // Indicator function: sigmoid map of `delta`, scaled by `width`.
  const auto sigmoid_map = [](float width, float delta) {
    return 0.5f * (tanh(width * delta) + 1.f);
  };

  // Average LRT feature: larger width when below the threshold.
  const float lrt_width =
      model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0;
  const float lrt_indicator =
      sigmoid_map(lrt_width, model.lrt - prior_model.lrt);

  // Spectral flatness feature: larger width when above the threshold. The
  // difference is taken the other way round than for the other features.
  const float flatness_width =
      model.spectral_flatness > prior_model.flatness_threshold ? kWidthPrior1
                                                               : kWidthPrior0;
  const float flatness_indicator = sigmoid_map(
      flatness_width,
      prior_model.flatness_threshold - model.spectral_flatness);

  // Template spectrum-difference feature: larger width when below the
  // threshold.
  const float diff_width =
      model.spectral_diff < prior_model.template_diff_threshold ? kWidthPrior1
                                                                : kWidthPrior0;
  const float diff_indicator = sigmoid_map(
      diff_width, model.spectral_diff - prior_model.template_diff_threshold);

  // Combine the indicator functions with the feature weights.
  const float ind_prior = prior_model.lrt_weighting * lrt_indicator +
                          prior_model.flatness_weighting * flatness_indicator +
                          prior_model.difference_weighting * diff_indicator;

  // Smooth the prior probability and keep it within [0.01, 1].
  prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_);
  prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f);

  // Final speech probability: combine prior model with LR factor.
  const float gain_prior =
      (1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f);

  std::array<float, kFftSizeBy2Plus1> inv_lrt;
  ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt);
  for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
    speech_probability_[bin] = 1.f / (1.f + gain_prior * inv_lrt[bin]);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/ns/ns_common.h"
#include "modules/audio_processing/ns/signal_model_estimator.h"
namespace webrtc {
// Class for estimating the probability of speech.
// Owns a SignalModelEstimator and derives from it a smoothed prior speech
// probability plus one speech probability per frequency bin. Non-copyable.
class SpeechProbabilityEstimator {
public:
SpeechProbabilityEstimator();
SpeechProbabilityEstimator(const SpeechProbabilityEstimator&) = delete;
SpeechProbabilityEstimator& operator=(const SpeechProbabilityEstimator&) =
delete;
// Compute speech probability.
// Updates the signal model from the supplied frame data and refreshes both
// the prior probability and the per-bin probabilities. `num_analyzed_frames`
// decides whether the startup normalization is still being adjusted.
void Update(
int32_t num_analyzed_frames,
rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
float signal_spectral_sum,
float signal_energy);
// Returns the smoothed prior speech probability.
float get_prior_probability() const { return prior_speech_prob_; }
// Returns a view of the per-bin speech probabilities. The view refers to
// storage owned by this object.
rtc::ArrayView<const float> get_probability() { return speech_probability_; }
private:
SignalModelEstimator signal_model_estimator_;
// Smoothed prior speech probability; starts at 0.5.
float prior_speech_prob_ = .5f;
// Speech probability per frequency bin.
std::array<float, kFftSizeBy2Plus1> speech_probability_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/suppression_params.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Maps a suppression level to the concrete filter parameters.
//
// The members are first seeded with the 12 dB values so that they are never
// left uninitialized: the `default` branch only asserts in debug builds, so an
// out-of-range `suppression_level` would otherwise yield indeterminate
// parameters in release builds.
SuppressionParams::SuppressionParams(
    NsConfig::SuppressionLevel suppression_level)
    : over_subtraction_factor(1.f),
      minimum_attenuating_gain(0.25f),
      use_attenuation_adjustment(true) {
  switch (suppression_level) {
    case NsConfig::SuppressionLevel::k6dB:
      over_subtraction_factor = 1.f;
      // 6 dB attenuation.
      minimum_attenuating_gain = 0.5f;
      use_attenuation_adjustment = false;
      break;
    case NsConfig::SuppressionLevel::k12dB:
      over_subtraction_factor = 1.f;
      // 12 dB attenuation.
      minimum_attenuating_gain = 0.25f;
      use_attenuation_adjustment = true;
      break;
    case NsConfig::SuppressionLevel::k18dB:
      over_subtraction_factor = 1.1f;
      // 18 dB attenuation.
      minimum_attenuating_gain = 0.125f;
      use_attenuation_adjustment = true;
      break;
    case NsConfig::SuppressionLevel::k21dB:
      over_subtraction_factor = 1.25f;
      // 20.9 dB attenuation.
      minimum_attenuating_gain = 0.09f;
      use_attenuation_adjustment = true;
      break;
    default:
      // Not a valid level; the 12 dB values set above stay in effect.
      RTC_DCHECK_NOTREACHED();
  }
}
} // namespace webrtc

View File

@ -0,0 +1,30 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_
#define MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_
#include "modules/audio_processing/ns/ns_config.h"
namespace webrtc {
// Filter parameters derived from a NsConfig::SuppressionLevel. Non-copyable.
struct SuppressionParams {
// Selects the parameter set that belongs to `suppression_level`.
explicit SuppressionParams(NsConfig::SuppressionLevel suppression_level);
SuppressionParams(const SuppressionParams&) = delete;
SuppressionParams& operator=(const SuppressionParams&) = delete;
// Factor applied to the noise term when the suppression filter is computed.
float over_subtraction_factor;
// Lower bound of the filter gain (linear scale).
float minimum_attenuating_gain;
// Set for every level except k6dB.
// NOTE(review): the consumer of this flag is outside this excerpt — confirm
// its exact effect there.
bool use_attenuation_adjustment;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_

View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/wiener_filter.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include "modules/audio_processing/ns/fast_math.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Keeps a reference to `suppression_params` and resets the internal state:
// every gain starts at 1 and both spectral histories start at 0.
WienerFilter::WienerFilter(const SuppressionParams& suppression_params)
    : suppression_params_(suppression_params) {
  std::fill(spectrum_prev_process_.begin(), spectrum_prev_process_.end(), 0.f);
  std::fill(initial_spectral_estimate_.begin(),
            initial_spectral_estimate_.end(), 0.f);
  std::fill(filter_.begin(), filter_.end(), 1.f);
}
// Recomputes the per-bin gains in `filter_`.
//
// For each bin, an a priori SNR is formed as a 0.98 / 0.02 weighted sum of
// (a) the previous frame's signal-to-noise ratio scaled by the previous gain
// and (b) the current frame's SNR minus one (zero when the signal does not
// exceed the noise). The gain is snr / (over_subtraction_factor + snr),
// limited to [minimum_attenuating_gain, 1].
//
// While `num_analyzed_frames` is below kShortStartupPhaseBlocks, the gain is
// additionally blended with a startup gain derived from the accumulated signal
// spectrum and `parametric_noise_spectrum`. Finally `signal_spectrum` is
// stored for use by the next call.
void WienerFilter::Update(
    int32_t num_analyzed_frames,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> noise_spectrum,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_noise_spectrum,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> parametric_noise_spectrum,
    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
  const float over_subtraction = suppression_params_.over_subtraction_factor;
  const float min_gain = suppression_params_.minimum_attenuating_gain;

  for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
    // SNR term carried over from the previous frame, weighted by the gain that
    // was applied to it.
    const float snr_previous = spectrum_prev_process_[bin] /
                               (prev_noise_spectrum[bin] + 0.0001f) *
                               filter_[bin];

    // SNR term of the current frame; zero when the signal is not above the
    // noise estimate.
    const float snr_current =
        signal_spectrum[bin] > noise_spectrum[bin]
            ? signal_spectrum[bin] / (noise_spectrum[bin] + 0.0001f) - 1.f
            : 0.f;

    // Decision-directed estimate: weighted sum of the two terms above.
    const float snr_prior = 0.98f * snr_previous + (1.f - 0.98f) * snr_current;

    const float gain = snr_prior / (over_subtraction + snr_prior);
    filter_[bin] = std::max(std::min(gain, 1.f), min_gain);
  }

  if (num_analyzed_frames < kShortStartupPhaseBlocks) {
    constexpr float kOneByShortStartupPhaseBlocks =
        1.f / kShortStartupPhaseBlocks;
    // Weight of the startup gain; shrinks as more frames are analyzed.
    const auto startup_weight = kShortStartupPhaseBlocks - num_analyzed_frames;

    for (size_t bin = 0; bin < kFftSizeBy2Plus1; ++bin) {
      initial_spectral_estimate_[bin] += signal_spectrum[bin];

      float startup_gain =
          initial_spectral_estimate_[bin] -
          over_subtraction * parametric_noise_spectrum[bin];
      startup_gain /= initial_spectral_estimate_[bin] + 0.0001f;
      startup_gain = std::max(std::min(startup_gain, 1.f), min_gain);

      // Blend the regular gain and the startup gain, weighted by the number of
      // frames analyzed so far.
      startup_gain *= startup_weight;
      filter_[bin] *= num_analyzed_frames;
      filter_[bin] += startup_gain;
      filter_[bin] *= kOneByShortStartupPhaseBlocks;
    }
  }

  // Remember the current signal spectrum for the next call.
  std::copy(signal_spectrum.begin(), signal_spectrum.end(),
            spectrum_prev_process_.begin());
}
// Computes a single scaling factor from the energy ratio of the frame after
// and before filtering.
//
// Returns 1 when attenuation adjustment is disabled in the suppression
// parameters, or while `num_analyzed_frames` has not yet exceeded
// kLongStartupPhaseBlocks. Otherwise two candidate scales are derived from the
// energy gain and mixed using `prior_speech_probability` as the weight.
float WienerFilter::ComputeOverallScalingFactor(
    int32_t num_analyzed_frames,
    float prior_speech_probability,
    float energy_before_filtering,
    float energy_after_filtering) const {
  const bool adjustment_active =
      suppression_params_.use_attenuation_adjustment &&
      num_analyzed_frames > kLongStartupPhaseBlocks;
  if (!adjustment_active) {
    return 1.f;
  }

  // Threshold on the energy gain that selects which scale is adjusted.
  constexpr float kGainThreshold = 0.5f;

  const float energy_gain = SqrtFastApproximation(
      energy_after_filtering / (energy_before_filtering + 1.f));

  float speech_scale = 1.f;
  float noise_scale = 1.f;
  if (energy_gain > kGainThreshold) {
    speech_scale = 1.f + 1.3f * (energy_gain - kGainThreshold);
    // Keep the product of gain and scale from exceeding 1.
    if (energy_gain * speech_scale > 1.f) {
      speech_scale = 1.f / energy_gain;
    }
  } else if (energy_gain < kGainThreshold) {
    // Do not reduce the scale too much in pause regions; attenuation there is
    // meant to be controlled by the gain floor.
    const float floored_gain =
        std::max(energy_gain, suppression_params_.minimum_attenuating_gain);
    noise_scale = 1.f - 0.3f * (kGainThreshold - floored_gain);
  }

  // The prior speech probability is a single value, not frequency dependent.
  return prior_speech_probability * speech_scale +
         (1.f - prior_speech_probability) * noise_scale;
}
} // namespace webrtc

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
#define MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/ns/ns_common.h"
#include "modules/audio_processing/ns/suppression_params.h"
namespace webrtc {
// Estimates a Wiener-filter based frequency domain noise reduction filter.
// The filter consists of kFftSizeBy2Plus1 gains that are recomputed on every
// call to Update(). Non-copyable.
class WienerFilter {
public:
// Stores a reference to `suppression_params` (no copy is made), so the
// referenced object must outlive this WienerFilter. All gains start at 1.
explicit WienerFilter(const SuppressionParams& suppression_params);
WienerFilter(const WienerFilter&) = delete;
WienerFilter& operator=(const WienerFilter&) = delete;
// Updates the filter estimate.
// The gains are derived from `signal_spectrum`, `noise_spectrum` and
// `prev_noise_spectrum`, and limited to at most 1 and at least the configured
// minimum attenuating gain. While `num_analyzed_frames` is below
// kShortStartupPhaseBlocks, the result is blended with a startup gain based on
// `parametric_noise_spectrum`. `signal_spectrum` is retained for the next
// call.
void Update(
int32_t num_analyzed_frames,
rtc::ArrayView<const float, kFftSizeBy2Plus1> noise_spectrum,
rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_noise_spectrum,
rtc::ArrayView<const float, kFftSizeBy2Plus1> parametric_noise_spectrum,
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum);
// Compute an overall gain scaling factor.
// Returns 1 when attenuation adjustment is disabled in the suppression
// parameters or while `num_analyzed_frames` <= kLongStartupPhaseBlocks.
// Otherwise the factor is computed from the ratio of
// `energy_after_filtering` to `energy_before_filtering`, weighted by
// `prior_speech_probability`.
float ComputeOverallScalingFactor(int32_t num_analyzed_frames,
float prior_speech_probability,
float energy_before_filtering,
float energy_after_filtering) const;
// Returns the filter.
// The returned view is constructed from the member array `filter_`.
rtc::ArrayView<const float, kFftSizeBy2Plus1> get_filter() const {
return filter_;
}
private:
// Suppression settings supplied at construction; held by reference.
const SuppressionParams& suppression_params_;
// Signal spectrum passed to the previous Update() call (zero initially).
std::array<float, kFftSizeBy2Plus1> spectrum_prev_process_;
// Sum of the signal spectra seen by Update() during the short startup phase.
std::array<float, kFftSizeBy2Plus1> initial_spectral_estimate_;
// Current gain for each bin; initialized to 1.
std::array<float, kFftSizeBy2Plus1> filter_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_