Add VAD code.

This commit is contained in:
luocai
2024-09-06 18:26:45 +08:00
parent 35bf68338f
commit 2bed1dacf2
93 changed files with 12362 additions and 2 deletions

View File

@@ -0,0 +1,24 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
#include <stdint.h>
typedef struct {
int in_use;
int32_t send_bw_avg;
int32_t send_max_delay_avg;
int16_t bottleneck_idx;
int16_t jitter_info;
} IsacBandwidthInfo;
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
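Below is a minimal usage sketch, not part of this commit: a decoder would typically zero the struct before any bandwidth estimate has been produced, so that in_use reads as 0. The include path is inferred from the header's include guard.

#include <string.h>

#include "modules/audio_coding/codecs/isac/bandwidth_info.h"

/* Hypothetical helper: clear the estimate; in_use stays 0 until the
 * decoder writes a real bandwidth estimate into the struct. */
static void ResetBandwidthInfo(IsacBandwidthInfo* bwinfo) {
  memset(bwinfo, 0, sizeof(*bwinfo));
}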

View File

@@ -0,0 +1,195 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <memory.h>
#include <string.h>
#ifdef WEBRTC_ANDROID
#include <stdlib.h>
#endif
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
static void WebRtcIsac_AllPoleFilter(double* InOut,
double* Coef,
size_t lengthInOut,
int orderCoef) {
/* the state of filter is assumed to be in InOut[-1] to InOut[-orderCoef] */
double scal;
double sum;
size_t n;
int k;
//if (fabs(Coef[0]-1.0)<0.001) {
if ( (Coef[0] > 0.9999) && (Coef[0] < 1.0001) )
{
for(n = 0; n < lengthInOut; n++)
{
sum = Coef[1] * InOut[-1];
for(k = 2; k <= orderCoef; k++){
sum += Coef[k] * InOut[-k];
}
*InOut++ -= sum;
}
}
else
{
scal = 1.0 / Coef[0];
for(n=0;n<lengthInOut;n++)
{
*InOut *= scal;
for(k=1;k<=orderCoef;k++){
*InOut -= scal*Coef[k]*InOut[-k];
}
InOut++;
}
}
}
static void WebRtcIsac_AllZeroFilter(double* In,
double* Coef,
size_t lengthInOut,
int orderCoef,
double* Out) {
/* the state of filter is assumed to be in In[-1] to In[-orderCoef] */
size_t n;
int k;
double tmp;
for(n = 0; n < lengthInOut; n++)
{
tmp = In[0] * Coef[0];
for(k = 1; k <= orderCoef; k++){
tmp += Coef[k] * In[-k];
}
*Out++ = tmp;
In++;
}
}
static void WebRtcIsac_ZeroPoleFilter(double* In,
double* ZeroCoef,
double* PoleCoef,
size_t lengthInOut,
int orderCoef,
double* Out) {
/* the state of the zero section is assumed to be in In[-1] to In[-orderCoef] */
/* the state of the pole section is assumed to be in Out[-1] to Out[-orderCoef] */
WebRtcIsac_AllZeroFilter(In,ZeroCoef,lengthInOut,orderCoef,Out);
WebRtcIsac_AllPoleFilter(Out,PoleCoef,lengthInOut,orderCoef);
}
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order) {
size_t lag, n;
double sum, prod;
const double *x_lag;
for (lag = 0; lag <= order; lag++)
{
sum = 0.0f;
x_lag = &x[lag];
prod = x[0] * x_lag[0];
for (n = 1; n < N - lag; n++) {
sum += prod;
prod = x[n] * x_lag[n];
}
sum += prod;
r[lag] = sum;
}
}
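As a quick illustration of the convention used by WebRtcIsac_AutoCorr (r[lag] = sum over n of x[n] * x[n + lag], for lag = 0..order), here is a small standalone check; it is a sketch against this commit's filter_functions.h, not code from the source tree.

#include <stdio.h>

#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"

int main(void) {
  const double x[4] = {1.0, 2.0, 3.0, 4.0};
  double r[3];
  WebRtcIsac_AutoCorr(r, x, 4, 2);
  /* r[0] = 1+4+9+16 = 30, r[1] = 2+6+12 = 20, r[2] = 3+8 = 11 */
  printf("%.0f %.0f %.0f\n", r[0], r[1], r[2]);
  return 0;
}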
static void WebRtcIsac_BwExpand(double* out,
double* in,
double coef,
size_t length) {
size_t i;
double chirp;
chirp = coef;
out[0] = in[0];
for (i = 1; i < length; i++) {
out[i] = chirp * in[i];
chirp *= coef;
}
}
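WebRtcIsac_BwExpand scales the i-th coefficient by coef^i (out[i] = coef^i * in[i]), which pulls the LPC poles toward the origin and widens the formant bandwidths; for example, coef = 0.9 maps in = {1, 1, 1} to out = {1, 0.9, 0.81}. The weighting filter below applies it with rho = 0.9.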
void WebRtcIsac_WeightingFilter(const double* in,
double* weiout,
double* whiout,
WeightFiltstr* wfdata) {
double tmpbuffer[PITCH_FRAME_LEN + PITCH_WLPCBUFLEN];
double corr[PITCH_WLPCORDER+1], rc[PITCH_WLPCORDER+1];
double apol[PITCH_WLPCORDER+1], apolr[PITCH_WLPCORDER+1];
double rho=0.9, *inp, *dp, *dp2;
double whoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER];
double weoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER];
double *weo, *who, opol[PITCH_WLPCORDER+1], ext[PITCH_WLPCWINLEN];
int k, n, endpos, start;
/* Set up buffer and states */
memcpy(tmpbuffer, wfdata->buffer, sizeof(double) * PITCH_WLPCBUFLEN);
memcpy(tmpbuffer+PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN);
memcpy(wfdata->buffer, tmpbuffer+PITCH_FRAME_LEN, sizeof(double) * PITCH_WLPCBUFLEN);
dp=weoutbuf;
dp2=whoutbuf;
for (k=0;k<PITCH_WLPCORDER;k++) {
*dp++ = wfdata->weostate[k];
*dp2++ = wfdata->whostate[k];
opol[k]=0.0;
}
opol[0]=1.0;
opol[PITCH_WLPCORDER]=0.0;
weo=dp;
who=dp2;
endpos=PITCH_WLPCBUFLEN + PITCH_SUBFRAME_LEN;
inp=tmpbuffer + PITCH_WLPCBUFLEN;
for (n=0; n<PITCH_SUBFRAMES; n++) {
/* Windowing */
start=endpos-PITCH_WLPCWINLEN;
for (k=0; k<PITCH_WLPCWINLEN; k++) {
ext[k]=wfdata->window[k]*tmpbuffer[start+k];
}
/* Get LPC polynomial */
WebRtcIsac_AutoCorr(corr, ext, PITCH_WLPCWINLEN, PITCH_WLPCORDER);
corr[0]=1.01*corr[0]+1.0; /* White noise correction */
WebRtcIsac_LevDurb(apol, rc, corr, PITCH_WLPCORDER);
WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER+1);
/* Filtering */
WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, weo);
WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, who);
inp+=PITCH_SUBFRAME_LEN;
endpos+=PITCH_SUBFRAME_LEN;
weo+=PITCH_SUBFRAME_LEN;
who+=PITCH_SUBFRAME_LEN;
}
/* Export filter states */
for (k=0;k<PITCH_WLPCORDER;k++) {
wfdata->weostate[k]=weoutbuf[PITCH_FRAME_LEN+k];
wfdata->whostate[k]=whoutbuf[PITCH_FRAME_LEN+k];
}
/* Export output data */
memcpy(weiout, weoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN);
memcpy(whiout, whoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN);
}

View File

@@ -0,0 +1,25 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
#include <stddef.h>
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order);
void WebRtcIsac_WeightingFilter(const double* in,
double* weiout,
double* whiout,
WeightFiltstr* wfdata);
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_

View File

@@ -0,0 +1,409 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
#include <math.h>
void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata) {
int k;
for (k = 0; k < PITCH_BUFFSIZE; k++) {
pitchfiltdata->ubuf[k] = 0.0;
}
pitchfiltdata->ystate[0] = 0.0;
for (k = 1; k < (PITCH_DAMPORDER); k++) {
pitchfiltdata->ystate[k] = 0.0;
}
pitchfiltdata->oldlagp[0] = 50.0;
pitchfiltdata->oldgainp[0] = 0.0;
}
static void WebRtcIsac_InitWeightingFilter(WeightFiltstr* wfdata) {
int k;
double t, dtmp, dtmp2, denum, denum2;
for (k = 0; k < PITCH_WLPCBUFLEN; k++)
wfdata->buffer[k] = 0.0;
for (k = 0; k < PITCH_WLPCORDER; k++) {
wfdata->istate[k] = 0.0;
wfdata->weostate[k] = 0.0;
wfdata->whostate[k] = 0.0;
}
/* This part could be precomputed in MATLAB, writing the result to a global table. */
t = 0.5;
denum = 1.0 / ((double)PITCH_WLPCWINLEN);
denum2 = denum * denum;
for (k = 0; k < PITCH_WLPCWINLEN; k++) {
dtmp = PITCH_WLPCASYM * t * denum + (1 - PITCH_WLPCASYM) * t * t * denum2;
dtmp *= 3.14159265;
dtmp2 = sin(dtmp);
wfdata->window[k] = dtmp2 * dtmp2;
t++;
}
}
void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* State) {
int k;
for (k = 0; k < PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
PITCH_FRAME_LEN / 2 + 2;
k++)
State->dec_buffer[k] = 0.0;
for (k = 0; k < 2 * ALLPASSSECTIONS + 1; k++)
State->decimator_state[k] = 0.0;
for (k = 0; k < 2; k++)
State->hp_state[k] = 0.0;
for (k = 0; k < QLOOKAHEAD; k++)
State->whitened_buf[k] = 0.0;
for (k = 0; k < QLOOKAHEAD; k++)
State->inbuf[k] = 0.0;
WebRtcIsac_InitPitchFilter(&(State->PFstr_wght));
WebRtcIsac_InitPitchFilter(&(State->PFstr));
WebRtcIsac_InitWeightingFilter(&(State->Wghtstr));
}
void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata) {
int k;
for (k = 0; k < QLOOKAHEAD; k++) {
prefiltdata->INLABUF1[k] = 0;
prefiltdata->INLABUF2[k] = 0;
prefiltdata->INLABUF1_float[k] = 0;
prefiltdata->INLABUF2_float[k] = 0;
}
for (k = 0; k < 2 * (QORDER - 1); k++) {
prefiltdata->INSTAT1[k] = 0;
prefiltdata->INSTAT2[k] = 0;
prefiltdata->INSTATLA1[k] = 0;
prefiltdata->INSTATLA2[k] = 0;
prefiltdata->INSTAT1_float[k] = 0;
prefiltdata->INSTAT2_float[k] = 0;
prefiltdata->INSTATLA1_float[k] = 0;
prefiltdata->INSTATLA2_float[k] = 0;
}
/* High pass filter states */
prefiltdata->HPstates[0] = 0.0;
prefiltdata->HPstates[1] = 0.0;
prefiltdata->HPstates_float[0] = 0.0f;
prefiltdata->HPstates_float[1] = 0.0f;
return;
}
double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order) {
const double LEVINSON_EPS = 1.0e-10;
double sum, alpha;
size_t m, m_h, i;
alpha = 0; // Initialized to avoid a compiler warning. -DH
a[0] = 1.0;
if (r[0] < LEVINSON_EPS) { /* if r[0] <= 0, set LPC coeff. to zero */
for (i = 0; i < order; i++) {
k[i] = 0;
a[i + 1] = 0;
}
} else {
a[1] = k[0] = -r[1] / r[0];
alpha = r[0] + r[1] * k[0];
for (m = 1; m < order; m++) {
sum = r[m + 1];
for (i = 0; i < m; i++) {
sum += a[i + 1] * r[m - i];
}
k[m] = -sum / alpha;
alpha += k[m] * sum;
m_h = (m + 1) >> 1;
for (i = 0; i < m_h; i++) {
sum = a[i + 1] + k[m] * a[m - i];
a[m - i] += k[m] * a[i + 1];
a[i + 1] = sum;
}
a[m + 1] = k[m];
}
}
return alpha;
}
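A worked example of WebRtcIsac_LevDurb, assuming only what the function itself defines: for the AR(1) autocorrelation r[k] = 0.9^k, the recursion yields the predictor a = {1, -0.9, 0}, reflection coefficients k = {-0.9, 0}, and residual energy alpha = r[0] * (1 - 0.9^2) = 0.19. A standalone sketch:

#include <stdio.h>

#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"

int main(void) {
  double r[3] = {1.0, 0.9, 0.81}; /* autocorrelation of an AR(1) process */
  double a[3];                    /* LPC polynomial, a[0] == 1 */
  double k[2];                    /* reflection coefficients */
  double alpha = WebRtcIsac_LevDurb(a, k, r, 2);
  printf("a1=%.2f a2=%.2f alpha=%.2f\n", a[1], a[2], alpha); /* -0.90 0.00 0.19 */
  return 0;
}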
/* The upper channel all-pass filter factors */
const float WebRtcIsac_kUpperApFactorsFloat[2] = {0.03470000000000f,
0.38260000000000f};
/* The lower channel all-pass filter factors */
const float WebRtcIsac_kLowerApFactorsFloat[2] = {0.15440000000000f,
0.74400000000000f};
/* This function performs all-pass filtering--a series of first order all-pass
* sections are used to filter the input in a cascade manner.
* The input is overwritten!!
*/
void WebRtcIsac_AllPassFilter2Float(float* InOut,
const float* APSectionFactors,
int lengthInOut,
int NumberOfSections,
float* FilterState) {
int n, j;
float temp;
for (j = 0; j < NumberOfSections; j++) {
for (n = 0; n < lengthInOut; n++) {
temp = FilterState[j] + APSectionFactors[j] * InOut[n];
FilterState[j] = -APSectionFactors[j] * temp + InOut[n];
InOut[n] = temp;
}
}
}
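Each section here is a first-order all-pass: with c = APSectionFactors[j] and state s, the loop computes y[n] = c*x[n] + s[n] and then s[n+1] = x[n] - c*y[n], which rearranges to y[n] + c*y[n-1] = c*x[n] + x[n-1], i.e. H(z) = (c + z^-1) / (1 + c*z^-1). |H(z)| = 1 at every frequency, so cascading sections only shapes phase, which is what the polyphase split below relies on.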
/* The number of composite all-pass filter factors */
#define NUMBEROFCOMPOSITEAPSECTIONS 4
/* Function WebRtcIsac_SplitAndFilter
* This function creates low-pass and high-pass decimated versions of part of
the input signal, and part of the signal in the input 'lookahead buffer'.
INPUTS:
in: a length FRAMESAMPLES array of input samples
prefiltdata: input data structure containing the filterbank states
and lookahead samples from the previous encoding
iteration.
OUTPUTS:
LP: a FRAMESAMPLES_HALF array of low-pass filtered samples that
have been phase equalized. The first QLOOKAHEAD samples are
based on the samples in the two prefiltdata->INLABUFx arrays
each of length QLOOKAHEAD.
The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based
on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input
array in[].
HP: a FRAMESAMPLES_HALF array of high-pass filtered samples that
have been phase equalized. The first QLOOKAHEAD samples are
based on the samples in the two prefiltdata->INLABUFx arrays
each of length QLOOKAHEAD.
The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based
on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input
array in[].
LP_la: a FRAMESAMPLES_HALF array of low-pass filtered samples.
These samples are not phase equalized. They are computed
from the samples in the in[] array.
HP_la: a FRAMESAMPLES_HALF array of high-pass filtered samples
that are not phase equalized. They are computed from
the in[] vector.
prefiltdata: this input data structure's filterbank state and
lookahead sample buffers are updated for the next
encoding iteration.
*/
void WebRtcIsac_SplitAndFilterFloat(float* pin,
float* LP,
float* HP,
double* LP_la,
double* HP_la,
PreFiltBankstr* prefiltdata) {
int k, n;
float CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS];
float ForTransform_CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS];
float ForTransform_CompositeAPFilterState2[NUMBEROFCOMPOSITEAPSECTIONS];
float tempinoutvec[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
float tempin_ch1[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
float tempin_ch2[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
float in[FRAMESAMPLES];
float ftmp;
/* HPstcoeff_in = {a1, a2, b1 - b0 * a1, b2 - b0 * a2}; */
static const float kHpStCoefInFloat[4] = {
-1.94895953203325f, 0.94984516000000f, -0.05101826139794f,
0.05015484000000f};
/* The composite all-pass filter factors */
static const float WebRtcIsac_kCompositeApFactorsFloat[4] = {
0.03470000000000f, 0.15440000000000f, 0.38260000000000f,
0.74400000000000f};
// The matrix for transforming the backward composite state to upper channel
// state.
static const float WebRtcIsac_kTransform1Float[8] = {
-0.00158678506084f, 0.00127157815343f, -0.00104805672709f,
0.00084837248079f, 0.00134467983258f, -0.00107756549387f,
0.00088814793277f, -0.00071893072525f};
// The matrix for transforming the backward composite state to lower channel
// state.
static const float WebRtcIsac_kTransform2Float[8] = {
-0.00170686041697f, 0.00136780109829f, -0.00112736532350f,
0.00091257055385f, 0.00103094281812f, -0.00082615076557f,
0.00068092756088f, -0.00055119165484f};
/* High pass filter */
for (k = 0; k < FRAMESAMPLES; k++) {
in[k] = pin[k] + kHpStCoefInFloat[2] * prefiltdata->HPstates_float[0] +
kHpStCoefInFloat[3] * prefiltdata->HPstates_float[1];
ftmp = pin[k] - kHpStCoefInFloat[0] * prefiltdata->HPstates_float[0] -
kHpStCoefInFloat[1] * prefiltdata->HPstates_float[1];
prefiltdata->HPstates_float[1] = prefiltdata->HPstates_float[0];
prefiltdata->HPstates_float[0] = ftmp;
}
/* First Channel */
/*initial state of composite filter is zero */
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
CompositeAPFilterState[k] = 0.0;
}
/* put every other sample of input into a temporary vector in reverse
* (backward) order*/
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempinoutvec[k] = in[FRAMESAMPLES - 1 - 2 * k];
}
/* now all-pass filter the backwards vector. Output values overwrite the
* input vector. */
WebRtcIsac_AllPassFilter2Float(
tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF,
NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
/* save the backwards filtered output for later forward filtering,
but write it in forward order*/
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempin_ch1[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k];
}
/* save the backwards filter state because it will be transformed
later into a forward state */
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
ForTransform_CompositeAPFilterState[k] = CompositeAPFilterState[k];
}
/* now backwards filter the samples in the lookahead buffer. The samples were
placed there in the encoding of the previous frame. The output samples
overwrite the input samples */
WebRtcIsac_AllPassFilter2Float(
prefiltdata->INLABUF1_float, WebRtcIsac_kCompositeApFactorsFloat,
QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
/* save the output, but write it in forward order */
/* write the lookahead samples for the next encoding iteration. Every other
sample at the end of the input frame is written in reverse order for the
lookahead length. Exported in the prefiltdata structure. */
for (k = 0; k < QLOOKAHEAD; k++) {
tempin_ch1[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF1_float[k];
prefiltdata->INLABUF1_float[k] = in[FRAMESAMPLES - 1 - 2 * k];
}
/* Second Channel. This is exactly like the first channel, except that the
even samples are now filtered instead (lower channel). */
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
CompositeAPFilterState[k] = 0.0;
}
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempinoutvec[k] = in[FRAMESAMPLES - 2 - 2 * k];
}
WebRtcIsac_AllPassFilter2Float(
tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF,
NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempin_ch2[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k];
}
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
ForTransform_CompositeAPFilterState2[k] = CompositeAPFilterState[k];
}
WebRtcIsac_AllPassFilter2Float(
prefiltdata->INLABUF2_float, WebRtcIsac_kCompositeApFactorsFloat,
QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
for (k = 0; k < QLOOKAHEAD; k++) {
tempin_ch2[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF2_float[k];
prefiltdata->INLABUF2_float[k] = in[FRAMESAMPLES - 2 - 2 * k];
}
/* Transform filter states from backward to forward */
/* At this point, the state of each backwards composite filter for the
two channels is transformed into a forward filtering state for the
corresponding forward channel filter. Each channel's forward filtering
state from the previous encoding iteration is added to the transformed
state to get a proper forward state */
/* So the existing NUMBEROFCOMPOSITEAPSECTIONS x 1 (4x1) state vector is
multiplied by a NUMBEROFCHANNELAPSECTIONSxNUMBEROFCOMPOSITEAPSECTIONS (2x4)
transform matrix to get the new state that is added to the previous 2x1
input state */
for (k = 0; k < NUMBEROFCHANNELAPSECTIONS; k++) { /* k is row variable */
for (n = 0; n < NUMBEROFCOMPOSITEAPSECTIONS;
n++) { /* n is column variable */
prefiltdata->INSTAT1_float[k] +=
ForTransform_CompositeAPFilterState[n] *
WebRtcIsac_kTransform1Float[k * NUMBEROFCHANNELAPSECTIONS + n];
prefiltdata->INSTAT2_float[k] +=
ForTransform_CompositeAPFilterState2[n] *
WebRtcIsac_kTransform2Float[k * NUMBEROFCHANNELAPSECTIONS + n];
}
}
/*obtain polyphase components by forward all-pass filtering through each
* channel */
/* the backward filtered samples are now forward filtered with the
* corresponding channel filters */
/* The all pass filtering automatically updates the filter states which are
exported in the prefiltdata structure */
WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat,
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
prefiltdata->INSTAT1_float);
WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat,
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
prefiltdata->INSTAT2_float);
/* Now Construct low-pass and high-pass signals as combinations of polyphase
* components */
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
LP[k] = 0.5f * (tempin_ch1[k] + tempin_ch2[k]); /* low pass signal*/
HP[k] = 0.5f * (tempin_ch1[k] - tempin_ch2[k]); /* high pass signal*/
}
/* Lookahead LP and HP signals */
/* now create low pass and high pass signals of the input vector. However, no
backwards filtering is performed, and hence no phase equalization is
involved. Also, the input contains some samples that are lookahead samples.
The high pass and low pass signals that are created are used outside this
function for analysis (not encoding) purposes */
/* set up input */
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempin_ch1[k] = in[2 * k + 1];
tempin_ch2[k] = in[2 * k];
}
/* the input filter states are passed in and updated by the all-pass filtering
routine and exported in the prefiltdata structure*/
WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat,
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
prefiltdata->INSTATLA1_float);
WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat,
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
prefiltdata->INSTATLA2_float);
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
LP_la[k] = (float)(0.5f * (tempin_ch1[k] + tempin_ch2[k])); /*low pass */
HP_la[k] = (double)(0.5f * (tempin_ch1[k] - tempin_ch2[k])); /* high pass */
}
}
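A hedged sketch of how an encoder might drive the filterbank each frame; the helper name is hypothetical and the buffer sizes follow the documentation block above:

#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"

/* Split one FRAMESAMPLES input frame into phase-equalized half-band
 * signals (LP/HP) plus non-equalized lookahead bands (LP_la/HP_la),
 * each of length FRAMESAMPLES_HALF. */
static void SplitFrameExample(float* frame, PreFiltBankstr* bank,
                              float* lp, float* hp,
                              double* lp_la, double* hp_la) {
  /* `bank` must have been reset once with WebRtcIsac_InitPreFilterbank(). */
  WebRtcIsac_SplitAndFilterFloat(frame, lp, hp, lp_la, hp_la, bank);
}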

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
#include <stddef.h>
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata);
void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* state);
void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata);
double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order);
/* The number of all-pass filter factors in an upper or lower channel*/
#define NUMBEROFCHANNELAPSECTIONS 2
/* The upper channel all-pass filter factors */
extern const float WebRtcIsac_kUpperApFactorsFloat[2];
/* The lower channel all-pass filter factors */
extern const float WebRtcIsac_kLowerApFactorsFloat[2];
void WebRtcIsac_AllPassFilter2Float(float* InOut,
const float* APSectionFactors,
int lengthInOut,
int NumberOfSections,
float* FilterState);
void WebRtcIsac_SplitAndFilterFloat(float* in,
float* LP,
float* HP,
double* LP_la,
double* HP_la,
PreFiltBankstr* prefiltdata);
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_

View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
#include <math.h>
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_POSIX)
#define WebRtcIsac_lrint lrint
#elif (defined(WEBRTC_ARCH_X86) && defined(WIN32))
static __inline long int WebRtcIsac_lrint(double x_dbl) {
long int x_int;
__asm {
fld x_dbl
fistp x_int
}
;
return x_int;
}
#else // Do a slow but correct implementation of lrint
static __inline long int WebRtcIsac_lrint(double x_dbl) {
long int x_int;
x_int = (long int)floor(x_dbl + 0.499999999999);
return x_int;
}
#endif
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
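For illustration (not from the source), all three paths round to the nearest integer, though exact .5 ties can differ between them: lrint honors the current rounding mode (typically ties-to-even), while the fallback's 0.499999999999 offset rounds ties down.

#include <stdio.h>

#include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"

int main(void) {
  printf("%ld %ld\n", WebRtcIsac_lrint(2.4), WebRtcIsac_lrint(-2.6)); /* 2 -3 */
  return 0;
}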

View File

@@ -0,0 +1,695 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include <math.h>
#include <memory.h>
#include <string.h>
#ifdef WEBRTC_ANDROID
#include <stdlib.h>
#endif
#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
#include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h"
#include "rtc_base/system/ignore_warnings.h"
static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640};
/* interpolation filter */
__inline static void IntrepolFilter(double *data_ptr, double *intrp)
{
*intrp = kInterpolWin[0] * data_ptr[-3];
*intrp += kInterpolWin[1] * data_ptr[-2];
*intrp += kInterpolWin[2] * data_ptr[-1];
*intrp += kInterpolWin[3] * data_ptr[0];
*intrp += kInterpolWin[4] * data_ptr[1];
*intrp += kInterpolWin[5] * data_ptr[2];
*intrp += kInterpolWin[6] * data_ptr[3];
*intrp += kInterpolWin[7] * data_ptr[4];
}
/* 2D parabolic interpolation */
/* probably some 0.5 factors can be eliminated, and the square roots can be removed from the Cholesky factorization. */
__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
{
double c, b[2], A[2][2];
double t1, t2, d;
double delta1, delta2;
// double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
// should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
c = T[1][1];
b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
t1 = 0.5 * (T[0][0] + T[2][2]) - c;
t2 = 0.5 * (T[2][0] + T[0][2]) - c;
d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
A[0][0] = -t1 - 0.5 * d;
A[1][1] = -t2 - 0.5 * d;
/* deal with singularities or ill-conditioned cases */
if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
*peak_val = T[1][1];
return;
}
/* Cholesky decomposition: replace A by upper-triangular factor */
A[0][0] = sqrt(A[0][0]);
A[0][1] = A[0][1] / A[0][0];
A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
/* compute [x; y] = -0.5 * inv(A) * b */
t1 = b[0] / A[0][0];
t2 = (b[1] - t1 * A[0][1]) / A[1][1];
delta2 = t2 / A[1][1];
delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
delta2 *= 0.5;
/* limit norm */
t1 = delta1 * delta1 + delta2 * delta2;
if (t1 > 1.0) {
delta1 /= t1;
delta2 /= t1;
}
*peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
*x += delta1;
*y += delta2;
}
static void PCorr(const double *in, double *outcorr)
{
double sum, ysum, prod;
const double *x, *inptr;
int k, n;
//ysum = 1e-6; /* use this with float (instead of double)! */
ysum = 1e-13;
sum = 0.0;
x = in + PITCH_MAX_LAG/2 + 2;
for (n = 0; n < PITCH_CORR_LEN2; n++) {
ysum += in[n] * in[n];
sum += x[n] * in[n];
}
outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */
*outcorr = sum / sqrt(ysum);
for (k = 1; k < PITCH_LAG_SPAN2; k++) {
ysum -= in[k-1] * in[k-1];
ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
sum = 0.0;
inptr = &in[k];
prod = x[0] * inptr[0];
for (n = 1; n < PITCH_CORR_LEN2; n++) {
sum += prod;
prod = x[n] * inptr[n];
}
sum += prod;
outcorr--;
*outcorr = sum / sqrt(ysum);
}
}
static void WebRtcIsac_AllpassFilterForDec(double* InOut,
const double* APSectionFactors,
size_t lengthInOut,
double* FilterState) {
// This performs all-pass filtering--a series of first order all-pass
// sections are used to filter the input in a cascade manner.
size_t n, j;
double temp;
for (j = 0; j < ALLPASSSECTIONS; j++) {
for (n = 0; n < lengthInOut; n += 2) {
temp = InOut[n]; // store input
InOut[n] = FilterState[j] + APSectionFactors[j] * temp;
FilterState[j] = -APSectionFactors[j] * InOut[n] + temp;
}
}
}
static void WebRtcIsac_DecimateAllpass(
const double* in,
double* state_in, // array of size: 2*ALLPASSSECTIONS+1
size_t N, // number of input samples
double* out) { // array of size N/2
static const double APupper[ALLPASSSECTIONS] = {0.0347, 0.3826};
static const double APlower[ALLPASSSECTIONS] = {0.1544, 0.744};
size_t n;
double data_vec[PITCH_FRAME_LEN];
/* copy input */
memcpy(data_vec + 1, in, sizeof(double) * (N - 1));
data_vec[0] = state_in[2 * ALLPASSSECTIONS]; // the z^(-1) state
state_in[2 * ALLPASSSECTIONS] = in[N - 1];
WebRtcIsac_AllpassFilterForDec(data_vec + 1, APupper, N, state_in);
WebRtcIsac_AllpassFilterForDec(data_vec, APlower, N,
state_in + ALLPASSSECTIONS);
for (n = 0; n < N / 2; n++)
out[n] = data_vec[2 * n] + data_vec[2 * n + 1];
}
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
static void WebRtcIsac_InitializePitch(const double* in,
const double old_lag,
const double old_gain,
PitchAnalysisStruct* State,
double* lags) {
double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
double ratio, log_lag, gain_bias;
double bias;
double corrvec1[PITCH_LAG_SPAN2];
double corrvec2[PITCH_LAG_SPAN2];
int m, k;
// Allocating 10 extra entries at the beginning of the CorrSurf
double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
double* CorrSurf[2*PITCH_BW+3];
double *CorrSurfPtr1, *CorrSurfPtr2;
double LagWin[3] = {0.2, 0.5, 0.98};
int ind1, ind2, peaks_ind, peak, max_ind;
int peaks[PITCH_MAX_NUM_PEAKS];
double adj, gain_tmp;
double corr, corr_max;
double intrp_a, intrp_b, intrp_c, intrp_d;
double peak_vals[PITCH_MAX_NUM_PEAKS];
double lags1[PITCH_MAX_NUM_PEAKS];
double lags2[PITCH_MAX_NUM_PEAKS];
double T[3][3];
int row;
for(k = 0; k < 2*PITCH_BW+3; k++)
{
CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
}
/* reset CorrSurf matrix */
memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
/* Initialized to avoid compiler warnings. -DH */
max_ind = 0;
peak = 0;
/* copy old values from state buffer */
memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
/* decimation; put result after the old values */
WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
&buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]);
/* low-pass filtering */
for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++)
buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2];
/* copy end part back into state buffer */
memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
/* compute correlation for first and second half of the frame */
PCorr(buf_dec, corrvec1);
PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2);
/* bias towards pitch lag of previous frame */
log_lag = log(0.5 * old_lag);
gain_bias = 4.0 * old_gain * old_gain;
if (gain_bias > 0.8) gain_bias = 0.8;
for (k = 0; k < PITCH_LAG_SPAN2; k++)
{
ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag;
bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
corrvec1[k] *= bias;
}
/* taper correlation functions */
for (k = 0; k < 3; k++) {
gain_tmp = LagWin[k];
corrvec1[k] *= gain_tmp;
corrvec2[k] *= gain_tmp;
corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
}
corr_max = 0.0;
/* fill middle row of correlation surface */
ind1 = 0;
ind2 = 0;
CorrSurfPtr1 = &CorrSurf[PITCH_BW][2];
for (k = 0; k < PITCH_LAG_SPAN2; k++) {
corr = corrvec1[ind1++] + corrvec2[ind2++];
CorrSurfPtr1[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
}
}
/* fill first and last rows of correlation surface */
ind1 = 0;
ind2 = PITCH_BW;
CorrSurfPtr1 = &CorrSurf[0][2];
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2];
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) {
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
}
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
CorrSurfPtr2[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
}
}
/* fill second and next to last rows of correlation surface */
ind1 = 0;
ind2 = PITCH_BW-1;
CorrSurfPtr1 = &CorrSurf[1][2];
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1];
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) {
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
}
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
CorrSurfPtr2[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
}
}
/* fill remainder of correlation surface */
for (m = 2; m < PITCH_BW; m++) {
ind1 = 0;
ind2 = PITCH_BW - m; /* always larger than ind1 */
CorrSurfPtr1 = &CorrSurf[m][2];
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m];
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) {
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
}
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
CorrSurfPtr2[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
}
}
}
/* threshold value to qualify as a peak */
corr_max *= 0.6;
peaks_ind = 0;
/* find peaks */
for (m = 1; m < PITCH_BW+1; m++) {
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
CorrSurfPtr1 = &CorrSurf[m][2];
for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) {
corr = CorrSurfPtr1[k];
if (corr > corr_max) {
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
/* found a peak; store index into matrix */
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
}
}
}
}
}
for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) {
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
CorrSurfPtr1 = &CorrSurf[m][2];
for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) {
corr = CorrSurfPtr1[k];
if (corr > corr_max) {
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
/* found a peak; store index into matrix */
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
}
}
}
}
}
if (peaks_ind > 0) {
/* examine each peak */
CorrSurfPtr1 = &CorrSurf[0][0];
for (k = 0; k < peaks_ind; k++) {
peak = peaks[k];
/* compute four interpolated values around current peak */
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a);
IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b);
IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c);
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d);
/* determine maximum of the interpolated values */
corr = CorrSurfPtr1[peak];
corr_max = intrp_a;
if (intrp_b > corr_max) corr_max = intrp_b;
if (intrp_c > corr_max) corr_max = intrp_c;
if (intrp_d > corr_max) corr_max = intrp_d;
/* determine where the peak sits and fill a 3x3 matrix around it */
row = peak / (PITCH_LAG_SPAN2+4);
lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
lags2[k] = (double) (lags1[k] + PITCH_BW - row);
if ( corr > corr_max ) {
T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
T[1][1] = corr;
T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
T[1][0] = intrp_a;
T[0][1] = intrp_b;
T[2][1] = intrp_c;
T[1][2] = intrp_d;
} else {
if (intrp_a == corr_max) {
lags1[k] -= 0.5;
lags2[k] += 0.5;
IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]);
IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]);
T[1][1] = intrp_a;
T[0][2] = intrp_b;
T[2][2] = intrp_c;
T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)];
T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
T[1][2] = corr;
} else if (intrp_b == corr_max) {
lags1[k] -= 0.5;
lags2[k] -= 0.5;
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]);
T[2][0] = intrp_a;
T[1][1] = intrp_b;
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]);
T[2][2] = intrp_d;
T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
T[0][1] = CorrSurfPtr1[peak - 1];
T[2][1] = corr;
T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
} else if (intrp_c == corr_max) {
lags1[k] += 0.5;
lags2[k] += 0.5;
T[0][0] = intrp_a;
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]);
T[1][1] = intrp_c;
T[0][2] = intrp_d;
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]);
T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
T[0][1] = corr;
T[2][1] = CorrSurfPtr1[peak + 1];
T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
} else {
lags1[k] += 0.5;
lags2[k] -= 0.5;
T[0][0] = intrp_b;
T[2][0] = intrp_c;
T[1][1] = intrp_d;
IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]);
IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]);
T[1][0] = corr;
T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)];
}
}
/* 2D parabolic interpolation gives more accurate lags and peak value */
Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]);
}
/* determine the highest peak, after applying a bias towards short lags */
corr_max = 0.0;
for (k = 0; k < peaks_ind; k++) {
corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k]));
if (corr > corr_max) {
corr_max = corr;
peak = k;
}
}
lags1[peak] *= 2.0;
lags2[peak] *= 2.0;
if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG;
if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG;
if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG;
if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG;
/* store lags of highest peak in output array */
lags[0] = lags1[peak];
lags[1] = lags1[peak];
lags[2] = lags2[peak];
lags[3] = lags2[peak];
}
else
{
row = max_ind / (PITCH_LAG_SPAN2+4);
lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
lags2[0] = (double) (lags1[0] + PITCH_BW - row);
if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG;
if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG;
if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG;
if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG;
/* store lags of highest peak in output array */
lags[0] = lags1[0];
lags[1] = lags1[0];
lags[2] = lags2[0];
lags[3] = lags2[0];
}
}
RTC_POP_IGNORING_WFRAME_LARGER_THAN()
/* create weighting matrix by orthogonalizing a basis of polynomials of increasing order
* t = (0:4)';
* A = [t.^0, t.^1, t.^2, t.^3, t.^4];
* [Q, dummy] = qr(A);
* P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
static const double kWeight[5][5] = {
{ 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286},
{-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857},
{-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286},
{ 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143},
{ 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286}
};
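Because the diag([0, .1, .5, 1, 1]) term assigns zero weight to the constant basis polynomial, every row of kWeight sums to zero (first row: 0.29714285714286 - 0.30857142857143 - 0.05714285714286 + 0.05142857142857 + 0.01714285714286 = 0), so adding the same offset to old_gain and all four gains incurs no fluctuation penalty; only changes in the gain trajectory are penalized.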
/* second order high-pass filter */
static void WebRtcIsac_Highpass(const double* in,
double* out,
double* state,
size_t N) {
/* create high-pass filter coefficients
* z = 0.998 * exp(j*2*pi*35/8000);
* p = 0.94 * exp(j*2*pi*140/8000);
* HP_b = [1, -2*real(z), abs(z)^2];
* HP_a = [1, -2*real(p), abs(p)^2]; */
static const double a_coef[2] = { 1.86864659625574, -0.88360000000000};
static const double b_coef[2] = {-1.99524591718270, 0.99600400000000};
size_t k;
for (k=0; k<N; k++) {
*out = *in + state[1];
state[1] = state[0] + b_coef[0] * *in + a_coef[0] * *out;
state[0] = b_coef[1] * *in++ + a_coef[1] * *out++;
}
}
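The constants in a_coef and b_coef follow from the pole/zero specification in the comment; the sketch below (illustrative only, not from the source) reproduces them, noting that a_coef stores the denominator coefficients negated because the recursion adds rather than subtracts them:

#include <math.h>
#include <stdio.h>

int main(void) {
  const double kPi = 3.14159265358979323846;
  const double rz = 0.998, wz = 2.0 * kPi * 35.0 / 8000.0;  /* zero */
  const double rp = 0.94, wp = 2.0 * kPi * 140.0 / 8000.0;  /* pole */
  /* HP_b = [1, -2*Re(z), |z|^2]; HP_a = [1, -2*Re(p), |p|^2] */
  printf("b:  %.14f %.14f\n", -2.0 * rz * cos(wz), rz * rz);
  printf("-a: %.14f %.14f\n", 2.0 * rp * cos(wp), -rp * rp);
  return 0;
}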
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */
double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
PitchAnalysisStruct *State,
double *lags,
double *gains)
{
double HPin[PITCH_FRAME_LEN];
double Weighted[PITCH_FRAME_LEN];
double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead
double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
double old_lag, old_gain;
double nrg_wht, tmp;
double Wnrg, Wfluct, Wgain;
double H[4][4];
double grad[4];
double dG[4];
int k, m, n, iter;
/* high pass filtering using second order pole-zero filter */
WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
/* copy from state into buffer */
memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
/* compute weighted and whitened signals */
WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
/* copy from buffer into state */
memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
old_lag = State->PFstr_wght.oldlagp[0];
old_gain = State->PFstr_wght.oldgainp[0];
/* initial pitch estimate */
WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
/* Iterative optimization of lags - to be done */
/* compute energy of whitened signal */
nrg_wht = 0.0;
for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
nrg_wht += Whitened[k] * Whitened[k];
/* Iterative optimization of gains */
/* set weights for energy, gain fluctuation, and spectral gain penalty functions */
Wnrg = 1.0 / nrg_wht;
Wgain = 0.005;
Wfluct = 3.0;
/* set initial gains */
for (k = 0; k < 4; k++)
gains[k] = PITCH_MAX_GAIN_06;
/* two iterations should be enough */
for (iter = 0; iter < 2; iter++) {
/* compute Jacobian of pre-filter output towards gains */
WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
/* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
for (k = 0; k < 4; k++) {
tmp = 0.0;
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
tmp += out_G[n] * out_dG[k][n];
grad[k] = tmp * Wnrg;
}
for (k = 0; k < 4; k++) {
for (m = 0; m <= k; m++) {
tmp = 0.0;
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
tmp += out_dG[m][n] * out_dG[k][n];
H[k][m] = tmp * Wnrg;
}
}
/* add gradient and Hessian (lower triangle) for dampening fast gain changes */
for (k = 0; k < 4; k++) {
tmp = kWeight[k+1][0] * old_gain;
for (m = 0; m < 4; m++)
tmp += kWeight[k+1][m+1] * gains[m];
grad[k] += tmp * Wfluct;
}
for (k = 0; k < 4; k++) {
for (m = 0; m <= k; m++) {
H[k][m] += kWeight[k+1][m+1] * Wfluct;
}
}
/* add gradient and Hessian for dampening gain */
for (k = 0; k < 3; k++) {
tmp = 1.0 / (1 - gains[k]);
grad[k] += tmp * tmp * Wgain;
H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
}
tmp = 1.0 / (1 - gains[3]);
grad[3] += 1.33 * (tmp * tmp * Wgain);
H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
/* compute Cholesky factorization of Hessian
* by overwriting the upper triangle; scale factors on diagonal
* (for non-PC platforms, store the inverses of the diagonal entries separately to minimize divisions) */
H[0][1] = H[1][0] / H[0][0];
H[0][2] = H[2][0] / H[0][0];
H[0][3] = H[3][0] / H[0][0];
H[1][1] -= H[0][0] * H[0][1] * H[0][1];
H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
/* Compute update as delta_gains = -inv(H) * grad */
/* copy and negate */
for (k = 0; k < 4; k++)
dG[k] = -grad[k];
/* back substitution */
dG[1] -= dG[0] * H[0][1];
dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
/* scale */
for (k = 0; k < 4; k++)
dG[k] /= H[k][k];
/* back substitution */
dG[2] -= dG[3] * H[2][3];
dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
/* update gains and check range */
for (k = 0; k < 4; k++) {
gains[k] += dG[k];
if (gains[k] > PITCH_MAX_GAIN)
gains[k] = PITCH_MAX_GAIN;
else if (gains[k] < 0.0)
gains[k] = 0.0;
}
}
/* update state for next frame */
WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
/* concatenate previous input's end and current input */
memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
/* lookahead pitch filtering for masking analysis */
WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
/* store last part of input */
for (k = 0; k < QLOOKAHEAD; k++)
State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
}
RTC_POP_IGNORING_WFRAME_LARGER_THAN()
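A hedged caller sketch (the helper name is hypothetical; buffer sizes follow the parameter comments above):

#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"

/* Run pitch analysis on one PITCH_FRAME_LEN frame; `state` must have been
 * reset once with WebRtcIsac_InitPitchAnalysis(). */
static void AnalyzeFrameExample(const double* frame, PitchAnalysisStruct* state,
                                double* out /* PITCH_FRAME_LEN + QLOOKAHEAD */) {
  double lags[4];  /* one pitch lag per sub-frame */
  double gains[4]; /* one pitch gain per sub-frame */
  WebRtcIsac_PitchAnalysis(frame, out, state, lags, gains);
}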

View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* pitch_estimator.h
*
* Pitch functions
*
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
#include <stddef.h>
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
void WebRtcIsac_PitchAnalysis(
const double* in, /* PITCH_FRAME_LEN samples */
double* out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
PitchAnalysisStruct* State,
double* lags,
double* gains);
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ */

View File

@@ -0,0 +1,388 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <memory.h>
#include <stdlib.h>
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"
#include "rtc_base/compile_assert_c.h"
/*
* We are implementing the following filters:
*
* Pre-filtering:
* y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
*
* Post-filtering:
* y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
*
* Note that `lag` is a floating number so we perform an interpolation to
* obtain the correct `lag`.
*
*/
static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
-0.07};
/* interpolation coefficients; generated by design_pitch_filter.m */
static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
{-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125,
0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109,
0.01754159521746},
{-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643,
0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084,
0.01654127246314},
{-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787,
0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311,
0.01295781500709},
{-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393,
0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166,
0.00719783432422},
{-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001,
0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000,
-0.00000000000000},
{0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562,
0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377,
-0.00764851320885},
{0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650,
0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059,
-0.01463300534216},
{0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190,
0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865,
-0.01985640750433}
};
/*
* Enumerating the operation of the filter.
* iSAC has 4 different pitch-filter which are very similar in their structure.
*
* kPitchFilterPre : In this mode the filter is operating as pitch
* pre-filter. This is used at the encoder.
* kPitchFilterPost : In this mode the filter is operating as pitch
* post-filter. This is the inverse of pre-filter and used
* in the decoder.
* kPitchFilterPreLa : This is, in structure, similar to pre-filtering but
* utilizing 3 millisecond lookahead. It is used to
* obtain the signal for LPC analysis.
* kPitchFilterPreGain : This is, in structure, similar to pre-filtering but
* differential changes in gain are considered. This is
* used to find the optimal gain.
*/
typedef enum {
kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain
} PitchFilterOperation;
/*
* Structure with parameters used for pitch-filtering.
* buffer : a buffer where the sum of previous inputs and outputs
* are stored.
* damper_state : the state of the damping filter. The filter is defined by
* `kDampFilter`.
* interpol_coeff : pointer to a set of coefficient which are used to utilize
* fractional pitch by interpolation.
* gain : pitch-gain to be applied to the current segment of input.
* lag : pitch-lag for the current segment of input.
* lag_offset : the offset of lag w.r.t. current sample.
* sub_frame : sub-frame index, there are 4 pitch sub-frames in an iSAC
* frame.
* mode : specifies the usage of the filter. See
* 'PitchFilterOperation' for operational modes.
* num_samples : number of samples to be processed in each segment.
* index : index of the input and output sample.
* damper_state_dg : state of damping filter for different trial gains.
* gain_mult : differential changes to gain.
*/
typedef struct {
double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
double damper_state[PITCH_DAMPORDER];
const double *interpol_coeff;
double gain;
double lag;
int lag_offset;
int sub_frame;
PitchFilterOperation mode;
int num_samples;
int index;
double damper_state_dg[4][PITCH_DAMPORDER];
double gain_mult[4];
} PitchFilterParam;
/**********************************************************************
* FilterSegment()
* Filter one segment, a quarter of a frame.
*
* Inputs
* in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
* filter_param : pitch filter parameters.
*
* Outputs
* out_data : pointer to a buffer where the filtered signal is written to.
* out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
* where the output of different gain values (differential
* change to gain) is written.
*/
static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
double* out_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
int n;
int m;
int j;
double sum;
double sum2;
/* Index of `parameters->buffer` where the output is written to. */
int pos = parameters->index + PITCH_BUFFSIZE;
/* Index of `parameters->buffer` where samples are read for fractional-lag
* computation. */
int pos_lag = pos - parameters->lag_offset;
for (n = 0; n < parameters->num_samples; ++n) {
/* Shift low pass filter states. */
for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
parameters->damper_state[m] = parameters->damper_state[m - 1];
}
/* Filter to get fractional pitch. */
sum = 0.0;
for (m = 0; m < PITCH_FRACORDER; ++m) {
sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
}
/* Multiply with gain. */
parameters->damper_state[0] = parameters->gain * sum;
if (parameters->mode == kPitchFilterPreGain) {
int lag_index = parameters->index - parameters->lag_offset;
int m_tmp = (lag_index < 0) ? -lag_index : 0;
/* Update the damper state for the new sample. */
for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
for (j = 0; j < 4; ++j) {
parameters->damper_state_dg[j][m] =
parameters->damper_state_dg[j][m - 1];
}
}
for (j = 0; j < parameters->sub_frame + 1; ++j) {
/* Filter for fractional pitch. */
sum2 = 0.0;
for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
/* `lag_index + m` is always larger than or equal to zero, see how
* m_tmp is computed. This is equivalent to assume samples outside
* `out_dg[j]` are zero. */
sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
}
/* Add the contribution of differential gain change. */
parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
parameters->gain * sum2;
}
/* Filter with damping filter, and store the results. */
for (j = 0; j < parameters->sub_frame + 1; ++j) {
sum = 0.0;
for (m = 0; m < PITCH_DAMPORDER; ++m) {
sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
}
out_dg[j][parameters->index] = sum;
}
}
/* Filter with damping filter. */
sum = 0.0;
for (m = 0; m < PITCH_DAMPORDER; ++m) {
sum += parameters->damper_state[m] * kDampFilter[m];
}
/* Subtract from input and update buffer. */
out_data[parameters->index] = in_data[parameters->index] - sum;
parameters->buffer[pos] = in_data[parameters->index] +
out_data[parameters->index];
++parameters->index;
++pos;
++pos_lag;
}
return;
}
/* Update filter parameters based on the pitch-gains and pitch-lags. */
static void Update(PitchFilterParam* parameters) {
double fraction;
int fraction_index;
/* Compute integer lag-offset. */
parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
0.5);
/* Find correct set of coefficients for computing fractional pitch. */
fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
parameters->interpol_coeff = kIntrpCoef[fraction_index];
if (parameters->mode == kPitchFilterPreGain) {
/* If in this mode make a differential change to pitch gain. */
parameters->gain_mult[parameters->sub_frame] += 0.2;
if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
parameters->gain_mult[parameters->sub_frame] = 1.0;
}
if (parameters->sub_frame > 0) {
parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
}
}
}
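A worked example of Update(), assuming PITCH_FILTDELAY is 1.5 and PITCH_FRACS is 8 (their values in the iSAC settings header, which is not part of this excerpt): for lag = 60.3, lag_offset = lrint(60.3 + 1.5 + 0.5) = 62, fraction = 62 - 61.8 = 0.2, and fraction_index = lrint(8 * 0.2 - 0.5) = 1, selecting kIntrpCoef[1]. A fraction near 0.5 selects the middle row of the table, whose center tap is 1 and whose other taps vanish, i.e. no fractional shift.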
/******************************************************************************
* FilterFrame()
* Filter a frame of 30 millisecond, given pitch-lags and pitch-gains.
*
* Inputs
* in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
* lags : pointer to pitch-lags, 4 lags per frame.
* gains : pointer to pitch-gains, 4 gains per frame.
* mode : defining the functionality of the filter. It takes the
* following values.
* kPitchFilterPre: Pitch pre-filter, used at encoder.
* kPitchFilterPost: Pitch post-filter, used at decoder.
* kPitchFilterPreLa: Pitch pre-filter with lookahead.
* kPitchFilterPreGain: Pitch pre-filter used to obtain optimal
* pitch-gains.
*
* Outputs
* out_data : pointer to a buffer where the filtered signal is written to.
* out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
* where the output of different gain values (differential
* change to gain) is written.
*/
static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
double* lags, double* gains, PitchFilterOperation mode,
double* out_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
PitchFilterParam filter_parameters;
double gain_delta, lag_delta;
double old_lag, old_gain;
int n;
int m;
const double kEnhancer = 1.3;
/* Set up buffer and states. */
filter_parameters.index = 0;
filter_parameters.lag_offset = 0;
filter_parameters.mode = mode;
/* Copy states to local variables. */
memcpy(filter_parameters.buffer, filter_state->ubuf,
sizeof(filter_state->ubuf));
RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >=
sizeof(filter_state->ubuf));
memset(filter_parameters.buffer +
sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]),
0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf));
memcpy(filter_parameters.damper_state, filter_state->ystate,
sizeof(filter_state->ystate));
if (mode == kPitchFilterPreGain) {
/* Clear buffers. */
memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult));
memset(filter_parameters.damper_state_dg, 0,
sizeof(filter_parameters.damper_state_dg));
for (n = 0; n < PITCH_SUBFRAMES; ++n) {
//memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
memset(out_dg[n], 0, sizeof(out_dg[n]));
}
} else if (mode == kPitchFilterPost) {
/* Make output more periodic. Negative sign is to change the structure
* of the filter. */
for (n = 0; n < PITCH_SUBFRAMES; ++n) {
gains[n] *= -kEnhancer;
}
}
old_lag = *filter_state->oldlagp;
old_gain = *filter_state->oldgainp;
/* No interpolation if pitch lag step is big. */
if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
(lags[0] < (PITCH_DOWNSTEP * old_lag))) {
old_lag = lags[0];
old_gain = gains[0];
if (mode == kPitchFilterPreGain) {
filter_parameters.gain_mult[0] = 1.0;
}
}
filter_parameters.num_samples = PITCH_UPDATE;
for (m = 0; m < PITCH_SUBFRAMES; ++m) {
/* Set the sub-frame value. */
filter_parameters.sub_frame = m;
/* Calculate interpolation steps for pitch-lag and pitch-gain. */
lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
filter_parameters.lag = old_lag;
gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
filter_parameters.gain = old_gain;
/* Store for the next sub-frame. */
old_lag = lags[m];
old_gain = gains[m];
for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
/* Step-wise interpolation of pitch gains and lags. As pitch-lag changes,
* some parameters of the filter need to be updated. */
filter_parameters.gain += gain_delta;
filter_parameters.lag += lag_delta;
/* Update parameters according to new lag value. */
Update(&filter_parameters);
/* Filter a segment of input. */
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
}
}
if (mode != kPitchFilterPreGain) {
/* Export buffer and states. */
memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
sizeof(filter_state->ubuf));
memcpy(filter_state->ystate, filter_parameters.damper_state,
sizeof(filter_state->ystate));
/* Store for the next frame. */
*filter_state->oldlagp = old_lag;
*filter_state->oldgainp = old_gain;
}
if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
/* Filter the lookahead segment; this is treated as the last sub-frame, so
* set `filter_parameters` to the last sub-frame. */
filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
filter_parameters.num_samples = QLOOKAHEAD;
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
}
}
void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data,
PitchFiltstr* pf_state, double* lags,
double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
}
void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data,
PitchFiltstr* pf_state, double* lags,
double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
NULL);
}
void WebRtcIsac_PitchfilterPre_gains(
double* in_data, double* out_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state,
double* lags, double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
out_dg);
}
void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data,
PitchFiltstr* pf_state, double* lags,
double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
}
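/* Illustrative usage sketch (not part of the original file): pre-filtering
 * one 30 ms low-band frame. The lag and gain values are hypothetical; a real
 * caller obtains them from the pitch analysis.
 *
 *   PitchFiltstr state = {{0}};         // Zero state before the first frame.
 *   double in[PITCH_FRAME_LEN] = {0};   // 240 samples at 8 kHz.
 *   double out[PITCH_FRAME_LEN];
 *   double lags[PITCH_SUBFRAMES] = {60.0, 60.5, 61.0, 61.5};
 *   double gains[PITCH_SUBFRAMES] = {0.30, 0.32, 0.35, 0.33};
 *   WebRtcIsac_PitchfilterPre(in, out, &state, lags, gains);
 */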

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
void WebRtcIsac_PitchfilterPre(double* indat,
double* outdat,
PitchFiltstr* pfp,
double* lags,
double* gains);
void WebRtcIsac_PitchfilterPost(double* indat,
double* outdat,
PitchFiltstr* pfp,
double* lags,
double* gains);
void WebRtcIsac_PitchfilterPre_la(double* indat,
double* outdat,
PitchFiltstr* pfp,
double* lags,
double* gains);
void WebRtcIsac_PitchfilterPre_gains(
double* indat,
double* outdat,
double out_dG[][PITCH_FRAME_LEN + QLOOKAHEAD],
PitchFiltstr* pfp,
double* lags,
double* gains);
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_

View File

@ -0,0 +1,196 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* settings.h
*
* Declaration of #defines used in the iSAC codec
*
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
/* sampling frequency (Hz) */
#define FS 16000
/* number of samples per frame (either 320 (20ms), 480 (30ms) or 960 (60ms)) */
#define INITIAL_FRAMESAMPLES 960
/* do not modify the following; this will have to be modified if we
* have a 20ms framesize option */
/**********************************************************************/
/* milliseconds */
#define FRAMESIZE 30
/* number of samples per frame processed in the encoder, 480 */
#define FRAMESAMPLES 480 /* ((FRAMESIZE*FS)/1000) */
#define FRAMESAMPLES_HALF 240
#define FRAMESAMPLES_QUARTER 120
/**********************************************************************/
/* max number of samples per frame (= 60 ms frame) */
#define MAX_FRAMESAMPLES 960
#define MAX_SWBFRAMESAMPLES (MAX_FRAMESAMPLES * 2)
/* number of samples per 10ms frame */
#define FRAMESAMPLES_10ms ((10 * FS) / 1000)
#define SWBFRAMESAMPLES_10ms (FRAMESAMPLES_10ms * 2)
/* number of samples in 30 ms frame */
#define FRAMESAMPLES_30ms 480
/* number of subframes */
#define SUBFRAMES 6
/* length of a subframe */
#define UPDATE 80
/* length of half a subframe (low/high band) */
#define HALF_SUBFRAMELEN (UPDATE / 2)
/* samples of look ahead (in a half-band, so actually
* half the samples of look ahead @ FS) */
#define QLOOKAHEAD 24 /* 3 ms */
/* order of AR model in spectral entropy coder */
#define AR_ORDER 6
/* order of LP model in spectral entropy coder */
#define LP_ORDER 0
/* window length (masking analysis) */
#define WINLEN 256
/* order of low-band pole filter used to approximate masking curve */
#define ORDERLO 12
/* order of hi-band pole filter used to approximate masking curve */
#define ORDERHI 6
#define UB_LPC_ORDER 4
#define UB_LPC_VEC_PER_FRAME 2
#define UB16_LPC_VEC_PER_FRAME 4
#define UB_ACTIVE_SUBFRAMES 2
#define UB_MAX_LPC_ORDER 6
#define UB_INTERPOL_SEGMENTS 1
#define UB16_INTERPOL_SEGMENTS 3
#define LB_TOTAL_DELAY_SAMPLES 48
enum ISACBandwidth { isac8kHz = 8, isac12kHz = 12, isac16kHz = 16 };
enum ISACBand {
kIsacLowerBand = 0,
kIsacUpperBand12 = 1,
kIsacUpperBand16 = 2
};
enum IsacSamplingRate { kIsacWideband = 16, kIsacSuperWideband = 32 };
#define UB_LPC_GAIN_DIM SUBFRAMES
#define FB_STATE_SIZE_WORD32 6
/* order for post_filter_bank */
#define POSTQORDER 3
/* order for pre-filterbank */
#define QORDER 3
/* another order */
#define QORDER_ALL (POSTQORDER + QORDER - 1)
/* for decimator */
#define ALLPASSSECTIONS 2
/* array size for byte stream in number of bytes. */
/* The old maximum size still needed for the decoding */
#define STREAM_SIZE_MAX 600
#define STREAM_SIZE_MAX_30 200 /* 200 bytes = 53.3 kbps @ 30 ms frame length */
#define STREAM_SIZE_MAX_60 400 /* 400 bytes = 53.3 kbps @ 60 ms frame length */
/* storage size for bit counts */
#define BIT_COUNTER_SIZE 30
/* maximum order of any AR model or filter */
#define MAX_AR_MODEL_ORDER 12 // 50
/* For pitch analysis */
#define PITCH_FRAME_LEN (FRAMESAMPLES_HALF) /* 30 ms */
#define PITCH_MAX_LAG 140 /* 57 Hz */
#define PITCH_MIN_LAG 20 /* 400 Hz */
#define PITCH_MAX_GAIN 0.45
#define PITCH_MAX_GAIN_06 0.27 /* PITCH_MAX_GAIN*0.6 */
#define PITCH_MAX_GAIN_Q12 1843
#define PITCH_LAG_SPAN2 (PITCH_MAX_LAG / 2 - PITCH_MIN_LAG / 2 + 5)
#define PITCH_CORR_LEN2 60 /* 15 ms */
#define PITCH_CORR_STEP2 (PITCH_FRAME_LEN / 4)
#define PITCH_BW 11 /* half the band width of correlation surface */
#define PITCH_SUBFRAMES 4
#define PITCH_GRAN_PER_SUBFRAME 5
#define PITCH_SUBFRAME_LEN (PITCH_FRAME_LEN / PITCH_SUBFRAMES)
#define PITCH_UPDATE (PITCH_SUBFRAME_LEN / PITCH_GRAN_PER_SUBFRAME)
/* maximum number of peaks to be examined in correlation surface */
#define PITCH_MAX_NUM_PEAKS 10
#define PITCH_PEAK_DECAY 0.85
/* For weighting filter */
#define PITCH_WLPCORDER 6
#define PITCH_WLPCWINLEN PITCH_FRAME_LEN
#define PITCH_WLPCASYM 0.3 /* asymmetry parameter */
#define PITCH_WLPCBUFLEN PITCH_WLPCWINLEN
/* For pitch filter */
/* Extra 50 for fraction and LP filters */
#define PITCH_BUFFSIZE (PITCH_MAX_LAG + 50)
#define PITCH_INTBUFFSIZE (PITCH_FRAME_LEN + PITCH_BUFFSIZE)
/* Max rel. step for interpolation */
#define PITCH_UPSTEP 1.5
/* Max rel. step for interpolation */
#define PITCH_DOWNSTEP 0.67
#define PITCH_FRACS 8
#define PITCH_FRACORDER 9
#define PITCH_DAMPORDER 5
#define PITCH_FILTDELAY 1.5f
/* stepsize for quantization of the pitch Gain */
#define PITCH_GAIN_STEPSIZE 0.125
/* Order of high pass filter */
#define HPORDER 2
/* some mathematical constants */
/* log2(exp) */
#define LOG2EXP 1.44269504088896
#define PI 3.14159265358979
/* Maximum number of iterations allowed to limit payload size */
#define MAX_PAYLOAD_LIMIT_ITERATION 5
/* Redundant Coding */
#define RCU_BOTTLENECK_BPS 16000
#define RCU_TRANSCODING_SCALE 0.40f
#define RCU_TRANSCODING_SCALE_INVERSE 2.5f
#define RCU_TRANSCODING_SCALE_UB 0.50f
#define RCU_TRANSCODING_SCALE_UB_INVERSE 2.0f
/* Define Error codes */
/* 6000 General */
#define ISAC_MEMORY_ALLOCATION_FAILED 6010
#define ISAC_MODE_MISMATCH 6020
#define ISAC_DISALLOWED_BOTTLENECK 6030
#define ISAC_DISALLOWED_FRAME_LENGTH 6040
#define ISAC_UNSUPPORTED_SAMPLING_FREQUENCY 6050
/* 6200 Bandwidth estimator */
#define ISAC_RANGE_ERROR_BW_ESTIMATOR 6240
/* 6400 Encoder */
#define ISAC_ENCODER_NOT_INITIATED 6410
#define ISAC_DISALLOWED_CODING_MODE 6420
#define ISAC_DISALLOWED_FRAME_MODE_ENCODER 6430
#define ISAC_DISALLOWED_BITSTREAM_LENGTH 6440
#define ISAC_PAYLOAD_LARGER_THAN_LIMIT 6450
#define ISAC_DISALLOWED_ENCODER_BANDWIDTH 6460
/* 6600 Decoder */
#define ISAC_DECODER_NOT_INITIATED 6610
#define ISAC_EMPTY_PACKET 6620
#define ISAC_DISALLOWED_FRAME_MODE_DECODER 6630
#define ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH 6640
#define ISAC_RANGE_ERROR_DECODE_BANDWIDTH 6650
#define ISAC_RANGE_ERROR_DECODE_PITCH_GAIN 6660
#define ISAC_RANGE_ERROR_DECODE_PITCH_LAG 6670
#define ISAC_RANGE_ERROR_DECODE_LPC 6680
#define ISAC_RANGE_ERROR_DECODE_SPECTRUM 6690
#define ISAC_LENGTH_MISMATCH 6730
#define ISAC_RANGE_ERROR_DECODE_BANDWITH 6740
#define ISAC_DISALLOWED_BANDWIDTH_MODE_DECODER 6750
#define ISAC_DISALLOWED_LPC_MODEL 6760
/* 6800 Call setup formats */
#define ISAC_INCOMPATIBLE_FORMATS 6810
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ */

View File

@ -0,0 +1,448 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* structs.h
*
* This header file contains all the structs used in the ISAC codec
*
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
#include "modules/audio_coding/codecs/isac/bandwidth_info.h"
#include "modules/audio_coding/codecs/isac/main/source/settings.h"
#include "modules/third_party/fft/fft.h"
typedef struct Bitstreamstruct {
uint8_t stream[STREAM_SIZE_MAX];
uint32_t W_upper;
uint32_t streamval;
uint32_t stream_index;
} Bitstr;
typedef struct {
double DataBufferLo[WINLEN];
double DataBufferHi[WINLEN];
double CorrBufLo[ORDERLO + 1];
double CorrBufHi[ORDERHI + 1];
float PreStateLoF[ORDERLO + 1];
float PreStateLoG[ORDERLO + 1];
float PreStateHiF[ORDERHI + 1];
float PreStateHiG[ORDERHI + 1];
float PostStateLoF[ORDERLO + 1];
float PostStateLoG[ORDERLO + 1];
float PostStateHiF[ORDERHI + 1];
float PostStateHiG[ORDERHI + 1];
double OldEnergy;
} MaskFiltstr;
typedef struct {
// state vectors for each of the two analysis filters
double INSTAT1[2 * (QORDER - 1)];
double INSTAT2[2 * (QORDER - 1)];
double INSTATLA1[2 * (QORDER - 1)];
double INSTATLA2[2 * (QORDER - 1)];
double INLABUF1[QLOOKAHEAD];
double INLABUF2[QLOOKAHEAD];
float INSTAT1_float[2 * (QORDER - 1)];
float INSTAT2_float[2 * (QORDER - 1)];
float INSTATLA1_float[2 * (QORDER - 1)];
float INSTATLA2_float[2 * (QORDER - 1)];
float INLABUF1_float[QLOOKAHEAD];
float INLABUF2_float[QLOOKAHEAD];
/* High pass filter */
double HPstates[HPORDER];
float HPstates_float[HPORDER];
} PreFiltBankstr;
typedef struct {
// state vectors for each of the two analysis filters
double STATE_0_LOWER[2 * POSTQORDER];
double STATE_0_UPPER[2 * POSTQORDER];
/* High pass filter */
double HPstates1[HPORDER];
double HPstates2[HPORDER];
float STATE_0_LOWER_float[2 * POSTQORDER];
float STATE_0_UPPER_float[2 * POSTQORDER];
float HPstates1_float[HPORDER];
float HPstates2_float[HPORDER];
} PostFiltBankstr;
typedef struct {
// data buffer for pitch filter
double ubuf[PITCH_BUFFSIZE];
// low pass state vector
double ystate[PITCH_DAMPORDER];
// old lag and gain
double oldlagp[1];
double oldgainp[1];
} PitchFiltstr;
typedef struct {
// data buffer
double buffer[PITCH_WLPCBUFLEN];
// state vectors
double istate[PITCH_WLPCORDER];
double weostate[PITCH_WLPCORDER];
double whostate[PITCH_WLPCORDER];
// LPC window -> should be a global array because it is constant
double window[PITCH_WLPCWINLEN];
} WeightFiltstr;
typedef struct {
// for initial estimator
double dec_buffer[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
PITCH_FRAME_LEN / 2 + 2];
double decimator_state[2 * ALLPASSSECTIONS + 1];
double hp_state[2];
double whitened_buf[QLOOKAHEAD];
double inbuf[QLOOKAHEAD];
PitchFiltstr PFstr_wght;
PitchFiltstr PFstr;
WeightFiltstr Wghtstr;
} PitchAnalysisStruct;
/* Have instance of struct together with other iSAC structs */
typedef struct {
/* Previous frame length (in ms) */
int32_t prev_frame_length;
/* Previous RTP timestamp from received
packet (in samples relative to the beginning) */
int32_t prev_rec_rtp_number;
/* Send timestamp for previous packet (in ms using timeGetTime()) */
uint32_t prev_rec_send_ts;
/* Arrival time for previous packet (in ms using timeGetTime()) */
uint32_t prev_rec_arr_ts;
/* rate of previous packet, derived from RTP timestamps (in bits/s) */
float prev_rec_rtp_rate;
/* Time since the last update of the BN estimate (in ms) */
uint32_t last_update_ts;
/* Time since the last reduction (in ms) */
uint32_t last_reduction_ts;
/* How many times the estimate was updated in the beginning */
int32_t count_tot_updates_rec;
/* The estimated bottle neck rate from there to here (in bits/s) */
int32_t rec_bw;
float rec_bw_inv;
float rec_bw_avg;
float rec_bw_avg_Q;
/* The estimated mean absolute jitter value,
as seen on this side (in ms) */
float rec_jitter;
float rec_jitter_short_term;
float rec_jitter_short_term_abs;
float rec_max_delay;
float rec_max_delay_avg_Q;
/* (assumed) bitrate for headers (bps) */
float rec_header_rate;
/* The estimated bottle neck rate from here to there (in bits/s) */
float send_bw_avg;
/* The estimated mean absolute jitter value, as seen on
the other side (in ms) */
float send_max_delay_avg;
// number of packets received since last update
int num_pkts_rec;
int num_consec_rec_pkts_over_30k;
// flag for marking that a high speed network has been
// detected downstream
int hsn_detect_rec;
int num_consec_snt_pkts_over_30k;
// flag for marking that a high speed network has
// been detected upstream
int hsn_detect_snd;
uint32_t start_wait_period;
int in_wait_period;
int change_to_WB;
uint32_t senderTimestamp;
uint32_t receiverTimestamp;
// enum IsacSamplingRate incomingStreamSampFreq;
uint16_t numConsecLatePkts;
float consecLatency;
int16_t inWaitLatePkts;
IsacBandwidthInfo external_bw_info;
} BwEstimatorstr;
typedef struct {
/* boolean, flags if previous packet exceeded B.N. */
int PrevExceed;
/* ms */
int ExceedAgo;
/* packets left to send in current burst */
int BurstCounter;
/* packets */
int InitCounter;
/* ms remaining in buffer when next packet will be sent */
double StillBuffered;
} RateModel;
/* The following struct is used to store data from encoding, to make it
fast and easy to construct a new bitstream with a different bandwidth
estimate. All values (except framelength and minBytes) are double sized to
handle 60 ms of data.
*/
typedef struct {
/* Used to keep track of if it is first or second part of 60 msec packet */
int startIdx;
/* Frame length in samples */
int16_t framelength;
/* Pitch Gain */
int pitchGain_index[2];
/* Pitch Lag */
double meanGain[2];
int pitchIndex[PITCH_SUBFRAMES * 2];
/* LPC */
int LPCindex_s[108 * 2]; /* KLT_ORDER_SHAPE = 108 */
int LPCindex_g[12 * 2]; /* KLT_ORDER_GAIN = 12 */
double LPCcoeffs_lo[(ORDERLO + 1) * SUBFRAMES * 2];
double LPCcoeffs_hi[(ORDERHI + 1) * SUBFRAMES * 2];
/* Encode Spec */
int16_t fre[FRAMESAMPLES];
int16_t fim[FRAMESAMPLES];
int16_t AvgPitchGain[2];
/* Used in adaptive mode only */
int minBytes;
} IsacSaveEncoderData;
typedef struct {
int indexLPCShape[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
double lpcGain[SUBFRAMES << 1];
int lpcGainIndex[SUBFRAMES << 1];
Bitstr bitStreamObj;
int16_t realFFT[FRAMESAMPLES_HALF];
int16_t imagFFT[FRAMESAMPLES_HALF];
} ISACUBSaveEncDataStruct;
typedef struct {
Bitstr bitstr_obj;
MaskFiltstr maskfiltstr_obj;
PreFiltBankstr prefiltbankstr_obj;
PitchFiltstr pitchfiltstr_obj;
PitchAnalysisStruct pitchanalysisstr_obj;
FFTstr fftstr_obj;
IsacSaveEncoderData SaveEnc_obj;
int buffer_index;
int16_t current_framesamples;
float data_buffer_float[FRAMESAMPLES_30ms];
int frame_nb;
double bottleneck;
int16_t new_framelength;
double s2nr;
/* Maximum allowed number of bits for a 30 msec packet */
int16_t payloadLimitBytes30;
/* Maximum allowed number of bits for a 30 msec packet */
int16_t payloadLimitBytes60;
/* Maximum allowed number of bits for both 30 and 60 msec packet */
int16_t maxPayloadBytes;
/* Maximum allowed rate in bytes per 30 msec packet */
int16_t maxRateInBytes;
/*---
If set to 1, iSAC will not adapt the frame-size when used in
channel-adaptive mode. The initial value will be used for all rates.
---*/
int16_t enforceFrameSize;
/*-----
This records the BWE index the encoder injected into the bit-stream.
It will be used in RCU. The redundant payload carries the same BWE index
as the main payload. We cannot retrieve it from the BWE because it is
a recursive procedure (WebRtcIsac_GetDownlinkBwJitIndexImpl) and has to be
called only once per encode.
-----*/
int16_t lastBWIdx;
} ISACLBEncStruct;
typedef struct {
Bitstr bitstr_obj;
MaskFiltstr maskfiltstr_obj;
PreFiltBankstr prefiltbankstr_obj;
FFTstr fftstr_obj;
ISACUBSaveEncDataStruct SaveEnc_obj;
int buffer_index;
float data_buffer_float[MAX_FRAMESAMPLES + LB_TOTAL_DELAY_SAMPLES];
double bottleneck;
/* Maximum allowed number of bits for a 30 msec packet */
// int16_t payloadLimitBytes30;
/* Maximum allowed number of bits for both 30 and 60 msec packet */
// int16_t maxPayloadBytes;
int16_t maxPayloadSizeBytes;
double lastLPCVec[UB_LPC_ORDER];
int16_t numBytesUsed;
int16_t lastJitterInfo;
} ISACUBEncStruct;
typedef struct {
Bitstr bitstr_obj;
MaskFiltstr maskfiltstr_obj;
PostFiltBankstr postfiltbankstr_obj;
PitchFiltstr pitchfiltstr_obj;
FFTstr fftstr_obj;
} ISACLBDecStruct;
typedef struct {
Bitstr bitstr_obj;
MaskFiltstr maskfiltstr_obj;
PostFiltBankstr postfiltbankstr_obj;
FFTstr fftstr_obj;
} ISACUBDecStruct;
typedef struct {
ISACLBEncStruct ISACencLB_obj;
ISACLBDecStruct ISACdecLB_obj;
} ISACLBStruct;
typedef struct {
ISACUBEncStruct ISACencUB_obj;
ISACUBDecStruct ISACdecUB_obj;
} ISACUBStruct;
/*
This struct is used to take a snapshot of the entropy coder and LPC gains
right before encoding LPC gains. This allows us to go back to that state
if we like to limit the payload size.
*/
typedef struct {
/* 6 lower-band & 6 upper-band */
double loFiltGain[SUBFRAMES];
double hiFiltGain[SUBFRAMES];
/* Upper boundary of interval W */
uint32_t W_upper;
uint32_t streamval;
/* Index to the current position in bytestream */
uint32_t stream_index;
uint8_t stream[3];
} transcode_obj;
typedef struct {
// TODO(kwiberg): The size of these tables could be reduced by storing floats
// instead of doubles, and by making use of the identity cos(x) =
// sin(x+pi/2). They could also be made global constants that we fill in at
// compile time.
double costab1[FRAMESAMPLES_HALF];
double sintab1[FRAMESAMPLES_HALF];
double costab2[FRAMESAMPLES_QUARTER];
double sintab2[FRAMESAMPLES_QUARTER];
} TransformTables;
typedef struct {
// lower-band codec instance
ISACLBStruct instLB;
// upper-band codec instance
ISACUBStruct instUB;
// Bandwidth Estimator and model for the rate.
BwEstimatorstr bwestimator_obj;
RateModel rate_data_obj;
double MaxDelay;
/* 0 = adaptive; 1 = instantaneous */
int16_t codingMode;
// overall bottleneck of the codec
int32_t bottleneck;
// QMF Filter state
int32_t analysisFBState1[FB_STATE_SIZE_WORD32];
int32_t analysisFBState2[FB_STATE_SIZE_WORD32];
int32_t synthesisFBState1[FB_STATE_SIZE_WORD32];
int32_t synthesisFBState2[FB_STATE_SIZE_WORD32];
// Error Code
int16_t errorCode;
// bandwidth of the encoded audio 8, 12 or 16 kHz
enum ISACBandwidth bandwidthKHz;
// Sampling rate of audio, encoder and decoder, 8 or 16 kHz
enum IsacSamplingRate encoderSamplingRateKHz;
enum IsacSamplingRate decoderSamplingRateKHz;
// Flag to keep track of initializations, lower & upper-band
// encoder and decoder.
int16_t initFlag;
// Flag to indicate a signal bandwidth switch
int16_t resetFlag_8kHz;
// Maximum allowed rate, measured in Bytes per 30 ms.
int16_t maxRateBytesPer30Ms;
// Maximum allowed payload-size, measured in Bytes.
int16_t maxPayloadSizeBytes;
/* The expected sampling rate of the input signal. Valid values are 16000
* and 32000. This is not the operating sampling rate of the codec. */
uint16_t in_sample_rate_hz;
// Trig tables for WebRtcIsac_Time2Spec and WebRtcIsac_Spec2time.
TransformTables transform_tables;
} ISACMainStruct;
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ */

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
#include <memory>
#include "absl/base/nullability.h"
#include "absl/strings/string_view.h"
#include "api/task_queue/task_queue_base.h"
#include "modules/audio_processing/include/aec_dump.h"
#include "rtc_base/system/file_wrapper.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
class RTC_EXPORT AecDumpFactory {
public:
// The `worker_queue` must outlive the created AecDump instance.
// `max_log_size_bytes == -1` means the log size will be unlimited.
// The AecDump takes responsibility for `handle` and closes it in the
// destructor. A non-null return value indicates that the file has been
// successfully opened.
static absl::Nullable<std::unique_ptr<AecDump>> Create(
FileWrapper file,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue);
static absl::Nullable<std::unique_ptr<AecDump>> Create(
absl::string_view file_name,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue);
static absl::Nullable<std::unique_ptr<AecDump>> Create(
absl::Nonnull<FILE*> handle,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
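// Illustrative usage sketch (not part of the original header). Assumes a
// TaskQueueBase* `worker_queue` that outlives the dump; the file name is
// arbitrary. A null return means the file could not be opened.
//
//   std::unique_ptr<AecDump> dump = AecDumpFactory::Create(
//       "/tmp/audio.aecdump", /*max_log_size_bytes=*/-1, worker_queue);
//   if (dump) {
//     // Hand ownership to the APM, e.g. apm->AttachAecDump(std::move(dump)).
//   }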

View File

@ -0,0 +1,92 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
#include <algorithm>
#include "api/array_view.h"
#include "modules/audio_processing/audio_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
AudioSamplesScaler::AudioSamplesScaler(float initial_gain)
: previous_gain_(initial_gain), target_gain_(initial_gain) {}
void AudioSamplesScaler::Process(AudioBuffer& audio_buffer) {
if (static_cast<int>(audio_buffer.num_frames()) != samples_per_channel_) {
// Update the members depending on audio-buffer length if needed.
RTC_DCHECK_GT(audio_buffer.num_frames(), 0);
samples_per_channel_ = static_cast<int>(audio_buffer.num_frames());
one_by_samples_per_channel_ = 1.f / samples_per_channel_;
}
if (target_gain_ == 1.f && previous_gain_ == target_gain_) {
// If only a gain of 1 is to be applied, do an early return without applying
// any gain.
return;
}
float gain = previous_gain_;
if (previous_gain_ == target_gain_) {
// Apply a non-changing gain.
for (size_t channel = 0; channel < audio_buffer.num_channels(); ++channel) {
rtc::ArrayView<float> channel_view(audio_buffer.channels()[channel],
samples_per_channel_);
for (float& sample : channel_view) {
sample *= gain;
}
}
} else {
const float increment =
(target_gain_ - previous_gain_) * one_by_samples_per_channel_;
if (increment > 0.f) {
// Apply an increasing gain.
for (size_t channel = 0; channel < audio_buffer.num_channels();
++channel) {
gain = previous_gain_;
rtc::ArrayView<float> channel_view(audio_buffer.channels()[channel],
samples_per_channel_);
for (float& sample : channel_view) {
gain = std::min(gain + increment, target_gain_);
sample *= gain;
}
}
} else {
// Apply a decreasing gain.
for (size_t channel = 0; channel < audio_buffer.num_channels();
++channel) {
gain = previous_gain_;
rtc::ArrayView<float> channel_view(audio_buffer.channels()[channel],
samples_per_channel_);
for (float& sample : channel_view) {
gain = std::max(gain + increment, target_gain_);
sample *= gain;
}
}
}
}
previous_gain_ = target_gain_;
// Saturate the samples to be in the S16 range.
for (size_t channel = 0; channel < audio_buffer.num_channels(); ++channel) {
rtc::ArrayView<float> channel_view(audio_buffer.channels()[channel],
samples_per_channel_);
for (float& sample : channel_view) {
constexpr float kMinFloatS16Value = -32768.f;
constexpr float kMaxFloatS16Value = 32767.f;
sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value);
}
}
}
} // namespace webrtc

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
#define MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
#include <stddef.h>
#include "modules/audio_processing/audio_buffer.h"
namespace webrtc {
// Handles and applies a gain to the samples in an audio buffer.
// The gain is applied for each sample and any changes in the gain take effect
// gradually (in a linear manner) over one frame.
class AudioSamplesScaler {
public:
// C-tor. The supplied `initial_gain` is used immediately at the first call to
// Process(), i.e., in contrast to the gain supplied by SetGain(...) there is
// no gradual change to the `initial_gain`.
explicit AudioSamplesScaler(float initial_gain);
AudioSamplesScaler(const AudioSamplesScaler&) = delete;
AudioSamplesScaler& operator=(const AudioSamplesScaler&) = delete;
// Applies the specified gain to the audio in `audio_buffer`.
void Process(AudioBuffer& audio_buffer);
// Sets the gain to apply to each sample.
void SetGain(float gain) { target_gain_ = gain; }
private:
float previous_gain_ = 1.f;
float target_gain_ = 1.f;
int samples_per_channel_ = -1;
float one_by_samples_per_channel_ = -1.f;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
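// Illustrative usage sketch (not part of the original header); `buffer` is an
// assumed, prepared AudioBuffer. The initial gain applies immediately; a gain
// set via SetGain() ramps in linearly over the next processed frame.
//
//   AudioSamplesScaler scaler(/*initial_gain=*/1.f);
//   scaler.Process(buffer);  // Unity gain: samples pass through unchanged.
//   scaler.SetGain(0.5f);
//   scaler.Process(buffer);  // Gain ramps from 1.f to 0.5f across the frame.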

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h"
#include "modules/audio_processing/audio_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
constexpr int kMinAnalogMicGainLevel = 0;
constexpr int kMaxAnalogMicGainLevel = 255;
float ComputeLevelBasedGain(int emulated_analog_mic_gain_level) {
static_assert(
kMinAnalogMicGainLevel == 0,
"The minimum gain level must be 0 for the maths below to work.");
static_assert(kMaxAnalogMicGainLevel > 0,
"The minimum gain level must be larger than 0 for the maths "
"below to work.");
constexpr float kGainToLevelMultiplier = 1.f / kMaxAnalogMicGainLevel;
RTC_DCHECK_GE(emulated_analog_mic_gain_level, kMinAnalogMicGainLevel);
RTC_DCHECK_LE(emulated_analog_mic_gain_level, kMaxAnalogMicGainLevel);
return kGainToLevelMultiplier * emulated_analog_mic_gain_level;
}
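// Worked example (illustrative): a mid-scale level of 128 maps to a gain of
// 128 / 255 ~= 0.502 (about -6 dB); level 255 maps to unity gain, and level 0
// mutes the signal.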
float ComputePreGain(float pre_gain,
int emulated_analog_mic_gain_level,
bool emulated_analog_mic_gain_enabled) {
return emulated_analog_mic_gain_enabled
? pre_gain * ComputeLevelBasedGain(emulated_analog_mic_gain_level)
: pre_gain;
}
} // namespace
CaptureLevelsAdjuster::CaptureLevelsAdjuster(
bool emulated_analog_mic_gain_enabled,
int emulated_analog_mic_gain_level,
float pre_gain,
float post_gain)
: emulated_analog_mic_gain_enabled_(emulated_analog_mic_gain_enabled),
emulated_analog_mic_gain_level_(emulated_analog_mic_gain_level),
pre_gain_(pre_gain),
pre_adjustment_gain_(ComputePreGain(pre_gain_,
emulated_analog_mic_gain_level_,
emulated_analog_mic_gain_enabled_)),
pre_scaler_(pre_adjustment_gain_),
post_scaler_(post_gain) {}
void CaptureLevelsAdjuster::ApplyPreLevelAdjustment(AudioBuffer& audio_buffer) {
pre_scaler_.Process(audio_buffer);
}
void CaptureLevelsAdjuster::ApplyPostLevelAdjustment(
AudioBuffer& audio_buffer) {
post_scaler_.Process(audio_buffer);
}
void CaptureLevelsAdjuster::SetPreGain(float pre_gain) {
pre_gain_ = pre_gain;
UpdatePreAdjustmentGain();
}
void CaptureLevelsAdjuster::SetPostGain(float post_gain) {
post_scaler_.SetGain(post_gain);
}
void CaptureLevelsAdjuster::SetAnalogMicGainLevel(int level) {
RTC_DCHECK_GE(level, kMinAnalogMicGainLevel);
RTC_DCHECK_LE(level, kMaxAnalogMicGainLevel);
int clamped_level =
rtc::SafeClamp(level, kMinAnalogMicGainLevel, kMaxAnalogMicGainLevel);
emulated_analog_mic_gain_level_ = clamped_level;
UpdatePreAdjustmentGain();
}
void CaptureLevelsAdjuster::UpdatePreAdjustmentGain() {
pre_adjustment_gain_ =
ComputePreGain(pre_gain_, emulated_analog_mic_gain_level_,
emulated_analog_mic_gain_enabled_);
pre_scaler_.SetGain(pre_adjustment_gain_);
}
} // namespace webrtc

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
#define MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
#include <stddef.h>
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
namespace webrtc {
// Adjusts the level of the capture signal before and after all capture-side
// processing is done using a combination of explicitly specified gains
// and an emulated analog gain functionality where a specified analog level
// results in an additional gain. The pre-adjustment is achieved by combining
// the gain value `pre_gain` and the level `emulated_analog_mic_gain_level` to
// form a combined gain of `pre_gain`*`emulated_analog_mic_gain_level`/255 which
// is multiplied to each sample. The intention of the
// `emulated_analog_mic_gain_level` is to be controlled by the analog AGC
// functionality and to produce an emulated analog mic gain equal to
// `emulated_analog_mic_gain_level`/255. The post level adjustment is achieved
// by multiplying each sample with the value of `post_gain`. Any changes in the
// gains are applied smoothly over one frame and the scaled samples are
// clamped to fit into the allowed S16 sample range.
class CaptureLevelsAdjuster {
public:
// C-tor. The values for the level and the gains must fulfill
// 0 <= emulated_analog_mic_gain_level <= 255.
// 0.f <= pre_gain.
// 0.f <= post_gain.
CaptureLevelsAdjuster(bool emulated_analog_mic_gain_enabled,
int emulated_analog_mic_gain_level,
float pre_gain,
float post_gain);
CaptureLevelsAdjuster(const CaptureLevelsAdjuster&) = delete;
CaptureLevelsAdjuster& operator=(const CaptureLevelsAdjuster&) = delete;
// Adjusts the level of the signal. This should be called before any of the
// other processing is performed.
void ApplyPreLevelAdjustment(AudioBuffer& audio_buffer);
// Adjusts the level of the signal. This should be called after all of the
// other processing has been performed.
void ApplyPostLevelAdjustment(AudioBuffer& audio_buffer);
// Sets the gain to apply to each sample before any of the other processing is
// performed.
void SetPreGain(float pre_gain);
// Returns the total pre-adjustment gain applied to each sample before any of
// the other processing is performed, comprising both the pre_gain and the
// gain from the emulated analog mic.
float GetPreAdjustmentGain() const { return pre_adjustment_gain_; }
// Sets the gain to apply to each sample after all of the other processing
// has been performed.
void SetPostGain(float post_gain);
// Sets the analog gain level to use for the emulated analog gain.
// `level` must be in the range [0...255].
void SetAnalogMicGainLevel(int level);
// Returns the current analog gain level used for the emulated analog gain.
int GetAnalogMicGainLevel() const { return emulated_analog_mic_gain_level_; }
private:
// Updates the value of `pre_adjustment_gain_` based on the supplied values
// for `pre_gain` and `emulated_analog_mic_gain_level_`.
void UpdatePreAdjustmentGain();
const bool emulated_analog_mic_gain_enabled_;
int emulated_analog_mic_gain_level_;
float pre_gain_;
float pre_adjustment_gain_;
AudioSamplesScaler pre_scaler_;
AudioSamplesScaler post_scaler_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
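// Illustrative usage sketch (not part of the original header); `buffer` is an
// assumed AudioBuffer. With pre_gain = 2.f and level 128, the combined
// pre-adjustment gain is 2.f * 128 / 255 ~= 1.004.
//
//   CaptureLevelsAdjuster adjuster(/*emulated_analog_mic_gain_enabled=*/true,
//                                  /*emulated_analog_mic_gain_level=*/128,
//                                  /*pre_gain=*/2.f, /*post_gain=*/1.f);
//   adjuster.ApplyPreLevelAdjustment(buffer);
//   // ... capture-side processing runs here ...
//   adjuster.ApplyPostLevelAdjustment(buffer);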

View File

@ -0,0 +1,287 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/echo_control_mobile_impl.h"
#include <string.h>
#include <cstdint>
#include "api/audio/audio_processing.h"
#include "modules/audio_processing/aecm/echo_control_mobile.h"
#include "modules/audio_processing/audio_buffer.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
int16_t MapSetting(EchoControlMobileImpl::RoutingMode mode) {
switch (mode) {
case EchoControlMobileImpl::kQuietEarpieceOrHeadset:
return 0;
case EchoControlMobileImpl::kEarpiece:
return 1;
case EchoControlMobileImpl::kLoudEarpiece:
return 2;
case EchoControlMobileImpl::kSpeakerphone:
return 3;
case EchoControlMobileImpl::kLoudSpeakerphone:
return 4;
}
RTC_DCHECK_NOTREACHED();
return -1;
}
AudioProcessing::Error MapError(int err) {
switch (err) {
case AECM_UNSUPPORTED_FUNCTION_ERROR:
return AudioProcessing::kUnsupportedFunctionError;
case AECM_NULL_POINTER_ERROR:
return AudioProcessing::kNullPointerError;
case AECM_BAD_PARAMETER_ERROR:
return AudioProcessing::kBadParameterError;
case AECM_BAD_PARAMETER_WARNING:
return AudioProcessing::kBadStreamParameterWarning;
default:
// AECM_UNSPECIFIED_ERROR
// AECM_UNINITIALIZED_ERROR
return AudioProcessing::kUnspecifiedError;
}
}
} // namespace
struct EchoControlMobileImpl::StreamProperties {
StreamProperties() = delete;
StreamProperties(int sample_rate_hz,
size_t num_reverse_channels,
size_t num_output_channels)
: sample_rate_hz(sample_rate_hz),
num_reverse_channels(num_reverse_channels),
num_output_channels(num_output_channels) {}
int sample_rate_hz;
size_t num_reverse_channels;
size_t num_output_channels;
};
class EchoControlMobileImpl::Canceller {
public:
Canceller() {
state_ = WebRtcAecm_Create();
RTC_CHECK(state_);
}
~Canceller() {
RTC_DCHECK(state_);
WebRtcAecm_Free(state_);
}
Canceller(const Canceller&) = delete;
Canceller& operator=(const Canceller&) = delete;
void* state() {
RTC_DCHECK(state_);
return state_;
}
void Initialize(int sample_rate_hz) {
RTC_DCHECK(state_);
int error = WebRtcAecm_Init(state_, sample_rate_hz);
RTC_DCHECK_EQ(AudioProcessing::kNoError, error);
}
private:
void* state_;
};
EchoControlMobileImpl::EchoControlMobileImpl()
: routing_mode_(kSpeakerphone), comfort_noise_enabled_(false) {}
EchoControlMobileImpl::~EchoControlMobileImpl() {}
void EchoControlMobileImpl::ProcessRenderAudio(
rtc::ArrayView<const int16_t> packed_render_audio) {
RTC_DCHECK(stream_properties_);
size_t buffer_index = 0;
size_t num_frames_per_band =
packed_render_audio.size() / (stream_properties_->num_output_channels *
stream_properties_->num_reverse_channels);
for (auto& canceller : cancellers_) {
WebRtcAecm_BufferFarend(canceller->state(),
&packed_render_audio[buffer_index],
num_frames_per_band);
buffer_index += num_frames_per_band;
}
}
void EchoControlMobileImpl::PackRenderAudioBuffer(
const AudioBuffer* audio,
size_t num_output_channels,
size_t num_channels,
std::vector<int16_t>* packed_buffer) {
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
RTC_DCHECK_EQ(num_channels, audio->num_channels());
// The ordering convention must be followed to pass to the correct AECM.
packed_buffer->clear();
int render_channel = 0;
for (size_t i = 0; i < num_output_channels; i++) {
for (size_t j = 0; j < audio->num_channels(); j++) {
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> data_to_buffer;
FloatS16ToS16(audio->split_bands_const(render_channel)[kBand0To8kHz],
audio->num_frames_per_band(), data_to_buffer.data());
// Buffer the samples in the render queue.
packed_buffer->insert(
packed_buffer->end(), data_to_buffer.data(),
data_to_buffer.data() + audio->num_frames_per_band());
render_channel = (render_channel + 1) % audio->num_channels();
}
}
}
size_t EchoControlMobileImpl::NumCancellersRequired(
size_t num_output_channels,
size_t num_reverse_channels) {
return num_output_channels * num_reverse_channels;
}
int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
int stream_delay_ms) {
RTC_DCHECK(stream_properties_);
RTC_DCHECK_GE(160, audio->num_frames_per_band());
RTC_DCHECK_EQ(audio->num_channels(), stream_properties_->num_output_channels);
RTC_DCHECK_GE(cancellers_.size(), stream_properties_->num_reverse_channels *
audio->num_channels());
int err = AudioProcessing::kNoError;
// The ordering convention must be followed to pass to the correct AECM.
size_t handle_index = 0;
for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
// TODO(ajm): improve how this works, possibly inside AECM.
// This is kind of hacked up.
RTC_DCHECK_LT(capture, low_pass_reference_.size());
const int16_t* noisy =
reference_copied_ ? low_pass_reference_[capture].data() : nullptr;
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> split_bands_data;
int16_t* split_bands = split_bands_data.data();
const int16_t* clean = split_bands_data.data();
if (audio->split_bands(capture)[kBand0To8kHz]) {
FloatS16ToS16(audio->split_bands(capture)[kBand0To8kHz],
audio->num_frames_per_band(), split_bands_data.data());
} else {
clean = nullptr;
split_bands = nullptr;
}
if (noisy == NULL) {
noisy = clean;
clean = NULL;
}
for (size_t render = 0; render < stream_properties_->num_reverse_channels;
++render) {
err = WebRtcAecm_Process(cancellers_[handle_index]->state(), noisy, clean,
split_bands, audio->num_frames_per_band(),
stream_delay_ms);
if (split_bands) {
S16ToFloatS16(split_bands, audio->num_frames_per_band(),
audio->split_bands(capture)[kBand0To8kHz]);
}
if (err != AudioProcessing::kNoError) {
return MapError(err);
}
++handle_index;
}
for (size_t band = 1u; band < audio->num_bands(); ++band) {
memset(audio->split_bands_f(capture)[band], 0,
audio->num_frames_per_band() *
sizeof(audio->split_bands_f(capture)[band][0]));
}
}
return AudioProcessing::kNoError;
}
int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
if (MapSetting(mode) == -1) {
return AudioProcessing::kBadParameterError;
}
routing_mode_ = mode;
return Configure();
}
EchoControlMobileImpl::RoutingMode EchoControlMobileImpl::routing_mode() const {
return routing_mode_;
}
int EchoControlMobileImpl::enable_comfort_noise(bool enable) {
comfort_noise_enabled_ = enable;
return Configure();
}
bool EchoControlMobileImpl::is_comfort_noise_enabled() const {
return comfort_noise_enabled_;
}
void EchoControlMobileImpl::Initialize(int sample_rate_hz,
size_t num_reverse_channels,
size_t num_output_channels) {
low_pass_reference_.resize(num_output_channels);
for (auto& reference : low_pass_reference_) {
reference.fill(0);
}
stream_properties_.reset(new StreamProperties(
sample_rate_hz, num_reverse_channels, num_output_channels));
// AECM only supports 16 kHz or lower sample rates.
RTC_DCHECK_LE(stream_properties_->sample_rate_hz,
AudioProcessing::kSampleRate16kHz);
cancellers_.resize(
NumCancellersRequired(stream_properties_->num_output_channels,
stream_properties_->num_reverse_channels));
for (auto& canceller : cancellers_) {
if (!canceller) {
canceller.reset(new Canceller());
}
canceller->Initialize(sample_rate_hz);
}
Configure();
}
int EchoControlMobileImpl::Configure() {
AecmConfig config;
config.cngMode = comfort_noise_enabled_;
config.echoMode = MapSetting(routing_mode_);
int error = AudioProcessing::kNoError;
for (auto& canceller : cancellers_) {
int handle_error = WebRtcAecm_set_config(canceller->state(), config);
if (handle_error != AudioProcessing::kNoError) {
error = handle_error;
}
}
return error;
}
} // namespace webrtc

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
#define MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <vector>
#include "api/array_view.h"
namespace webrtc {
class AudioBuffer;
// The acoustic echo control for mobile (AECM) component is a low complexity
// robust option intended for use on mobile devices.
class EchoControlMobileImpl {
public:
EchoControlMobileImpl();
~EchoControlMobileImpl();
// Recommended settings for particular audio routes. In general, the louder
// the echo is expected to be, the higher this value should be set. The
// preferred setting may vary from device to device.
enum RoutingMode {
kQuietEarpieceOrHeadset,
kEarpiece,
kLoudEarpiece,
kSpeakerphone,
kLoudSpeakerphone
};
// Sets echo control appropriate for the audio routing `mode` on the device.
// It can and should be updated during a call if the audio routing changes.
int set_routing_mode(RoutingMode mode);
RoutingMode routing_mode() const;
// Comfort noise replaces suppressed background noise to maintain a
// consistent signal level.
int enable_comfort_noise(bool enable);
bool is_comfort_noise_enabled() const;
void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio);
int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
void Initialize(int sample_rate_hz,
size_t num_reverse_channels,
size_t num_output_channels);
static void PackRenderAudioBuffer(const AudioBuffer* audio,
size_t num_output_channels,
size_t num_channels,
std::vector<int16_t>* packed_buffer);
static size_t NumCancellersRequired(size_t num_output_channels,
size_t num_reverse_channels);
private:
class Canceller;
struct StreamProperties;
int Configure();
RoutingMode routing_mode_;
bool comfort_noise_enabled_;
std::vector<std::unique_ptr<Canceller>> cancellers_;
std::unique_ptr<StreamProperties> stream_properties_;
std::vector<std::array<int16_t, 160>> low_pass_reference_;
bool reference_copied_ = false;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
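// Illustrative usage sketch (not part of the original header); `render` and
// `capture` are assumed AudioBuffers and the 50 ms stream delay is
// hypothetical. Render audio must be packed and buffered before each
// capture-side call.
//
//   EchoControlMobileImpl aecm;
//   aecm.Initialize(/*sample_rate_hz=*/16000, /*num_reverse_channels=*/1,
//                   /*num_output_channels=*/1);
//   aecm.set_routing_mode(EchoControlMobileImpl::kSpeakerphone);
//   std::vector<int16_t> packed;
//   EchoControlMobileImpl::PackRenderAudioBuffer(&render, 1, 1, &packed);
//   aecm.ProcessRenderAudio(packed);
//   aecm.ProcessCaptureAudio(&capture, /*stream_delay_ms=*/50);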

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/include/aec_dump.h"
namespace webrtc {
InternalAPMConfig::InternalAPMConfig() = default;
InternalAPMConfig::InternalAPMConfig(const InternalAPMConfig&) = default;
InternalAPMConfig::InternalAPMConfig(InternalAPMConfig&&) = default;
InternalAPMConfig& InternalAPMConfig::operator=(const InternalAPMConfig&) =
default;
bool InternalAPMConfig::operator==(const InternalAPMConfig& other) const {
return aec_enabled == other.aec_enabled &&
aec_delay_agnostic_enabled == other.aec_delay_agnostic_enabled &&
aec_drift_compensation_enabled ==
other.aec_drift_compensation_enabled &&
aec_extended_filter_enabled == other.aec_extended_filter_enabled &&
aec_suppression_level == other.aec_suppression_level &&
aecm_enabled == other.aecm_enabled &&
aecm_comfort_noise_enabled == other.aecm_comfort_noise_enabled &&
aecm_routing_mode == other.aecm_routing_mode &&
agc_enabled == other.agc_enabled && agc_mode == other.agc_mode &&
agc_limiter_enabled == other.agc_limiter_enabled &&
hpf_enabled == other.hpf_enabled && ns_enabled == other.ns_enabled &&
ns_level == other.ns_level &&
transient_suppression_enabled == other.transient_suppression_enabled &&
noise_robust_agc_enabled == other.noise_robust_agc_enabled &&
pre_amplifier_enabled == other.pre_amplifier_enabled &&
pre_amplifier_fixed_gain_factor ==
other.pre_amplifier_fixed_gain_factor &&
experiments_description == other.experiments_description;
}
} // namespace webrtc

View File

@ -0,0 +1,116 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
#define MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
#include <stdint.h>
#include <string>
#include "absl/base/attributes.h"
#include "absl/types/optional.h"
#include "api/audio/audio_processing.h"
#include "modules/audio_processing/include/audio_frame_view.h"
namespace webrtc {
// Struct for passing current config from APM without having to
// include protobuf headers.
struct InternalAPMConfig {
InternalAPMConfig();
InternalAPMConfig(const InternalAPMConfig&);
InternalAPMConfig(InternalAPMConfig&&);
InternalAPMConfig& operator=(const InternalAPMConfig&);
InternalAPMConfig& operator=(InternalAPMConfig&&) = delete;
bool operator==(const InternalAPMConfig& other) const;
bool aec_enabled = false;
bool aec_delay_agnostic_enabled = false;
bool aec_drift_compensation_enabled = false;
bool aec_extended_filter_enabled = false;
int aec_suppression_level = 0;
bool aecm_enabled = false;
bool aecm_comfort_noise_enabled = false;
int aecm_routing_mode = 0;
bool agc_enabled = false;
int agc_mode = 0;
bool agc_limiter_enabled = false;
bool hpf_enabled = false;
bool ns_enabled = false;
int ns_level = 0;
bool transient_suppression_enabled = false;
bool noise_robust_agc_enabled = false;
bool pre_amplifier_enabled = false;
float pre_amplifier_fixed_gain_factor = 1.f;
std::string experiments_description = "";
};
// An interface for recording configuration and input/output streams
// of the Audio Processing Module. The recordings are called
// 'aec-dumps' and are stored in a protobuf format defined in
// debug.proto.
// The Write* methods are always safe to call concurrently or
// otherwise for all implementing subclasses. The intended mode of
// operation is to create a protobuf object from the input, and send
// it away to be written to file asynchronously.
class AecDump {
public:
struct AudioProcessingState {
int delay;
int drift;
absl::optional<int> applied_input_volume;
bool keypress;
};
virtual ~AecDump() = default;
// Logs Event::Type INIT message.
virtual void WriteInitMessage(const ProcessingConfig& api_format,
int64_t time_now_ms) = 0;
ABSL_DEPRECATED("")
void WriteInitMessage(const ProcessingConfig& api_format) {
WriteInitMessage(api_format, 0);
}
// Logs Event::Type STREAM message. To log an input/output pair,
// call the AddCapture* and AddAudioProcessingState methods followed
// by a WriteCaptureStreamMessage call.
virtual void AddCaptureStreamInput(
const AudioFrameView<const float>& src) = 0;
virtual void AddCaptureStreamOutput(
const AudioFrameView<const float>& src) = 0;
virtual void AddCaptureStreamInput(const int16_t* const data,
int num_channels,
int samples_per_channel) = 0;
virtual void AddCaptureStreamOutput(const int16_t* const data,
int num_channels,
int samples_per_channel) = 0;
virtual void AddAudioProcessingState(const AudioProcessingState& state) = 0;
virtual void WriteCaptureStreamMessage() = 0;
// Logs Event::Type REVERSE_STREAM message.
virtual void WriteRenderStreamMessage(const int16_t* const data,
int num_channels,
int samples_per_channel) = 0;
virtual void WriteRenderStreamMessage(
const AudioFrameView<const float>& src) = 0;
virtual void WriteRuntimeSetting(
const AudioProcessing::RuntimeSetting& runtime_setting) = 0;
// Logs Event::Type CONFIG message.
virtual void WriteConfig(const InternalAPMConfig& config) = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
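// Illustrative logging sequence (not part of the original header); `dump` is
// an assumed AecDump instance, `api_format` a ProcessingConfig, and the frame
// views come from the APM. It follows the input/output-pair protocol above.
//
//   dump->WriteInitMessage(api_format, /*time_now_ms=*/0);
//   dump->AddCaptureStreamInput(input_view);
//   dump->AddAudioProcessingState({/*delay=*/0, /*drift=*/0,
//                                  /*applied_input_volume=*/absl::nullopt,
//                                  /*keypress=*/false});
//   dump->AddCaptureStreamOutput(output_view);
//   dump->WriteCaptureStreamMessage();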

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/include/audio_frame_proxies.h"
#include "api/audio/audio_frame.h"
#include "api/audio/audio_processing.h"
namespace webrtc {
int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
if (!frame || !ap) {
return AudioProcessing::Error::kNullPointerError;
}
StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_);
StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_);
RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());
int result = ap->ProcessStream(frame->data(), input_config, output_config,
frame->mutable_data());
AudioProcessingStats stats = ap->GetStatistics();
if (stats.voice_detected) {
frame->vad_activity_ = *stats.voice_detected
? AudioFrame::VADActivity::kVadActive
: AudioFrame::VADActivity::kVadPassive;
}
return result;
}
int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
if (!frame || !ap) {
return AudioProcessing::Error::kNullPointerError;
}
// Must be a native rate.
if (frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate8kHz &&
frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate16kHz &&
frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate32kHz &&
frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate48kHz) {
return AudioProcessing::Error::kBadSampleRateError;
}
if (frame->num_channels_ <= 0) {
return AudioProcessing::Error::kBadNumberChannelsError;
}
StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_);
StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_);
int result = ap->ProcessReverseStream(frame->data(), input_config,
output_config, frame->mutable_data());
return result;
}
} // namespace webrtc

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
namespace webrtc {
class AudioFrame;
class AudioProcessing;
// Processes a 10 ms `frame` of the primary audio stream using the provided
// AudioProcessing object. On the client-side, this is the near-end (or
// captured) audio. The `sample_rate_hz_`, `num_channels_`, and
// `samples_per_channel_` members of `frame` must be valid. If changed from the
// previous call to this function, it will trigger an initialization of the
// provided AudioProcessing object.
// The function returns any error codes passed from the AudioProcessing
// ProcessStream method.
int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame);
// Processes a 10 ms `frame` of the reverse direction audio stream using the
// provided AudioProcessing object. The frame may be modified. On the
// client-side, this is the far-end (or to be rendered) audio. The
// `sample_rate_hz_`, `num_channels_`, and `samples_per_channel_` members of
// `frame` must be valid. If changed from the previous call to this function, it
// will trigger an initialization of the provided AudioProcessing object.
// The function returns any error codes passed from the AudioProcessing
// ProcessReverseStream method.
int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame);
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
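// Illustrative usage sketch (not part of the original header); `apm` is an
// assumed AudioProcessing* and `far_frame`/`near_frame` are 10 ms AudioFrames
// with valid rate and channel members.
//
//   int err = ProcessReverseAudioFrame(apm, &far_frame);  // Render side.
//   if (err == AudioProcessing::kNoError) {
//     err = ProcessAudioFrame(apm, &near_frame);          // Capture side.
//   }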

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
#include "api/audio/audio_view.h"
namespace webrtc {
// Class to pass audio data in T** format, where T is a numeric type.
template <class T>
class AudioFrameView {
public:
// `num_channels` and `channel_size` describe the T**
// `audio_samples`. `audio_samples` is assumed to point to a
// two-dimensional |num_channels * channel_size| array of floats.
//
// Note: The implementation now only requires the first channel pointer.
// The previous implementation retained a pointer to an externally owned
// array of channel pointers, but since the channel count and size are
// provided and the data is assumed to be one contiguous two-dimensional
// array, the remaining channel pointers can be derived from the first
// (which is what the class now uses `DeinterleavedView<>` for internally).
AudioFrameView(T* const* audio_samples, int num_channels, int channel_size)
: view_(num_channels && channel_size ? audio_samples[0] : nullptr,
channel_size,
num_channels) {
RTC_DCHECK_GE(view_.num_channels(), 0);
RTC_DCHECK_GE(view_.samples_per_channel(), 0);
}
// Implicit cast to allow converting AudioFrameView<float> to
// AudioFrameView<const float>.
template <class U>
AudioFrameView(AudioFrameView<U> other) : view_(other.view()) {}
// Allow constructing AudioFrameView from a DeinterleavedView.
template <class U>
explicit AudioFrameView(DeinterleavedView<U> view) : view_(view) {}
AudioFrameView() = delete;
int num_channels() const { return view_.num_channels(); }
int samples_per_channel() const { return view_.samples_per_channel(); }
MonoView<T> channel(int idx) { return view_[idx]; }
MonoView<const T> channel(int idx) const { return view_[idx]; }
MonoView<T> operator[](int idx) { return view_[idx]; }
MonoView<const T> operator[](int idx) const { return view_[idx]; }
DeinterleavedView<T> view() { return view_; }
DeinterleavedView<const T> view() const { return view_; }
private:
DeinterleavedView<T> view_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
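
A usage sketch (not part of the patch) of the T** contract: two channels stored as one contiguous channel-major array, matching the single-allocation assumption documented on the constructor:

constexpr int kChannels = 2;
constexpr int kSamplesPerChannel = 160;
float storage[kChannels * kSamplesPerChannel] = {};
float* channels[kChannels] = {&storage[0], &storage[kSamplesPerChannel]};
webrtc::AudioFrameView<float> view(channels, kChannels, kSamplesPerChannel);
for (int ch = 0; ch < view.num_channels(); ++ch) {
  for (float& sample : view[ch]) {
    sample *= 0.5f;  // E.g. attenuate every channel by ~6 dB.
  }
}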

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
#define MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
#include <vector>
namespace webrtc {
// Functor to use when supplying a verifier function for the queue item
// verification.
template <typename T>
class RenderQueueItemVerifier {
public:
explicit RenderQueueItemVerifier(size_t minimum_capacity)
: minimum_capacity_(minimum_capacity) {}
bool operator()(const std::vector<T>& v) const {
return v.capacity() >= minimum_capacity_;
}
private:
size_t minimum_capacity_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
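
Usage sketch (illustrative): the functor is meant to be handed to a queue that verifies items on insertion (e.g. as a SwapQueue verifier elsewhere in WebRTC); invoked directly it just checks capacity:

webrtc::RenderQueueItemVerifier<float> verifier(/*minimum_capacity=*/480);
std::vector<float> item;
item.reserve(480);
const bool ok = verifier(item);  // true: capacity >= 480, safe to reuse.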

View File

@ -0,0 +1,138 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/rms_level.h"
#include <algorithm>
#include <cmath>
#include <numeric>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
static constexpr float kMaxSquaredLevel = 32768 * 32768;
// kMinLevel is the level corresponding to kMinLevelDb, that is 10^(-127/10).
static constexpr float kMinLevel = 1.995262314968883e-13f;
// Calculates the normalized RMS value from a mean square value. The input
// should be the sum of squared samples divided by the number of samples. The
// value will be normalized to full range before computing the RMS, which is
// returned as a negated dBfs. That is, 0 is full amplitude while 127 is very
// faint.
int ComputeRms(float mean_square) {
if (mean_square <= kMinLevel * kMaxSquaredLevel) {
// Very faint; simply return the minimum value.
return RmsLevel::kMinLevelDb;
}
// Normalize by the max level.
const float mean_square_norm = mean_square / kMaxSquaredLevel;
RTC_DCHECK_GT(mean_square_norm, kMinLevel);
// 20log_10(x^0.5) = 10log_10(x)
const float rms = 10.f * std::log10(mean_square_norm);
RTC_DCHECK_LE(rms, 0.f);
RTC_DCHECK_GT(rms, -RmsLevel::kMinLevelDb);
// Return the negated value.
return static_cast<int>(-rms + 0.5f);
}
} // namespace
RmsLevel::RmsLevel() {
Reset();
}
RmsLevel::~RmsLevel() = default;
void RmsLevel::Reset() {
sum_square_ = 0.f;
sample_count_ = 0;
max_sum_square_ = 0.f;
block_size_ = absl::nullopt;
}
void RmsLevel::Analyze(rtc::ArrayView<const int16_t> data) {
if (data.empty()) {
return;
}
CheckBlockSize(data.size());
const float sum_square =
std::accumulate(data.begin(), data.end(), 0.f,
[](float a, int16_t b) { return a + b * b; });
RTC_DCHECK_GE(sum_square, 0.f);
sum_square_ += sum_square;
sample_count_ += data.size();
max_sum_square_ = std::max(max_sum_square_, sum_square);
}
void RmsLevel::Analyze(rtc::ArrayView<const float> data) {
if (data.empty()) {
return;
}
CheckBlockSize(data.size());
float sum_square = 0.f;
for (float data_k : data) {
int16_t tmp =
static_cast<int16_t>(std::min(std::max(data_k, -32768.f), 32767.f));
sum_square += tmp * tmp;
}
RTC_DCHECK_GE(sum_square, 0.f);
sum_square_ += sum_square;
sample_count_ += data.size();
max_sum_square_ = std::max(max_sum_square_, sum_square);
}
void RmsLevel::AnalyzeMuted(size_t length) {
CheckBlockSize(length);
sample_count_ += length;
}
int RmsLevel::Average() {
const bool have_samples = (sample_count_ != 0);
int rms = have_samples ? ComputeRms(sum_square_ / sample_count_)
: RmsLevel::kMinLevelDb;
// To ensure that kMinLevelDb represents digital silence (muted audio
// sources) we'll check here if the sum_square is actually 0. If it's not
// we'll bump up the return value to `kInaudibleButNotMuted`.
// https://datatracker.ietf.org/doc/html/rfc6464
if (have_samples && rms == RmsLevel::kMinLevelDb && sum_square_ != 0.0f) {
rms = kInaudibleButNotMuted;
}
Reset();
return rms;
}
RmsLevel::Levels RmsLevel::AverageAndPeak() {
// Note that block_size_ should by design always be non-empty when
// sample_count_ != 0. Also, the * operator of absl::optional enforces this
// with a DCHECK.
Levels levels = (sample_count_ == 0)
? Levels{RmsLevel::kMinLevelDb, RmsLevel::kMinLevelDb}
: Levels{ComputeRms(sum_square_ / sample_count_),
ComputeRms(max_sum_square_ / *block_size_)};
Reset();
return levels;
}
void RmsLevel::CheckBlockSize(size_t block_size) {
if (block_size_ != block_size) {
Reset();
block_size_ = block_size;
}
}
} // namespace webrtc

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
#define MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
#include <stddef.h>
#include <stdint.h>
#include "absl/types/optional.h"
#include "api/array_view.h"
namespace webrtc {
// Computes the root mean square (RMS) level in dBFs (decibels from digital
// full-scale) of audio data. The computation follows RFC 6465:
// https://tools.ietf.org/html/rfc6465
// with the intent that it can provide the RTP audio level indication.
//
// The expected approach is to provide constant-sized chunks of audio to
// Analyze(). When enough chunks have been accumulated to form a packet, call
// Average() to get the audio level indicator for the RTP header.
class RmsLevel {
public:
struct Levels {
int average;
int peak;
};
enum : int { kMinLevelDb = 127, kInaudibleButNotMuted = 126 };
RmsLevel();
~RmsLevel();
// Can be called to reset internal states, but is not required during normal
// operation.
void Reset();
// Pass each chunk of audio to Analyze() to accumulate the level.
void Analyze(rtc::ArrayView<const int16_t> data);
void Analyze(rtc::ArrayView<const float> data);
// If all samples with the given `length` have a magnitude of zero, this is
// a shortcut to avoid some computation.
void AnalyzeMuted(size_t length);
// Computes the RMS level over all data passed to Analyze() since the last
// call to Average(). The returned value is positive but should be interpreted
// as negative as per the RFC. It is constrained to [0, 127]. Resets the
// internal state to start a new measurement period.
int Average();
// Like Average() above, but also returns the RMS peak value. Resets the
// internal state to start a new measurement period.
Levels AverageAndPeak();
private:
// Compares `block_size` with `block_size_`. If they are different, calls
// Reset() and stores the new size.
void CheckBlockSize(size_t block_size);
float sum_square_;
size_t sample_count_;
float max_sum_square_;
absl::optional<size_t> block_size_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
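
A worked usage sketch (not part of the patch): a full-scale chunk should report close to 0 dBFS, while digital silence reports kMinLevelDb:

webrtc::RmsLevel level;
std::vector<int16_t> loud(160, 32767);  // 10 ms at 16 kHz, full scale.
level.Analyze(loud);
const int loud_db = level.Average();    // ~0; Average() also resets state.
level.AnalyzeMuted(160);                // Shortcut for known-zero samples.
const int silent_db = level.Average();  // 127, i.e. RmsLevel::kMinLevelDb.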

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
#define MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
#include <stddef.h>
static const int kSampleRateHz = 16000;
static const size_t kLength10Ms = kSampleRateHz / 100;
static const size_t kMaxNumFrames = 4;
struct AudioFeatures {
double log_pitch_gain[kMaxNumFrames];
double pitch_lag_hz[kMaxNumFrames];
double spectral_peak[kMaxNumFrames];
double rms[kMaxNumFrames];
size_t num_frames;
bool silence;
};
#endif // MODULES_AUDIO_PROCESSING_VAD_COMMON_H_

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/vad/gmm.h"
#include <math.h>
namespace webrtc {
static const int kMaxDimension = 10;
static void RemoveMean(const double* in,
const double* mean_vec,
int dimension,
double* out) {
for (int n = 0; n < dimension; ++n)
out[n] = in[n] - mean_vec[n];
}
static double ComputeExponent(const double* in,
const double* covar_inv,
int dimension) {
double q = 0;
for (int i = 0; i < dimension; ++i) {
double v = 0;
for (int j = 0; j < dimension; j++)
v += (*covar_inv++) * in[j];
q += v * in[i];
}
q *= -0.5;
return q;
}
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) {
if (gmm_parameters.dimension > kMaxDimension) {
return -1; // This is an invalid pdf value, so the caller can detect it.
}
double f = 0;
double v[kMaxDimension];
const double* mean_vec = gmm_parameters.mean;
const double* covar_inv = gmm_parameters.covar_inverse;
for (int n = 0; n < gmm_parameters.num_mixtures; n++) {
RemoveMean(x, mean_vec, gmm_parameters.dimension, v);
double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) +
gmm_parameters.weight[n];
f += exp(q);
mean_vec += gmm_parameters.dimension;
covar_inv += gmm_parameters.dimension * gmm_parameters.dimension;
}
return f;
}
} // namespace webrtc
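
A worked check of the weight convention documented in gmm.h (illustrative, not part of the patch): a single one-dimensional standard-normal mixture evaluated at its mean should give 1/sqrt(2*pi):

// weight = log(w) - dimension/2 * log(2*pi) - 1/2 * log(det(cov)); with
// w = 1, dimension = 1 and unit variance: weight = -0.5 * log(2*pi).
const double kWeight[] = {-0.91893853320467274};
const double kMean[] = {0.0};
const double kCovarInverse[] = {1.0};
const webrtc::GmmParameters standard_normal = {
    kWeight, kMean, kCovarInverse, /*dimension=*/1, /*num_mixtures=*/1};
const double x[] = {0.0};
const double pdf = webrtc::EvaluateGmm(x, standard_normal);  // ~0.3989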

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_GMM_H_
#define MODULES_AUDIO_PROCESSING_VAD_GMM_H_
namespace webrtc {
// A structure that specifies a GMM.
// A GMM is formulated as
// f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... +
// w[num_mixtures - 1] * mixture[num_mixtures - 1];
// Where a 'mixture' is a Gaussian density.
struct GmmParameters {
// weight[n] = log(w[n]) - `dimension`/2 * log(2*pi) - 1/2 * log(det(cov[n]));
// where cov[n] is the covariance matrix of mixture n;
const double* weight;
// pointer to the first element of a `num_mixtures`x`dimension` matrix
// where the kth row is the mean of the kth mixture.
const double* mean;
// pointer to the first element of a `num_mixtures`x`dimension`x`dimension`
// 3D-matrix, where the kth 2D-matrix is the inverse of the covariance
// matrix of the kth mixture.
const double* covar_inverse;
// Dimensionality of the mixtures.
int dimension;
// Number of mixtures.
int num_mixtures;
};
// Evaluates the given GMM, specified by `gmm_parameters`, at the given point
// `x`. If the dimensionality of the GMM is larger than the maximum dimension
// accepted by this function, -1 is returned.
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_GMM_H_

View File

@ -0,0 +1,82 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// GMM tables for inactive segments. Generated by MakeGmmTables.m.
#ifndef MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
#define MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
namespace webrtc {
static const int kNoiseGmmNumMixtures = 12;
static const int kNoiseGmmDim = 3;
static const double
kNoiseGmmCovarInverse[kNoiseGmmNumMixtures][kNoiseGmmDim][kNoiseGmmDim] = {
{{7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02},
{4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04},
{1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}},
{{8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03},
{-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04},
{5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}},
{{4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03},
{-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05},
{-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}},
{{9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03},
{-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07},
{-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}},
{{7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02},
{-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06},
{2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}},
{{8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02},
{-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06},
{-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}},
{{9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03},
{5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07},
{-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}},
{{8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03},
{5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07},
{6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}},
{{6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03},
{-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05},
{5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}},
{{6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03},
{4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08},
{-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}},
{{1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02},
{-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07},
{-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}},
{{4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03},
{-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07},
{5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}};
static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = {
{-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01},
{-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02},
{-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02},
{-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02},
{-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01},
{-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02},
{-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02},
{-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02},
{-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02},
{-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02},
{-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02},
{-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}};
static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = {
-1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01,
-1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01,
-1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01,
-1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/vad/pitch_based_vad.h"
#include <string.h>
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/noise_gmm_tables.h"
#include "modules/audio_processing/vad/vad_circular_buffer.h"
#include "modules/audio_processing/vad/voice_gmm_tables.h"
namespace webrtc {
static_assert(kNoiseGmmDim == kVoiceGmmDim,
"noise and voice gmm dimension not equal");
// These values should match MATLAB counterparts for unit-tests to pass.
static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.
static const double kInitialPriorProbability = 0.3;
static const int kTransientWidthThreshold = 7;
static const double kLowProbabilityThreshold = 0.2;
static double LimitProbability(double p) {
const double kLimHigh = 0.99;
const double kLimLow = 0.01;
if (p > kLimHigh)
p = kLimHigh;
else if (p < kLimLow)
p = kLimLow;
return p;
}
PitchBasedVad::PitchBasedVad()
: p_prior_(kInitialPriorProbability),
circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
// Setup noise GMM.
noise_gmm_.dimension = kNoiseGmmDim;
noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
noise_gmm_.weight = kNoiseGmmWeights;
noise_gmm_.mean = &kNoiseGmmMean[0][0];
noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
// Setup voice GMM.
voice_gmm_.dimension = kVoiceGmmDim;
voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
voice_gmm_.weight = kVoiceGmmWeights;
voice_gmm_.mean = &kVoiceGmmMean[0][0];
voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
}
PitchBasedVad::~PitchBasedVad() {}
int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
double* p_combined) {
double p;
double gmm_features[3];
double pdf_features_given_voice;
double pdf_features_given_noise;
// These limits are the same as in the MATLAB implementation `VoicingProbGMM()`.
const double kLimLowLogPitchGain = -2.0;
const double kLimHighLogPitchGain = -0.9;
const double kLimLowSpectralPeak = 200;
const double kLimHighSpectralPeak = 2000;
const double kEps = 1e-12;
for (size_t n = 0; n < features.num_frames; n++) {
gmm_features[0] = features.log_pitch_gain[n];
gmm_features[1] = features.spectral_peak[n];
gmm_features[2] = features.pitch_lag_hz[n];
pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
if (features.spectral_peak[n] < kLimLowSpectralPeak ||
features.spectral_peak[n] > kLimHighSpectralPeak ||
features.log_pitch_gain[n] < kLimLowLogPitchGain) {
pdf_features_given_voice = kEps * pdf_features_given_noise;
} else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
pdf_features_given_noise = kEps * pdf_features_given_voice;
}
p = p_prior_ * pdf_features_given_voice /
(pdf_features_given_voice * p_prior_ +
pdf_features_given_noise * (1 - p_prior_));
p = LimitProbability(p);
// Combine pitch-based probability with standalone probability, before
// updating prior probabilities.
double prod_active = p * p_combined[n];
double prod_inactive = (1 - p) * (1 - p_combined[n]);
p_combined[n] = prod_active / (prod_active + prod_inactive);
if (UpdatePrior(p_combined[n]) < 0)
return -1;
// Limit prior probability. With a zero prior probability the posterior
// probability is always zero.
p_prior_ = LimitProbability(p_prior_);
}
return 0;
}
int PitchBasedVad::UpdatePrior(double p) {
circular_buffer_->Insert(p);
if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
kLowProbabilityThreshold) < 0)
return -1;
p_prior_ = circular_buffer_->Mean();
return 0;
}
} // namespace webrtc
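
A numeric sketch (values illustrative) of the two probability steps in VoicingProbability() above, Bayes' rule followed by the hypothesis combination:

// 1) Bayes' rule with the current prior.
const double p_prior = 0.3, pdf_voice = 2.0, pdf_noise = 0.5;
const double p = p_prior * pdf_voice /
                 (pdf_voice * p_prior + pdf_noise * (1 - p_prior));  // ~0.632
// 2) Combine with the standalone-VAD probability by multiplying the active
//    and inactive hypotheses and renormalizing. 0.5 is neutral, so the
//    combined value stays at ~0.632.
const double p_standalone = 0.5;
const double prod_active = p * p_standalone;
const double prod_inactive = (1 - p) * (1 - p_standalone);
const double p_combined = prod_active / (prod_active + prod_inactive);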

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
#define MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
#include <memory>
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/gmm.h"
namespace webrtc {
class VadCircularBuffer;
// Computes the probability of the input audio frame to be active given
// the corresponding pitch-gain and lag of the frame.
class PitchBasedVad {
public:
PitchBasedVad();
~PitchBasedVad();
// Compute pitch-based voicing probability, given the features.
// features: a structure containing features required for computing voicing
// probabilities.
//
// p_combined: an array which contains the combined activity probabilities
//             computed prior to the call of this function. The method then
//             computes the voicing probabilities and combines them with the
//             given values. The results are returned in `p_combined`.
int VoicingProbability(const AudioFeatures& features, double* p_combined);
private:
int UpdatePrior(double p);
// TODO(turajs): maybe define this at a higher level (maybe enum) so that
// all the code recognizes it as "no-error."
static const int kNoError = 0;
GmmParameters noise_gmm_;
GmmParameters voice_gmm_;
double p_prior_;
std::unique_ptr<VadCircularBuffer> circular_buffer_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/vad/pitch_internal.h"
#include <cmath>
namespace webrtc {
// A 4-to-3 linear interpolation.
// The interpolation constants are derived as follows:
// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
// we are interested in pitch parameters of 0-5 ms, 10-15 ms and 20-25 ms. This
// is like interpolating 4-to-6 and keeping the odd samples.
// The reason behind this is that LPC coefficients are computed for the first
// half of each 10ms interval.
static void PitchInterpolation(double old_val, const double* in, double* out) {
out[0] = 1. / 6. * old_val + 5. / 6. * in[0];
out[1] = 5. / 6. * in[1] + 1. / 6. * in[2];
out[2] = 0.5 * in[2] + 0.5 * in[3];
}
void GetSubframesPitchParameters(int sampling_rate_hz,
double* gains,
double* lags,
int num_in_frames,
int num_out_frames,
double* log_old_gain,
double* old_lag,
double* log_pitch_gain,
double* pitch_lag_hz) {
// Gain interpolation is in log-domain, also returned in log-domain.
for (int n = 0; n < num_in_frames; n++)
gains[n] = log(gains[n] + 1e-12);
// Interpolate lags and gains.
PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
*log_old_gain = gains[num_in_frames - 1];
PitchInterpolation(*old_lag, lags, pitch_lag_hz);
*old_lag = lags[num_in_frames - 1];
// Convert pitch-lags to Hertz.
for (int n = 0; n < num_out_frames; n++) {
pitch_lag_hz[n] = sampling_rate_hz / pitch_lag_hz[n];
}
}
} // namespace webrtc
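
An illustrative check (not part of the patch), using the half-band rate of 8000 Hz that the VAD passes in: each interpolation's weights sum to 1, so constant inputs pass through unchanged, and lags in samples become Hz via Fs / lag:

double log_old_gain = log(0.5);           // Old gain, already in log domain.
double gains[4] = {0.5, 0.5, 0.5, 0.5};   // Linear; converted to log inside.
double old_lag = 80.0;
double lags[4] = {80.0, 80.0, 80.0, 80.0};
double log_pitch_gain[3];
double pitch_lag_hz[3];
webrtc::GetSubframesPitchParameters(8000, gains, lags, /*num_in_frames=*/4,
                                    /*num_out_frames=*/3, &log_old_gain,
                                    &old_lag, log_pitch_gain, pitch_lag_hz);
// log_pitch_gain[i] ~ log(0.5), pitch_lag_hz[i] = 8000 / 80 = 100 Hz.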

View File

@ -0,0 +1,30 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
#define MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
namespace webrtc {
// TODO(turajs): Write a description of this function. Also be consistent with
// usage of `sampling_rate_hz` vs `kSamplingFreqHz`.
void GetSubframesPitchParameters(int sampling_rate_hz,
double* gains,
double* lags,
int num_in_frames,
int num_out_frames,
double* log_old_gain,
double* old_lag,
double* log_pitch_gain,
double* pitch_lag_hz);
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_

View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/vad/pole_zero_filter.h"
#include <string.h>
#include <algorithm>
namespace webrtc {
PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
size_t order_numerator,
const float* denominator_coefficients,
size_t order_denominator) {
if (order_numerator > kMaxFilterOrder ||
order_denominator > kMaxFilterOrder || denominator_coefficients[0] == 0 ||
numerator_coefficients == NULL || denominator_coefficients == NULL)
return NULL;
return new PoleZeroFilter(numerator_coefficients, order_numerator,
denominator_coefficients, order_denominator);
}
PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
size_t order_numerator,
const float* denominator_coefficients,
size_t order_denominator)
: past_input_(),
past_output_(),
numerator_coefficients_(),
denominator_coefficients_(),
order_numerator_(order_numerator),
order_denominator_(order_denominator),
highest_order_(std::max(order_denominator, order_numerator)) {
memcpy(numerator_coefficients_, numerator_coefficients,
sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
memcpy(denominator_coefficients_, denominator_coefficients,
sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));
if (denominator_coefficients_[0] != 1) {
for (size_t n = 0; n <= order_numerator_; n++)
numerator_coefficients_[n] /= denominator_coefficients_[0];
for (size_t n = 0; n <= order_denominator_; n++)
denominator_coefficients_[n] /= denominator_coefficients_[0];
}
}
template <typename T>
static float FilterArPast(const T* past,
size_t order,
const float* coefficients) {
float sum = 0.0f;
size_t past_index = order - 1;
for (size_t k = 1; k <= order; k++, past_index--)
sum += coefficients[k] * past[past_index];
return sum;
}
int PoleZeroFilter::Filter(const int16_t* in,
size_t num_input_samples,
float* output) {
if (in == NULL || output == NULL)
return -1;
// This is the typical case, just a memcpy.
const size_t k = std::min(num_input_samples, highest_order_);
size_t n;
for (n = 0; n < k; n++) {
output[n] = in[n] * numerator_coefficients_[0];
output[n] += FilterArPast(&past_input_[n], order_numerator_,
numerator_coefficients_);
output[n] -= FilterArPast(&past_output_[n], order_denominator_,
denominator_coefficients_);
past_input_[n + order_numerator_] = in[n];
past_output_[n + order_denominator_] = output[n];
}
if (highest_order_ < num_input_samples) {
for (size_t m = 0; n < num_input_samples; n++, m++) {
output[n] = in[n] * numerator_coefficients_[0];
output[n] +=
FilterArPast(&in[m], order_numerator_, numerator_coefficients_);
output[n] -= FilterArPast(&output[m], order_denominator_,
denominator_coefficients_);
}
// Record into the past signal.
memcpy(past_input_, &in[num_input_samples - order_numerator_],
sizeof(in[0]) * order_numerator_);
memcpy(past_output_, &output[num_input_samples - order_denominator_],
sizeof(output[0]) * order_denominator_);
} else {
// Odd case where the length of the input is shorter than the filter order.
memmove(past_input_, &past_input_[num_input_samples],
order_numerator_ * sizeof(past_input_[0]));
memmove(past_output_, &past_output_[num_input_samples],
order_denominator_ * sizeof(past_output_[0]));
}
return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
#define MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
#include <stddef.h>
#include <stdint.h>
namespace webrtc {
class PoleZeroFilter {
public:
~PoleZeroFilter() {}
static PoleZeroFilter* Create(const float* numerator_coefficients,
size_t order_numerator,
const float* denominator_coefficients,
size_t order_denominator);
int Filter(const int16_t* in, size_t num_input_samples, float* output);
private:
PoleZeroFilter(const float* numerator_coefficients,
size_t order_numerator,
const float* denominator_coefficients,
size_t order_denominator);
static const int kMaxFilterOrder = 24;
int16_t past_input_[kMaxFilterOrder * 2];
float past_output_[kMaxFilterOrder * 2];
float numerator_coefficients_[kMaxFilterOrder + 1];
float denominator_coefficients_[kMaxFilterOrder + 1];
size_t order_numerator_;
size_t order_denominator_;
size_t highest_order_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
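
A usage sketch (not part of the patch): a first-order leaky integrator, y[n] = 0.5 * x[n] + 0.5 * y[n-1], written in the direct form that Filter() implements:

const float kNum[] = {0.5f, 0.0f};   // b0, b1
const float kDen[] = {1.0f, -0.5f};  // a0, a1
std::unique_ptr<webrtc::PoleZeroFilter> filter(
    webrtc::PoleZeroFilter::Create(kNum, /*order_numerator=*/1, kDen,
                                   /*order_denominator=*/1));
const int16_t impulse[8] = {1000, 0, 0, 0, 0, 0, 0, 0};
float out[8];
if (filter && filter->Filter(impulse, 8, out) == 0) {
  // out decays geometrically: 500, 250, 125, ...
}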

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/vad/standalone_vad.h"
#include <string.h>
#include "common_audio/vad/include/webrtc_vad.h"
#include "rtc_base/checks.h"
namespace webrtc {
static const int kDefaultStandaloneVadMode = 3;
StandaloneVad::StandaloneVad(VadInst* vad)
: vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {}
StandaloneVad::~StandaloneVad() {
WebRtcVad_Free(vad_);
}
StandaloneVad* StandaloneVad::Create() {
VadInst* vad = WebRtcVad_Create();
if (!vad)
return nullptr;
int err = WebRtcVad_Init(vad);
err |= WebRtcVad_set_mode(vad, kDefaultStandaloneVadMode);
if (err != 0) {
WebRtcVad_Free(vad);
return nullptr;
}
return new StandaloneVad(vad);
}
int StandaloneVad::AddAudio(const int16_t* data, size_t length) {
if (length != kLength10Ms)
return -1;
if (index_ + length > kLength10Ms * kMaxNum10msFrames)
// Reset the buffer if it's full.
// TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
// can forgo the buffering.
index_ = 0;
memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
index_ += length;
return 0;
}
int StandaloneVad::GetActivity(double* p, size_t length_p) {
if (index_ == 0)
return -1;
const size_t num_frames = index_ / kLength10Ms;
if (num_frames > length_p)
return -1;
RTC_DCHECK_EQ(0, WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_));
int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
if (activity < 0)
return -1;
else if (activity == 0)
p[0] = 0.01; // Arbitrary but small and non-zero.
else
p[0] = 0.5; // 0.5 is a neutral value when combined with other probabilities.
for (size_t n = 1; n < num_frames; n++)
p[n] = p[0];
// Reset the buffer to start from the beginning.
index_ = 0;
return activity;
}
int StandaloneVad::set_mode(int mode) {
if (mode < 0 || mode > 3)
return -1;
if (WebRtcVad_set_mode(vad_, mode) != 0)
return -1;
mode_ = mode;
return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#define MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#include <stddef.h>
#include <stdint.h>
#include "common_audio/vad/include/webrtc_vad.h"
#include "modules/audio_processing/vad/common.h"
namespace webrtc {
class StandaloneVad {
public:
static StandaloneVad* Create();
~StandaloneVad();
// Outputs
// p: a buffer where probabilities are written to.
// length_p: number of elements of `p`.
//
// return value:
// -1: if no audio is stored or the VAD returns an error.
// 0: on success.
// In case of error the content of `p` is unchanged.
//
// Note that due to a high false-positive (VAD decision is active while the
// processed audio is just background noise) rate, stand-alone VAD is used as
// a one-sided indicator. The activity probability is 0.5 if the frame is
// classified as active, and the probability is 0.01 if the audio is
// classified as passive. In this way, when probabilities are combined, the
// effect of the stand-alone VAD is neutral if the input is classified as
// active.
int GetActivity(double* p, size_t length_p);
// Expecting 10 ms of 16 kHz audio to be pushed in.
int AddAudio(const int16_t* data, size_t length);
// Set aggressiveness of VAD, 0 is the least aggressive and 3 is the most
// aggressive mode. Returns -1 if the input is less than 0 or larger than 3,
// otherwise 0 is returned.
int set_mode(int mode);
// Get the aggressiveness of the current VAD.
int mode() const { return mode_; }
private:
explicit StandaloneVad(VadInst* vad);
static const size_t kMaxNum10msFrames = 3;
// TODO(turajs): Is there a way to use scoped-pointer here?
VadInst* vad_;
int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
size_t index_;
int mode_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
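
Usage sketch (illustrative): push one 10 ms frame of 16 kHz audio, then read back one probability per buffered frame:

std::unique_ptr<webrtc::StandaloneVad> vad(webrtc::StandaloneVad::Create());
int16_t frame[160] = {};  // kLength10Ms samples; silence here.
double p[3];              // Up to kMaxNum10msFrames probabilities.
if (vad && vad->AddAudio(frame, 160) == 0) {
  const int activity = vad->GetActivity(p, 3);
  // activity: 1 active, 0 passive, -1 error; p[0] is 0.5 or 0.01 as above.
}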

View File

@ -0,0 +1,275 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/vad/vad_audio_proc.h"
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
#include "modules/audio_processing/vad/pitch_internal.h"
#include "modules/audio_processing/vad/pole_zero_filter.h"
#include "modules/audio_processing/vad/vad_audio_proc_internal.h"
#include "rtc_base/checks.h"
extern "C" {
#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
}
namespace webrtc {
// The following structures are declared anonymous in iSAC's structs.h. To
// forward declare them, we use this derived class trick.
struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
static constexpr float kFrequencyResolution =
kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize);
static constexpr int kSilenceRms = 5;
// TODO(turajs): Make a Create or Init for VadAudioProc.
VadAudioProc::VadAudioProc()
: audio_buffer_(),
num_buffer_samples_(kNumPastSignalSamples),
log_old_gain_(-2),
old_lag_(50), // Arbitrary but valid as pitch-lag (in samples).
pitch_analysis_handle_(new PitchAnalysisStruct),
pre_filter_handle_(new PreFiltBankstr),
high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator,
kFilterOrder,
kCoeffDenominator,
kFilterOrder)) {
static_assert(kNumPastSignalSamples + kNumSubframeSamples ==
sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
"lpc analysis window incorrect size");
static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
"correlation weight incorrect size");
// TODO(turajs): Are we doing too much in the constructor?
float data[kDftSize];
// Make FFT to initialize.
ip_[0] = 0;
WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
// TODO(turajs): Need to initialize high-pass filter.
// Initialize iSAC components.
WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
}
VadAudioProc::~VadAudioProc() {}
void VadAudioProc::ResetBuffer() {
memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
sizeof(audio_buffer_[0]) * kNumPastSignalSamples);
num_buffer_samples_ = kNumPastSignalSamples;
}
int VadAudioProc::ExtractFeatures(const int16_t* frame,
size_t length,
AudioFeatures* features) {
features->num_frames = 0;
if (length != kNumSubframeSamples) {
return -1;
}
// High-pass filter to remove the DC component and very low-frequency content.
// We have found that this high-pass filtering improves voiced/non-voiced
// classification.
if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
&audio_buffer_[num_buffer_samples_]) != 0) {
return -1;
}
num_buffer_samples_ += kNumSubframeSamples;
if (num_buffer_samples_ < kBufferLength) {
return 0;
}
RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength);
features->num_frames = kNum10msSubframes;
features->silence = false;
Rms(features->rms, kMaxNumFrames);
for (size_t i = 0; i < kNum10msSubframes; ++i) {
if (features->rms[i] < kSilenceRms) {
// PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
// Bail out here instead.
features->silence = true;
ResetBuffer();
return 0;
}
}
PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
kMaxNumFrames);
FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
ResetBuffer();
return 0;
}
// Computes |kLpcOrder + 1| correlation coefficients.
void VadAudioProc::SubframeCorrelation(double* corr,
size_t length_corr,
size_t subframe_index) {
RTC_DCHECK_GE(length_corr, kLpcOrder + 1);
double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
size_t buffer_index = subframe_index * kNumSubframeSamples;
for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
WebRtcIsac_AutoCorr(corr, windowed_audio,
kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
}
// Compute `kNum10msSubframes` sets of LPC coefficients, one per 10 ms input.
// The analysis window is 15 ms long and it is centered on the first half of
// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
// first half of each 10 ms subframe.
void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) {
RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1));
double corr[kLpcOrder + 1];
double reflec_coeff[kLpcOrder];
for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes;
i++, offset_lpc += kLpcOrder + 1) {
SubframeCorrelation(corr, kLpcOrder + 1, i);
corr[0] *= 1.0001;  // This makes Levinson-Durbin a bit more stable.
for (size_t k = 0; k < kLpcOrder + 1; k++) {
corr[k] *= kCorrWeight[k];
}
WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
}
}
// Fit a second order curve to these 3 points and find the location of the
// extremum. The points are inverted before curve fitting.
static float QuadraticInterpolation(float prev_val,
float curr_val,
float next_val) {
// Doing the interpolation in |1 / A(z)|^2.
float fractional_index = 0;
next_val = 1.0f / next_val;
prev_val = 1.0f / prev_val;
curr_val = 1.0f / curr_val;
fractional_index =
-(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
RTC_DCHECK_LT(fabs(fractional_index), 1);
return fractional_index;
}
// 1 / A(z), where A(z) is defined by `lpc`, is a model of the spectral
// envelope of the input signal. A local maximum of the spectral envelope
// corresponds to a local minimum of A(z), so searching A(z) directly saves
// an inversion. Furthermore, we search the squared magnitude, which saves a
// square root.
void VadAudioProc::FindFirstSpectralPeaks(double* f_peak,
size_t length_f_peak) {
RTC_DCHECK_GE(length_f_peak, kNum10msSubframes);
double lpc[kNum10msSubframes * (kLpcOrder + 1)];
// For all sub-frames.
GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
const size_t kNumDftCoefficients = kDftSize / 2 + 1;
float data[kDftSize];
for (size_t i = 0; i < kNum10msSubframes; i++) {
// Convert to float with zero pad.
memset(data, 0, sizeof(data));
for (size_t n = 0; n < kLpcOrder + 1; n++) {
data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
}
// Transform to frequency domain.
WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
size_t index_peak = 0;
float prev_magn_sqr = data[0] * data[0];
float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
float next_magn_sqr;
bool found_peak = false;
for (size_t n = 2; n < kNumDftCoefficients - 1; n++) {
next_magn_sqr =
data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1];
if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
found_peak = true;
index_peak = n - 1;
break;
}
prev_magn_sqr = curr_magn_sqr;
curr_magn_sqr = next_magn_sqr;
}
float fractional_index = 0;
if (!found_peak) {
// Checking if |kNumDftCoefficients - 1| is the local minimum.
next_magn_sqr = data[1] * data[1];
if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
index_peak = kNumDftCoefficients - 1;
}
} else {
// A peak is found, do a simple quadratic interpolation to get a more
// accurate estimate of the peak location.
fractional_index =
QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr);
}
f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
}
}
// Using iSAC functions to estimate pitch gains & lags.
void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
double* pitch_lags_hz,
size_t length) {
// TODO(turajs): This can be "imported" from iSAC & and the next two
// constants.
RTC_DCHECK_GE(length, kNum10msSubframes);
const int kNumPitchSubframes = 4;
double gains[kNumPitchSubframes];
double lags[kNumPitchSubframes];
const int kNumSubbandFrameSamples = 240;
const int kNumLookaheadSamples = 24;
float lower[kNumSubbandFrameSamples];
float upper[kNumSubbandFrameSamples];
double lower_lookahead[kNumSubbandFrameSamples];
double upper_lookahead[kNumSubbandFrameSamples];
double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
kNumLookaheadSamples];
// Split the signal into lower and upper bands.
WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
upper, lower_lookahead, upper_lookahead,
pre_filter_handle_.get());
WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
pitch_analysis_handle_.get(), lags, gains);
// Lags are computed on lower-band signal with sampling rate half of the
// input signal.
GetSubframesPitchParameters(
kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
&log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
}
void VadAudioProc::Rms(double* rms, size_t length_rms) {
RTC_DCHECK_GE(length_rms, kNum10msSubframes);
size_t offset = kNumPastSignalSamples;
for (size_t i = 0; i < kNum10msSubframes; i++) {
rms[i] = 0;
for (size_t n = 0; n < kNumSubframeSamples; n++, offset++)
rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
rms[i] = sqrt(rms[i] / kNumSubframeSamples);
}
}
} // namespace webrtc
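
A quick numeric check (illustrative) of the parabola-vertex step that QuadraticInterpolation() applies to the three inverted magnitudes:

// Offset of the extremum of the parabola through (-1, prev), (0, curr),
// (1, next), as computed above.
const double prev = 1.0, curr = 0.5, next = 1.0;
const double offset = -(next - prev) * 0.5 / (next + prev - 2.0 * curr);
// offset == 0: symmetric neighbors put the extremum on the center bin.
// With next = 0.8 instead, offset = 0.125, i.e. 1/8 of a bin toward the
// smaller neighbor; the caller scales the result by kFrequencyResolution
// (16000 / 512 = 31.25 Hz here).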

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR...
namespace webrtc {
class PoleZeroFilter;
class VadAudioProc {
public:
// Forward declare iSAC structs.
struct PitchAnalysisStruct;
struct PreFiltBankstr;
VadAudioProc();
~VadAudioProc();
int ExtractFeatures(const int16_t* audio_frame,
size_t length,
AudioFeatures* audio_features);
static constexpr size_t kDftSize = 512;
private:
void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
void SubframeCorrelation(double* corr,
size_t length_corr,
size_t subframe_index);
void GetLpcPolynomials(double* lpc, size_t length_lpc);
void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
void Rms(double* rms, size_t length_rms);
void ResetBuffer();
// To compute the spectral peak we perform LPC analysis to get the spectral
// envelope. For every 30 ms we compute 3 spectral peaks, hence 3 LPC analyses.
// LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
// we need 5 ms of past signal to create the input of the LPC analysis.
static constexpr size_t kNumPastSignalSamples = size_t{kSampleRateHz / 200};
// TODO(turajs): maybe define this at a higher level (maybe enum) so that
// all the code recognizes it as "no-error."
static constexpr int kNoError = 0;
static constexpr size_t kNum10msSubframes = 3;
static constexpr size_t kNumSubframeSamples = size_t{kSampleRateHz / 100};
// Samples in 30 ms @ given sampling rate.
static constexpr size_t kNumSamplesToProcess =
kNum10msSubframes * kNumSubframeSamples;
static constexpr size_t kBufferLength =
kNumPastSignalSamples + kNumSamplesToProcess;
static constexpr size_t kIpLength = kDftSize >> 1;
static constexpr size_t kWLength = kDftSize >> 1;
static constexpr size_t kLpcOrder = 16;
size_t ip_[kIpLength];
float w_fft_[kWLength];
// A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
float audio_buffer_[kBufferLength];
size_t num_buffer_samples_;
double log_old_gain_;
double old_lag_;
std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
std::unique_ptr<PoleZeroFilter> high_pass_filter_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
#include <stddef.h>
namespace webrtc {
// These values should match MATLAB counterparts for unit-tests to pass.
static const double kCorrWeight[] = {
1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217,
0.913308, 0.899609, 0.886115, 0.872823, 0.859730, 0.846834,
0.834132, 0.821620, 0.809296, 0.797156, 0.785199};
static const double kLpcAnalWin[] = {
0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639,
0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883,
0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547,
0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438,
0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222,
0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713,
0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164,
0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546,
0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810,
0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148,
0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233,
0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442,
0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069,
0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512,
0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447,
0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979,
0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773,
0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158,
0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215,
0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840,
0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778,
0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639,
0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889,
0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814,
0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465,
0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574,
0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451,
0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858,
0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862,
0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664,
0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416,
0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008,
0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853,
0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642,
0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093,
0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687,
0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387,
0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358,
0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670,
0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000};
static const size_t kFilterOrder = 2;
static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f,
0.974827f};
static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f,
0.972457f};
static_assert(kFilterOrder + 1 ==
sizeof(kCoeffNumerator) / sizeof(kCoeffNumerator[0]),
"numerator coefficients incorrect size");
static_assert(kFilterOrder + 1 ==
sizeof(kCoeffDenominator) / sizeof(kCoeffDenominator[0]),
"denominator coefficients incorrect size");
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_

View File

@ -0,0 +1,135 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/vad/vad_circular_buffer.h"
#include <stdlib.h>
namespace webrtc {
VadCircularBuffer::VadCircularBuffer(int buffer_size)
: buffer_(new double[buffer_size]),
is_full_(false),
index_(0),
buffer_size_(buffer_size),
sum_(0) {}
VadCircularBuffer::~VadCircularBuffer() {}
void VadCircularBuffer::Reset() {
is_full_ = false;
index_ = 0;
sum_ = 0;
}
VadCircularBuffer* VadCircularBuffer::Create(int buffer_size) {
if (buffer_size <= 0)
return NULL;
return new VadCircularBuffer(buffer_size);
}
double VadCircularBuffer::Oldest() const {
if (!is_full_)
return buffer_[0];
else
return buffer_[index_];
}
double VadCircularBuffer::Mean() {
double m;
if (is_full_) {
m = sum_ / buffer_size_;
} else {
if (index_ > 0)
m = sum_ / index_;
else
m = 0;
}
return m;
}
void VadCircularBuffer::Insert(double value) {
if (is_full_) {
sum_ -= buffer_[index_];
}
sum_ += value;
buffer_[index_] = value;
index_++;
if (index_ >= buffer_size_) {
is_full_ = true;
index_ = 0;
}
}
int VadCircularBuffer::BufferLevel() {
if (is_full_)
return buffer_size_;
return index_;
}
int VadCircularBuffer::Get(int index, double* value) const {
int err = ConvertToLinearIndex(&index);
if (err < 0)
return -1;
*value = buffer_[index];
return 0;
}
int VadCircularBuffer::Set(int index, double value) {
int err = ConvertToLinearIndex(&index);
if (err < 0)
return -1;
sum_ -= buffer_[index];
buffer_[index] = value;
sum_ += value;
return 0;
}
int VadCircularBuffer::ConvertToLinearIndex(int* index) const {
if (*index < 0 || *index >= buffer_size_)
return -1;
if (!is_full_ && *index >= index_)
return -1;
*index = index_ - 1 - *index;
if (*index < 0)
*index += buffer_size_;
return 0;
}
int VadCircularBuffer::RemoveTransient(int width_threshold,
double val_threshold) {
if (!is_full_ && index_ < width_threshold + 2)
return 0;
int index_1 = 0;
int index_2 = width_threshold + 1;
double v = 0;
if (Get(index_1, &v) < 0)
return -1;
if (v < val_threshold) {
Set(index_1, 0);
int index;
for (index = index_2; index > index_1; index--) {
if (Get(index, &v) < 0)
return -1;
if (v < val_threshold)
break;
}
for (; index > index_1; index--) {
if (Set(index, 0.0) < 0)
return -1;
}
}
return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
#include <memory>
namespace webrtc {
// A circular buffer tailored to the needs of this project. It stores the last
// K samples of the input and keeps track of their mean.
//
// It is used in class "PitchBasedVad" to keep track of posterior
// probabilities in the past few seconds. The posterior probabilities are used
// to recursively update prior probabilities.
class VadCircularBuffer {
public:
static VadCircularBuffer* Create(int buffer_size);
~VadCircularBuffer();
// True if the buffer has wrapped around.
bool is_full() const { return is_full_; }
// Get the oldest entry in the buffer.
double Oldest() const;
// Insert new value into the buffer.
void Insert(double value);
// Reset buffer, forget the past, start fresh.
void Reset();
// The mean value of the elements in the buffer. The return value is zero if
// buffer is empty, i.e. no value is inserted.
double Mean();
// Remove transients. If the values exceed `val_threshold` for a period
// shorter than or equal to `width_threshold`, then that period is considered
// transient and set to zero.
int RemoveTransient(int width_threshold, double val_threshold);
private:
explicit VadCircularBuffer(int buffer_size);
// Get previous values. `index = 0` corresponds to the most recent
// insertion. `index = 1` is the one before the most recent insertion, and
// so on.
int Get(int index, double* value) const;
// Set a given position to `value`. `index` is interpreted as above.
int Set(int index, double value);
// Return the number of valid elements in the buffer.
int BufferLevel();
// Convert an index as interpreted by the Get() method to the corresponding
// linear index into `buffer_`.
int ConvertToLinearIndex(int* index) const;
std::unique_ptr<double[]> buffer_;
bool is_full_;
int index_;
int buffer_size_;
double sum_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
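
A minimal usage sketch of the class above (not part of this commit): it fills a five-slot buffer and then removes a two-sample spike with RemoveTransient(). The include path is inferred from the header guard; treat it as an assumption.

#include <cstdio>
#include <memory>

#include "modules/audio_processing/vad/vad_circular_buffer.h"

int main() {
  std::unique_ptr<webrtc::VadCircularBuffer> buffer(
      webrtc::VadCircularBuffer::Create(5));
  if (!buffer)
    return 1;
  // Oldest to newest; the 0.8/0.9 pair is a two-sample spike that has
  // already ended (the most recent value is low again).
  for (double v : {0.0, 0.0, 0.8, 0.9, 0.0})
    buffer->Insert(v);
  std::printf("mean before: %f\n", buffer->Mean());  // 0.34
  // A burst no wider than 2 samples above 0.5 counts as a transient.
  buffer->RemoveTransient(/*width_threshold=*/2, /*val_threshold=*/0.5);
  std::printf("mean after: %f\n", buffer->Mean());  // 0.00
  return 0;
}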


@ -0,0 +1,85 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/vad/voice_activity_detector.h"
#include <algorithm>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
const size_t kNumChannels = 1;
const double kDefaultVoiceValue = 1.0;
const double kNeutralProbability = 0.5;
const double kLowProbability = 0.01;
} // namespace
VoiceActivityDetector::VoiceActivityDetector()
: last_voice_probability_(kDefaultVoiceValue),
standalone_vad_(StandaloneVad::Create()) {}
VoiceActivityDetector::~VoiceActivityDetector() = default;
// Because the internal ISAC analysis operates on a different chunk length,
// `chunkwise_voice_probabilities_` and `chunkwise_rms_` are updated only when
// there is new data; otherwise they are cleared.
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
size_t length,
int sample_rate_hz) {
RTC_DCHECK_EQ(length, sample_rate_hz / 100);
// TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.
// Resample to the required rate.
const int16_t* resampled_ptr = audio;
if (sample_rate_hz != kSampleRateHz) {
RTC_CHECK_EQ(
resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
0);
resampler_.Push(audio, length, resampled_, kLength10Ms, length);
resampled_ptr = resampled_;
}
RTC_DCHECK_EQ(length, kLength10Ms);
// Each chunk needs to be passed into `standalone_vad_`, because internally it
// buffers the audio and processes it all at once when GetActivity() is
// called.
RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
chunkwise_voice_probabilities_.resize(features_.num_frames);
chunkwise_rms_.resize(features_.num_frames);
std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
chunkwise_rms_.begin());
if (features_.num_frames > 0) {
if (features_.silence) {
// The other features are invalid, so set the voice probabilities to an
// arbitrary low value.
std::fill(chunkwise_voice_probabilities_.begin(),
chunkwise_voice_probabilities_.end(), kLowProbability);
} else {
std::fill(chunkwise_voice_probabilities_.begin(),
chunkwise_voice_probabilities_.end(), kNeutralProbability);
RTC_CHECK_GE(
standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
chunkwise_voice_probabilities_.size()),
0);
RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
features_, &chunkwise_voice_probabilities_[0]),
0);
}
last_voice_probability_ = chunkwise_voice_probabilities_.back();
}
}
} // namespace webrtc


@ -0,0 +1,74 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <vector>
#include "common_audio/resampler/include/resampler.h"
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/pitch_based_vad.h"
#include "modules/audio_processing/vad/standalone_vad.h"
#include "modules/audio_processing/vad/vad_audio_proc.h"
namespace webrtc {
// A Voice Activity Detector (VAD) that combines the voice probability from the
// StandaloneVad and PitchBasedVad to get a more robust estimation.
class VoiceActivityDetector {
public:
VoiceActivityDetector();
~VoiceActivityDetector();
// Processes each audio chunk and estimates the voice probability.
// TODO(bugs.webrtc.org/7494): Switch to rtc::ArrayView and remove
// `sample_rate_hz`.
void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
// Returns a vector of voice probabilities for each chunk. It can be empty
// for some chunks; the detector catches up afterwards by returning multiple
// values at once.
const std::vector<double>& chunkwise_voice_probabilities() const {
return chunkwise_voice_probabilities_;
}
// Returns a vector of RMS values for each chunk. It has the same length as
// chunkwise_voice_probabilities().
const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; }
// Returns the most recent voice probability. Regardless of the internal
// implementation, it lags the input by a few chunks.
float last_voice_probability() const { return last_voice_probability_; }
private:
// TODO(aluebs): Change these to float.
std::vector<double> chunkwise_voice_probabilities_;
std::vector<double> chunkwise_rms_;
float last_voice_probability_;
Resampler resampler_;
VadAudioProc audio_processing_;
std::unique_ptr<StandaloneVad> standalone_vad_;
PitchBasedVad pitch_based_vad_;
int16_t resampled_[kLength10Ms];
AudioFeatures features_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
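
A sketch of how the detector is typically driven (not part of this commit); the all-zero input and the chunk count are purely illustrative.

#include <cstdint>
#include <cstdio>
#include <vector>

#include "modules/audio_processing/vad/voice_activity_detector.h"

int main() {
  webrtc::VoiceActivityDetector vad;
  const int sample_rate_hz = 16000;
  // ProcessChunk() expects exactly 10 ms of audio: 160 samples at 16 kHz.
  std::vector<int16_t> chunk(sample_rate_hz / 100, 0);
  for (int i = 0; i < 100; ++i) {  // One second of audio.
    vad.ProcessChunk(chunk.data(), chunk.size(), sample_rate_hz);
    // Probabilities arrive in bursts once the internal ISAC-sized buffer
    // fills, so this vector is empty for most chunks.
    for (double p : vad.chunkwise_voice_probabilities())
      std::printf("voice probability: %f\n", p);
  }
  std::printf("last: %f\n", vad.last_voice_probability());
  return 0;
}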


@ -0,0 +1,77 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// GMM tables for active segments. Generated by MakeGmmTables.m.
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
static const int kVoiceGmmNumMixtures = 12;
static const int kVoiceGmmDim = 3;
static const double
kVoiceGmmCovarInverse[kVoiceGmmNumMixtures][kVoiceGmmDim][kVoiceGmmDim] = {
{{1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03},
{-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04},
{4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}},
{{6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03},
{-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05},
{-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}},
{{9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03},
{-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05},
{-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}},
{{3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02},
{-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05},
{-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}},
{{1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02},
{-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05},
{-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}},
{{1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02},
{-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06},
{-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}},
{{8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02},
{-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06},
{-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}},
{{2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04},
{-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06},
{7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}},
{{3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02},
{1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05},
{-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}},
{{6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04},
{-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06},
{-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}},
{{2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03},
{-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05},
{-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}},
{{1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02},
{-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05},
{-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}};
static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
{-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02},
{-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02},
{-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02},
{-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02},
{-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02},
{-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02},
{-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02},
{-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02},
{-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02},
{-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02},
{-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02},
{-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
-1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01,
-1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01,
-1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01,
-1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00};
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
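
For orientation, a sketch of how inverse-covariance GMM tables of this form are typically evaluated (not part of this commit). It assumes the log-weights above already fold in the Gaussian normalization constants, and the function name is illustrative, not a WebRTC API.

#include <cmath>

#include "modules/audio_processing/vad/voice_gmm_tables.h"

// Log-likelihood of a 3-dimensional feature vector under the mixture.
double VoiceGmmLogLikelihood(const double x[kVoiceGmmDim]) {
  double likelihood = 0.0;
  for (int m = 0; m < kVoiceGmmNumMixtures; ++m) {
    // Mahalanobis term: (x - mean)^T * Sigma^{-1} * (x - mean).
    double quad = 0.0;
    for (int i = 0; i < kVoiceGmmDim; ++i) {
      for (int j = 0; j < kVoiceGmmDim; ++j) {
        quad += (x[i] - kVoiceGmmMean[m][i]) *
                kVoiceGmmCovarInverse[m][i][j] *
                (x[j] - kVoiceGmmMean[m][j]);
      }
    }
    // kVoiceGmmWeights[m] is a log-weight, so exponentiate the full exponent.
    likelihood += std::exp(kVoiceGmmWeights[m] - 0.5 * quad);
  }
  return std::log(likelihood);
}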


@ -0,0 +1,942 @@
/*
* Copyright(c)1995,97 Mark Olesen <olesen@me.QueensU.CA>
* Queen's Univ at Kingston (Canada)
*
* Permission to use, copy, modify, and distribute this software for
* any purpose without fee is hereby granted, provided that this
* entire notice is included in all copies of any software which is
* or includes a copy or modification of this software and in all
* copies of the supporting documentation for such software.
*
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR QUEEN'S
* UNIVERSITY AT KINGSTON MAKES ANY REPRESENTATION OR WARRANTY OF ANY
* KIND CONCERNING THE MERCHANTABILITY OF THIS SOFTWARE OR ITS
* FITNESS FOR ANY PARTICULAR PURPOSE.
*
* All of which is to say that you can do what you like with this
* source code provided you don't try to sell it as your own and you
* include an unaltered copy of this message (including the
* copyright).
*
* It is also implicitly understood that bug fixes and improvements
* should make their way back to the general Internet community so
* that everyone benefits.
*
* Changes:
* Trivial type modifications by the WebRTC authors.
*/
/*
* File:
* WebRtcIsac_Fftn.c
*
* Public:
* WebRtcIsac_Fftn / fftnf ();
*
* Private:
* WebRtcIsac_Fftradix / fftradixf ();
*
 * Descript:
 * multivariate complex Fourier transform, computed in place
 * using a mixed-radix Fast Fourier Transform algorithm.
*
* Fortran code by:
* RC Singleton, Stanford Research Institute, Sept. 1968
*
* translated by f2c (version 19950721).
*
 * int WebRtcIsac_Fftns (unsigned int ndim, const int dims[], double Re[],
 *                       double Im[], int iSign, double scaling,
 *                       FFTstr *fftstate);
*
 * NDIM = the total number of dimensions
* DIMS = a vector of array sizes
* if NDIM is zero then DIMS must be zero-terminated
*
* RE and IM hold the real and imaginary components of the data, and return
* the resulting real and imaginary Fourier coefficients. Multidimensional
* data *must* be allocated contiguously. There is no limit on the number
* of dimensions.
*
 * ISIGN = the sign of the complex exponential (i.e., forward or inverse FFT);
 * the magnitude of ISIGN (normally 1) is used to determine the
 * correct indexing increment (see below).
*
* SCALING = normalizing constant by which the final result is *divided*
* if SCALING == -1, normalize by total dimension of the transform
* if SCALING < -1, normalize by the square-root of the total dimension
*
* example:
* tri-variate transform with Re[n1][n2][n3], Im[n1][n2][n3]
*
* int dims[3] = {n1,n2,n3}
* WebRtcIsac_Fftn (3, dims, Re, Im, 1, scaling);
*
*-----------------------------------------------------------------------*
* int WebRtcIsac_Fftradix (REAL Re[], REAL Im[], size_t nTotal, size_t nPass,
* size_t nSpan, int iSign, size_t max_factors,
* size_t max_perm);
*
* RE, IM - see above documentation
*
 * There is no limit on the number of dimensions, but WebRtcIsac_Fftradix()
 * must be called once for each dimension; the calls may be in any order.
*
* NTOTAL = the total number of complex data values
* NPASS = the dimension of the current variable
* NSPAN/NPASS = the spacing of consecutive data values while indexing the
* current variable
* ISIGN - see above documentation
*
* example:
* tri-variate transform with Re[n1][n2][n3], Im[n1][n2][n3]
*
* WebRtcIsac_Fftradix (Re, Im, n1*n2*n3, n1, n1, 1, maxf, maxp);
* WebRtcIsac_Fftradix (Re, Im, n1*n2*n3, n2, n1*n2, 1, maxf, maxp);
* WebRtcIsac_Fftradix (Re, Im, n1*n2*n3, n3, n1*n2*n3, 1, maxf, maxp);
*
* single-variate transform,
* NTOTAL = N = NSPAN = (number of complex data values),
*
* WebRtcIsac_Fftradix (Re, Im, n, n, n, 1, maxf, maxp);
*
* The data can also be stored in a single array with alternating real and
* imaginary parts, the magnitude of ISIGN is changed to 2 to give correct
* indexing increment, and data [0] and data [1] used to pass the initial
* addresses for the sequences of real and imaginary values,
*
* example:
* REAL data [2*NTOTAL];
* WebRtcIsac_Fftradix ( &data[0], &data[1], NTOTAL, nPass, nSpan, 2, maxf, maxp);
*
* for temporary allocation:
*
* MAX_FACTORS >= the maximum prime factor of NPASS
* MAX_PERM >= the number of prime factors of NPASS. In addition,
* if the square-free portion K of NPASS has two or more prime
* factors, then MAX_PERM >= (K-1)
*
* storage in FACTOR for a maximum of 15 prime factors of NPASS. if NPASS
* has more than one square-free factor, the product of the square-free
* factors must be <= 210 array storage for maximum prime factor of 23 the
* following two constants should agree with the array dimensions.
*
*----------------------------------------------------------------------*/
#include <stdlib.h>
#include <math.h>
#include "modules/third_party/fft/fft.h"
/* double precision routine */
static int
WebRtcIsac_Fftradix (double Re[], double Im[],
size_t nTotal, size_t nPass, size_t nSpan, int isign,
int max_factors, unsigned int max_perm,
FFTstr *fftstate);
#ifndef M_PI
# define M_PI 3.14159265358979323846264338327950288
#endif
#ifndef SIN60
# define SIN60 0.86602540378443865 /* sin(60 deg) */
# define COS72 0.30901699437494742 /* cos(72 deg) */
# define SIN72 0.95105651629515357 /* sin(72 deg) */
#endif
# define REAL double
# define FFTN WebRtcIsac_Fftn
# define FFTNS "fftn"
# define FFTRADIX WebRtcIsac_Fftradix
# define FFTRADIXS "fftradix"
int WebRtcIsac_Fftns(unsigned int ndim, const int dims[],
double Re[],
double Im[],
int iSign,
double scaling,
FFTstr *fftstate)
{
size_t nSpan, nPass, nTotal;
unsigned int i;
int ret, max_factors, max_perm;
/*
* tally the number of elements in the data array
* and determine the number of dimensions
*/
nTotal = 1;
if (ndim && dims [0])
{
for (i = 0; i < ndim; i++)
{
if (dims [i] <= 0)
{
return -1;
}
nTotal *= dims [i];
}
}
else
{
ndim = 0;
for (i = 0; dims [i]; i++)
{
if (dims [i] <= 0)
{
return -1;
}
nTotal *= dims [i];
ndim++;
}
}
/* determine maximum number of factors and permutations */
#if 1
/*
 * follow John Beale's example, just use the largest dimension and don't
 * worry about excess allocation. Maybe someone else will do it?
 */
max_factors = max_perm = 1;
for (i = 0; i < ndim; i++)
{
nSpan = dims [i];
if ((int)nSpan > max_factors)
{
max_factors = (int)nSpan;
}
if ((int)nSpan > max_perm)
{
max_perm = (int)nSpan;
}
}
#else
/* use the constants used in the original Fortran code */
max_factors = 23;
max_perm = 209;
#endif
/* loop over the dimensions: */
nPass = 1;
for (i = 0; i < ndim; i++)
{
nSpan = dims [i];
nPass *= nSpan;
ret = FFTRADIX (Re, Im, nTotal, nSpan, nPass, iSign,
max_factors, max_perm, fftstate);
/* exit, clean-up already done */
if (ret)
return ret;
}
/* Divide through by the normalizing constant: */
if (scaling && scaling != 1.0)
{
if (iSign < 0) iSign = -iSign;
      if (scaling < 0.0)
      {
         /* SCALING == -1 normalizes by the total dimension; SCALING < -1 by
          * its square root, per the documentation at the top of this file.
          * Test the caller's value before overwriting it. */
         scaling = (scaling < -1.0) ? sqrt ((double) nTotal) : (double) nTotal;
      }
scaling = 1.0 / scaling; /* multiply is often faster */
for (i = 0; i < nTotal; i += iSign)
{
Re [i] *= scaling;
Im [i] *= scaling;
}
}
return 0;
}
/*
 * singleton's mixed radix routine
 *
 * could move allocation out to WebRtcIsac_Fftns(), but leave it here so that
 * it's possible to make this a standalone function
 */
static int FFTRADIX (REAL Re[],
REAL Im[],
size_t nTotal,
size_t nPass,
size_t nSpan,
int iSign,
int max_factors,
unsigned int max_perm,
FFTstr *fftstate)
{
int ii, mfactor, kspan, ispan, inc;
int j, jc, jf, jj, k, k1, k2, k3, k4, kk, kt, nn, ns, nt;
REAL radf;
REAL c1, c2, c3, cd, aa, aj, ak, ajm, ajp, akm, akp;
REAL s1, s2, s3, sd, bb, bj, bk, bjm, bjp, bkm, bkp;
REAL *Rtmp = NULL; /* temp space for real part*/
REAL *Itmp = NULL; /* temp space for imaginary part */
REAL *Cos = NULL; /* Cosine values */
REAL *Sin = NULL; /* Sine values */
REAL s60 = SIN60; /* sin(60 deg) */
REAL c72 = COS72; /* cos(72 deg) */
REAL s72 = SIN72; /* sin(72 deg) */
REAL pi2 = M_PI; /* use PI first, 2 PI later */
fftstate->SpaceAlloced = 0;
fftstate->MaxPermAlloced = 0;
  // initialize to avoid warnings
  k3 = 0;  /* k3 is an int; assign it separately from the doubles */
  c2 = c3 = s2 = s3 = 0.0;
if (nPass < 2)
return 0;
/* allocate storage */
if (fftstate->SpaceAlloced < max_factors * sizeof (REAL))
{
#ifdef SUN_BROKEN_REALLOC
if (!fftstate->SpaceAlloced) /* first time */
{
fftstate->SpaceAlloced = max_factors * sizeof (REAL);
}
else
{
#endif
fftstate->SpaceAlloced = max_factors * sizeof (REAL);
#ifdef SUN_BROKEN_REALLOC
}
#endif
}
else
{
/* allow full use of alloc'd space */
max_factors = fftstate->SpaceAlloced / sizeof (REAL);
}
if (fftstate->MaxPermAlloced < max_perm)
{
#ifdef SUN_BROKEN_REALLOC
      if (!fftstate->MaxPermAlloced) /* first time */
        fftstate->MaxPermAlloced = max_perm;
      else
#endif
fftstate->MaxPermAlloced = max_perm;
}
else
{
/* allow full use of alloc'd space */
max_perm = fftstate->MaxPermAlloced;
}
/* assign pointers */
Rtmp = (REAL *) fftstate->Tmp0;
Itmp = (REAL *) fftstate->Tmp1;
Cos = (REAL *) fftstate->Tmp2;
Sin = (REAL *) fftstate->Tmp3;
/*
* Function Body
*/
inc = iSign;
if (iSign < 0) {
s72 = -s72;
s60 = -s60;
pi2 = -pi2;
inc = -inc; /* absolute value */
}
/* adjust for strange increments */
nt = inc * (int)nTotal;
ns = inc * (int)nSpan;
kspan = ns;
nn = nt - inc;
jc = ns / (int)nPass;
radf = pi2 * (double) jc;
pi2 *= 2.0; /* use 2 PI from here on */
ii = 0;
jf = 0;
/* determine the factors of n */
mfactor = 0;
k = (int)nPass;
while (k % 16 == 0) {
mfactor++;
fftstate->factor [mfactor - 1] = 4;
k /= 16;
}
j = 3;
jj = 9;
do {
while (k % jj == 0) {
mfactor++;
fftstate->factor [mfactor - 1] = j;
k /= jj;
}
j += 2;
jj = j * j;
} while (jj <= k);
if (k <= 4) {
kt = mfactor;
fftstate->factor [mfactor] = k;
if (k != 1)
mfactor++;
} else {
if (k - (k / 4 << 2) == 0) {
mfactor++;
fftstate->factor [mfactor - 1] = 2;
k /= 4;
}
kt = mfactor;
j = 2;
do {
if (k % j == 0) {
mfactor++;
fftstate->factor [mfactor - 1] = j;
k /= j;
}
j = ((j + 1) / 2 << 1) + 1;
} while (j <= k);
}
if (kt) {
j = kt;
do {
mfactor++;
fftstate->factor [mfactor - 1] = fftstate->factor [j - 1];
j--;
} while (j);
}
/* test that mfactor is in range */
if (mfactor > FFT_NFACTOR)
{
return -1;
}
/* compute fourier transform */
for (;;) {
sd = radf / (double) kspan;
cd = sin(sd);
cd = 2.0 * cd * cd;
sd = sin(sd + sd);
kk = 0;
ii++;
switch (fftstate->factor [ii - 1]) {
case 2:
/* transform for factor of 2 (including rotation factor) */
kspan /= 2;
k1 = kspan + 2;
do {
do {
k2 = kk + kspan;
ak = Re [k2];
bk = Im [k2];
Re [k2] = Re [kk] - ak;
Im [k2] = Im [kk] - bk;
Re [kk] += ak;
Im [kk] += bk;
kk = k2 + kspan;
} while (kk < nn);
kk -= nn;
} while (kk < jc);
if (kk >= kspan)
goto Permute_Results_Label; /* exit infinite loop */
do {
c1 = 1.0 - cd;
s1 = sd;
do {
do {
do {
k2 = kk + kspan;
ak = Re [kk] - Re [k2];
bk = Im [kk] - Im [k2];
Re [kk] += Re [k2];
Im [kk] += Im [k2];
Re [k2] = c1 * ak - s1 * bk;
Im [k2] = s1 * ak + c1 * bk;
kk = k2 + kspan;
} while (kk < (nt-1));
k2 = kk - nt;
c1 = -c1;
kk = k1 - k2;
} while (kk > k2);
ak = c1 - (cd * c1 + sd * s1);
s1 = sd * c1 - cd * s1 + s1;
c1 = 2.0 - (ak * ak + s1 * s1);
s1 *= c1;
c1 *= ak;
kk += jc;
} while (kk < k2);
k1 += inc + inc;
kk = (k1 - kspan + 1) / 2 + jc - 1;
} while (kk < (jc + jc));
break;
case 4: /* transform for factor of 4 */
ispan = kspan;
kspan /= 4;
do {
c1 = 1.0;
s1 = 0.0;
do {
do {
k1 = kk + kspan;
k2 = k1 + kspan;
k3 = k2 + kspan;
akp = Re [kk] + Re [k2];
akm = Re [kk] - Re [k2];
ajp = Re [k1] + Re [k3];
ajm = Re [k1] - Re [k3];
bkp = Im [kk] + Im [k2];
bkm = Im [kk] - Im [k2];
bjp = Im [k1] + Im [k3];
bjm = Im [k1] - Im [k3];
Re [kk] = akp + ajp;
Im [kk] = bkp + bjp;
ajp = akp - ajp;
bjp = bkp - bjp;
if (iSign < 0) {
akp = akm + bjm;
bkp = bkm - ajm;
akm -= bjm;
bkm += ajm;
} else {
akp = akm - bjm;
bkp = bkm + ajm;
akm += bjm;
bkm -= ajm;
}
/* avoid useless multiplies */
if (s1 == 0.0) {
Re [k1] = akp;
Re [k2] = ajp;
Re [k3] = akm;
Im [k1] = bkp;
Im [k2] = bjp;
Im [k3] = bkm;
} else {
Re [k1] = akp * c1 - bkp * s1;
Re [k2] = ajp * c2 - bjp * s2;
Re [k3] = akm * c3 - bkm * s3;
Im [k1] = akp * s1 + bkp * c1;
Im [k2] = ajp * s2 + bjp * c2;
Im [k3] = akm * s3 + bkm * c3;
}
kk = k3 + kspan;
} while (kk < nt);
c2 = c1 - (cd * c1 + sd * s1);
s1 = sd * c1 - cd * s1 + s1;
c1 = 2.0 - (c2 * c2 + s1 * s1);
s1 *= c1;
c1 *= c2;
/* values of c2, c3, s2, s3 that will get used next time */
c2 = c1 * c1 - s1 * s1;
s2 = 2.0 * c1 * s1;
c3 = c2 * c1 - s2 * s1;
s3 = c2 * s1 + s2 * c1;
kk = kk - nt + jc;
} while (kk < kspan);
kk = kk - kspan + inc;
} while (kk < jc);
if (kspan == jc)
goto Permute_Results_Label; /* exit infinite loop */
break;
default:
/* transform for odd factors */
#ifdef FFT_RADIX4
return -1;
break;
#else /* FFT_RADIX4 */
k = fftstate->factor [ii - 1];
ispan = kspan;
kspan /= k;
switch (k) {
case 3: /* transform for factor of 3 (optional code) */
do {
do {
k1 = kk + kspan;
k2 = k1 + kspan;
ak = Re [kk];
bk = Im [kk];
aj = Re [k1] + Re [k2];
bj = Im [k1] + Im [k2];
Re [kk] = ak + aj;
Im [kk] = bk + bj;
ak -= 0.5 * aj;
bk -= 0.5 * bj;
aj = (Re [k1] - Re [k2]) * s60;
bj = (Im [k1] - Im [k2]) * s60;
Re [k1] = ak - bj;
Re [k2] = ak + bj;
Im [k1] = bk + aj;
Im [k2] = bk - aj;
kk = k2 + kspan;
} while (kk < (nn - 1));
kk -= nn;
} while (kk < kspan);
break;
case 5: /* transform for factor of 5 (optional code) */
c2 = c72 * c72 - s72 * s72;
s2 = 2.0 * c72 * s72;
do {
do {
k1 = kk + kspan;
k2 = k1 + kspan;
k3 = k2 + kspan;
k4 = k3 + kspan;
akp = Re [k1] + Re [k4];
akm = Re [k1] - Re [k4];
bkp = Im [k1] + Im [k4];
bkm = Im [k1] - Im [k4];
ajp = Re [k2] + Re [k3];
ajm = Re [k2] - Re [k3];
bjp = Im [k2] + Im [k3];
bjm = Im [k2] - Im [k3];
aa = Re [kk];
bb = Im [kk];
Re [kk] = aa + akp + ajp;
Im [kk] = bb + bkp + bjp;
ak = akp * c72 + ajp * c2 + aa;
bk = bkp * c72 + bjp * c2 + bb;
aj = akm * s72 + ajm * s2;
bj = bkm * s72 + bjm * s2;
Re [k1] = ak - bj;
Re [k4] = ak + bj;
Im [k1] = bk + aj;
Im [k4] = bk - aj;
ak = akp * c2 + ajp * c72 + aa;
bk = bkp * c2 + bjp * c72 + bb;
aj = akm * s2 - ajm * s72;
bj = bkm * s2 - bjm * s72;
Re [k2] = ak - bj;
Re [k3] = ak + bj;
Im [k2] = bk + aj;
Im [k3] = bk - aj;
kk = k4 + kspan;
} while (kk < (nn-1));
kk -= nn;
} while (kk < kspan);
break;
default:
if (k != jf) {
jf = k;
s1 = pi2 / (double) k;
c1 = cos(s1);
s1 = sin(s1);
if (jf > max_factors){
return -1;
}
Cos [jf - 1] = 1.0;
Sin [jf - 1] = 0.0;
j = 1;
do {
Cos [j - 1] = Cos [k - 1] * c1 + Sin [k - 1] * s1;
Sin [j - 1] = Cos [k - 1] * s1 - Sin [k - 1] * c1;
k--;
Cos [k - 1] = Cos [j - 1];
Sin [k - 1] = -Sin [j - 1];
j++;
} while (j < k);
}
do {
do {
k1 = kk;
k2 = kk + ispan;
ak = aa = Re [kk];
bk = bb = Im [kk];
j = 1;
k1 += kspan;
do {
k2 -= kspan;
j++;
Rtmp [j - 1] = Re [k1] + Re [k2];
ak += Rtmp [j - 1];
Itmp [j - 1] = Im [k1] + Im [k2];
bk += Itmp [j - 1];
j++;
Rtmp [j - 1] = Re [k1] - Re [k2];
Itmp [j - 1] = Im [k1] - Im [k2];
k1 += kspan;
} while (k1 < k2);
Re [kk] = ak;
Im [kk] = bk;
k1 = kk;
k2 = kk + ispan;
j = 1;
do {
k1 += kspan;
k2 -= kspan;
jj = j;
ak = aa;
bk = bb;
aj = 0.0;
bj = 0.0;
k = 1;
do {
k++;
ak += Rtmp [k - 1] * Cos [jj - 1];
bk += Itmp [k - 1] * Cos [jj - 1];
k++;
aj += Rtmp [k - 1] * Sin [jj - 1];
bj += Itmp [k - 1] * Sin [jj - 1];
jj += j;
if (jj > jf) {
jj -= jf;
}
} while (k < jf);
k = jf - j;
Re [k1] = ak - bj;
Im [k1] = bk + aj;
Re [k2] = ak + bj;
Im [k2] = bk - aj;
j++;
} while (j < k);
kk += ispan;
} while (kk < nn);
kk -= nn;
} while (kk < kspan);
break;
}
/* multiply by rotation factor (except for factors of 2 and 4) */
if (ii == mfactor)
goto Permute_Results_Label; /* exit infinite loop */
kk = jc;
do {
c2 = 1.0 - cd;
s1 = sd;
do {
c1 = c2;
s2 = s1;
kk += kspan;
do {
do {
ak = Re [kk];
Re [kk] = c2 * ak - s2 * Im [kk];
Im [kk] = s2 * ak + c2 * Im [kk];
kk += ispan;
} while (kk < nt);
ak = s1 * s2;
s2 = s1 * c2 + c1 * s2;
c2 = c1 * c2 - ak;
kk = kk - nt + kspan;
} while (kk < ispan);
c2 = c1 - (cd * c1 + sd * s1);
s1 += sd * c1 - cd * s1;
c1 = 2.0 - (c2 * c2 + s1 * s1);
s1 *= c1;
c2 *= c1;
kk = kk - ispan + jc;
} while (kk < kspan);
kk = kk - kspan + jc + inc;
} while (kk < (jc + jc));
break;
#endif /* FFT_RADIX4 */
}
}
/* permute the results to normal order---done in two stages */
/* permutation for square factors of n */
Permute_Results_Label:
fftstate->Perm [0] = ns;
if (kt) {
k = kt + kt + 1;
if (mfactor < k)
k--;
j = 1;
fftstate->Perm [k] = jc;
do {
fftstate->Perm [j] = fftstate->Perm [j - 1] / fftstate->factor [j - 1];
fftstate->Perm [k - 1] = fftstate->Perm [k] * fftstate->factor [j - 1];
j++;
k--;
} while (j < k);
k3 = fftstate->Perm [k];
kspan = fftstate->Perm [1];
kk = jc;
k2 = kspan;
j = 1;
if (nPass != nTotal) {
/* permutation for multivariate transform */
Permute_Multi_Label:
do {
do {
k = kk + jc;
do {
/* swap Re [kk] <> Re [k2], Im [kk] <> Im [k2] */
ak = Re [kk]; Re [kk] = Re [k2]; Re [k2] = ak;
bk = Im [kk]; Im [kk] = Im [k2]; Im [k2] = bk;
kk += inc;
k2 += inc;
} while (kk < (k-1));
kk += ns - jc;
k2 += ns - jc;
} while (kk < (nt-1));
k2 = k2 - nt + kspan;
kk = kk - nt + jc;
} while (k2 < (ns-1));
do {
do {
k2 -= fftstate->Perm [j - 1];
j++;
k2 = fftstate->Perm [j] + k2;
} while (k2 > fftstate->Perm [j - 1]);
j = 1;
do {
if (kk < (k2-1))
goto Permute_Multi_Label;
kk += jc;
k2 += kspan;
} while (k2 < (ns-1));
} while (kk < (ns-1));
} else {
/* permutation for single-variate transform (optional code) */
Permute_Single_Label:
do {
/* swap Re [kk] <> Re [k2], Im [kk] <> Im [k2] */
ak = Re [kk]; Re [kk] = Re [k2]; Re [k2] = ak;
bk = Im [kk]; Im [kk] = Im [k2]; Im [k2] = bk;
kk += inc;
k2 += kspan;
} while (k2 < (ns-1));
do {
do {
k2 -= fftstate->Perm [j - 1];
j++;
k2 = fftstate->Perm [j] + k2;
} while (k2 >= fftstate->Perm [j - 1]);
j = 1;
do {
if (kk < k2)
goto Permute_Single_Label;
kk += inc;
k2 += kspan;
} while (k2 < (ns-1));
} while (kk < (ns-1));
}
jc = k3;
}
if ((kt << 1) + 1 >= mfactor)
return 0;
ispan = fftstate->Perm [kt];
/* permutation for square-free factors of n */
j = mfactor - kt;
fftstate->factor [j] = 1;
do {
fftstate->factor [j - 1] *= fftstate->factor [j];
j--;
} while (j != kt);
kt++;
nn = fftstate->factor [kt - 1] - 1;
if (nn > (int) max_perm) {
return -1;
}
j = jj = 0;
for (;;) {
k = kt + 1;
k2 = fftstate->factor [kt - 1];
kk = fftstate->factor [k - 1];
j++;
if (j > nn)
break; /* exit infinite loop */
jj += kk;
while (jj >= k2) {
jj -= k2;
k2 = kk;
k++;
kk = fftstate->factor [k - 1];
jj += kk;
}
fftstate->Perm [j - 1] = jj;
}
/* determine the permutation cycles of length greater than 1 */
j = 0;
for (;;) {
do {
j++;
kk = fftstate->Perm [j - 1];
} while (kk < 0);
if (kk != j) {
do {
k = kk;
kk = fftstate->Perm [k - 1];
fftstate->Perm [k - 1] = -kk;
} while (kk != j);
k3 = kk;
} else {
fftstate->Perm [j - 1] = -j;
if (j == nn)
break; /* exit infinite loop */
}
}
max_factors *= inc;
/* reorder a and b, following the permutation cycles */
for (;;) {
j = k3 + 1;
nt -= ispan;
ii = nt - inc + 1;
if (nt < 0)
break; /* exit infinite loop */
do {
do {
j--;
} while (fftstate->Perm [j - 1] < 0);
jj = jc;
do {
kspan = jj;
if (jj > max_factors) {
kspan = max_factors;
}
jj -= kspan;
k = fftstate->Perm [j - 1];
kk = jc * k + ii + jj;
k1 = kk + kspan - 1;
k2 = 0;
do {
k2++;
Rtmp [k2 - 1] = Re [k1];
Itmp [k2 - 1] = Im [k1];
k1 -= inc;
} while (k1 != (kk-1));
do {
k1 = kk + kspan - 1;
k2 = k1 - jc * (k + fftstate->Perm [k - 1]);
k = -fftstate->Perm [k - 1];
do {
Re [k1] = Re [k2];
Im [k1] = Im [k2];
k1 -= inc;
k2 -= inc;
} while (k1 != (kk-1));
kk = k2 + 1;
} while (k != j);
k1 = kk + kspan - 1;
k2 = 0;
do {
k2++;
Re [k1] = Rtmp [k2 - 1];
Im [k1] = Itmp [k2 - 1];
k1 -= inc;
} while (k1 != (kk-1));
} while (jj);
} while (j != 1);
}
return 0; /* exit point here */
}
/* ---------------------- end-of-file (c source) ---------------------- */


@ -0,0 +1,58 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the ../../../LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*--------------------------------*-C-*---------------------------------*
* File:
* fftn.h
* ---------------------------------------------------------------------*
 * Re[]:    real value array
 * Im[]:    imaginary value array
 * nTotal:  total number of complex values
 * nPass:   number of elements involved in this pass of the transform
 * nSpan:   nSpan/nPass = number of elements by which to increment the
 *          pointer in Re[] and Im[]
 * isign:   exponent: +1 = forward, -1 = reverse
 * scaling: normalizing constant by which the final result is *divided*
 *          scaling == -1, normalize by total dimension of the transform
 *          scaling <  -1, normalize by the square-root of the total dimension
*
* ----------------------------------------------------------------------
* See the comments in the code for correct usage!
*/
#ifndef MODULES_THIRD_PARTY_FFT_FFT_H_
#define MODULES_THIRD_PARTY_FFT_FFT_H_
#define FFT_MAXFFTSIZE 2048
#define FFT_NFACTOR 11
typedef struct {
unsigned int SpaceAlloced;
unsigned int MaxPermAlloced;
double Tmp0[FFT_MAXFFTSIZE];
double Tmp1[FFT_MAXFFTSIZE];
double Tmp2[FFT_MAXFFTSIZE];
double Tmp3[FFT_MAXFFTSIZE];
int Perm[FFT_MAXFFTSIZE];
int factor[FFT_NFACTOR];
} FFTstr;
/* double precision routine */
int WebRtcIsac_Fftns(unsigned int ndim,
const int dims[],
double Re[],
double Im[],
int isign,
double scaling,
FFTstr* fftstate);
#endif /* MODULES_THIRD_PARTY_FFT_FFT_H_ */
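
A 1-D round-trip sketch using the API above (not part of this commit): an 8-point impulse is transformed forward unscaled, then back with scaling == -1, which normalizes by the total dimension.

#include <cstdio>
#include <cstring>

#include "modules/third_party/fft/fft.h"

int main() {
  FFTstr state;
  std::memset(&state, 0, sizeof(state));
  const int kN = 8;
  int dims[1] = {kN};
  double re[kN] = {1.0};  // Impulse: re[0] = 1, everything else 0.
  double im[kN] = {0.0};
  // Forward transform: an impulse maps to a flat spectrum of ones.
  if (WebRtcIsac_Fftns(1, dims, re, im, /*isign=*/1, /*scaling=*/1.0,
                       &state) != 0)
    return 1;
  // Inverse transform; scaling == -1 divides each output by kN.
  if (WebRtcIsac_Fftns(1, dims, re, im, /*isign=*/-1, /*scaling=*/-1.0,
                       &state) != 0)
    return 1;
  std::printf("re[0] after round trip: %f\n", re[0]);  // ~1.0
  return 0;
}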