add vad code.
This commit is contained in:
@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Plain-data record carrying iSAC bandwidth-estimation values.
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; confirm against the bandwidth estimator before relying on them. */
typedef struct {
  int in_use;                 /* presumably non-zero when the record holds valid data */
  int32_t send_bw_avg;        /* presumably an averaged send-side bandwidth */
  int32_t send_max_delay_avg; /* presumably an averaged send-side max delay */
  int16_t bottleneck_idx;     /* presumably an index describing the bottleneck */
  int16_t jitter_info;        /* presumably jitter-related information */
} IsacBandwidthInfo;
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
|
@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
#ifdef WEBRTC_ANDROID
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
|
||||
|
||||
/* In-place all-pole (recursive) filter.
 *
 * InOut        - samples to filter; overwritten with the output. The filter
 *                history is read from InOut[-1] .. InOut[-orderCoef], so the
 *                caller must provide that many valid samples before InOut.
 * Coef         - orderCoef + 1 coefficients; Coef[0] is the leading term.
 * lengthInOut  - number of samples to process.
 * orderCoef    - filter order.
 *
 * When Coef[0] is within 1e-4 of 1.0 the normalisation by Coef[0] is skipped.
 */
static void WebRtcIsac_AllPoleFilter(double* InOut,
                                     double* Coef,
                                     size_t lengthInOut,
                                     int orderCoef) {
  double* sample = InOut;
  double* const stop = InOut + lengthInOut;
  double feedback;
  double inv_lead;
  int tap;

  if ((Coef[0] > 0.9999) && (Coef[0] < 1.0001)) {
    /* Leading coefficient is (almost) one: plain recursion. */
    for (; sample != stop; ++sample) {
      feedback = Coef[1] * sample[-1];
      for (tap = 2; tap <= orderCoef; ++tap) {
        feedback += Coef[tap] * sample[-tap];
      }
      *sample -= feedback;
    }
    return;
  }

  /* General case: scale everything by 1 / Coef[0]. */
  inv_lead = 1.0 / Coef[0];
  for (; sample != stop; ++sample) {
    *sample *= inv_lead;
    for (tap = 1; tap <= orderCoef; ++tap) {
      *sample -= inv_lead * Coef[tap] * sample[-tap];
    }
  }
}
|
||||
|
||||
/* All-zero (FIR) filter.
 *
 * In           - input samples; the history is read from In[-1] ..
 *                In[-orderCoef], so those must be valid.
 * Coef         - orderCoef + 1 tap weights, Coef[0] applied to the current
 *                sample.
 * lengthInOut  - number of samples to produce.
 * orderCoef    - filter order.
 * Out          - receives lengthInOut output samples.
 */
static void WebRtcIsac_AllZeroFilter(double* In,
                                     double* Coef,
                                     size_t lengthInOut,
                                     int orderCoef,
                                     double* Out) {
  size_t idx;
  int tap;

  for (idx = 0; idx < lengthInOut; ++idx) {
    const double* cur = In + idx;
    double acc = cur[0] * Coef[0];

    for (tap = 1; tap <= orderCoef; ++tap) {
      acc += Coef[tap] * cur[-tap];
    }
    Out[idx] = acc;
  }
}
|
||||
|
||||
/* Zero-pole filter: an all-zero section followed by an all-pole section.
 *
 * In           - input samples; zero-section history in In[-1] ..
 *                In[-orderCoef].
 * ZeroCoef     - orderCoef + 1 coefficients of the all-zero section.
 * PoleCoef     - orderCoef + 1 coefficients of the all-pole section.
 * lengthInOut  - number of samples to process.
 * orderCoef    - order shared by both sections.
 * Out          - output samples; pole-section history in Out[-1] ..
 *                Out[-orderCoef].
 */
static void WebRtcIsac_ZeroPoleFilter(double* In,
                                      double* ZeroCoef,
                                      double* PoleCoef,
                                      size_t lengthInOut,
                                      int orderCoef,
                                      double* Out) {
  /* Stage 1: FIR part writes its result straight into Out. */
  WebRtcIsac_AllZeroFilter(In, ZeroCoef, lengthInOut, orderCoef, Out);

  /* Stage 2: recursive part runs in place on Out. */
  WebRtcIsac_AllPoleFilter(Out, PoleCoef, lengthInOut, orderCoef);
}
|
||||
|
||||
|
||||
/* Computes the autocorrelation of x for lags 0 .. order (inclusive).
 *
 * r      - receives order + 1 values; r[lag] = sum over n of x[n] * x[n + lag]
 *          for n in [0, N - lag).
 * x      - input sequence of N samples.
 * N      - number of samples in x.
 * order  - highest lag to compute.
 *
 * Lags with no overlapping samples (lag >= N) yield 0.0. This guards the
 * unsigned expression N - lag, which would otherwise wrap around and cause
 * reads past the end of x.
 */
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order) {
  size_t lag;

  for (lag = 0; lag <= order; lag++) {
    double sum = 0.0;

    if (lag < N) {
      const double* x_lag = x + lag;
      const size_t overlap = N - lag;
      size_t n;

      for (n = 0; n < overlap; n++) {
        sum += x[n] * x_lag[n];
      }
    }
    r[lag] = sum;
  }
}
|
||||
|
||||
/* Scales coefficient i of `in` by coef^i and stores the result in `out`.
 *
 * out     - receives `length` values; out[0] is always a copy of in[0].
 * in      - source coefficients.
 * coef    - per-index scaling base.
 * length  - number of coefficients.
 */
static void WebRtcIsac_BwExpand(double* out,
                                double* in,
                                double coef,
                                size_t length) {
  double gain = coef; /* coef^idx for the current idx */
  size_t idx = 1;

  out[0] = in[0];
  while (idx < length) {
    out[idx] = gain * in[idx];
    gain *= coef;
    ++idx;
  }
}
|
||||
|
||||
void WebRtcIsac_WeightingFilter(const double* in,
|
||||
double* weiout,
|
||||
double* whiout,
|
||||
WeightFiltstr* wfdata) {
|
||||
double tmpbuffer[PITCH_FRAME_LEN + PITCH_WLPCBUFLEN];
|
||||
double corr[PITCH_WLPCORDER+1], rc[PITCH_WLPCORDER+1];
|
||||
double apol[PITCH_WLPCORDER+1], apolr[PITCH_WLPCORDER+1];
|
||||
double rho=0.9, *inp, *dp, *dp2;
|
||||
double whoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER];
|
||||
double weoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER];
|
||||
double *weo, *who, opol[PITCH_WLPCORDER+1], ext[PITCH_WLPCWINLEN];
|
||||
int k, n, endpos, start;
|
||||
|
||||
/* Set up buffer and states */
|
||||
memcpy(tmpbuffer, wfdata->buffer, sizeof(double) * PITCH_WLPCBUFLEN);
|
||||
memcpy(tmpbuffer+PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN);
|
||||
memcpy(wfdata->buffer, tmpbuffer+PITCH_FRAME_LEN, sizeof(double) * PITCH_WLPCBUFLEN);
|
||||
|
||||
dp=weoutbuf;
|
||||
dp2=whoutbuf;
|
||||
for (k=0;k<PITCH_WLPCORDER;k++) {
|
||||
*dp++ = wfdata->weostate[k];
|
||||
*dp2++ = wfdata->whostate[k];
|
||||
opol[k]=0.0;
|
||||
}
|
||||
opol[0]=1.0;
|
||||
opol[PITCH_WLPCORDER]=0.0;
|
||||
weo=dp;
|
||||
who=dp2;
|
||||
|
||||
endpos=PITCH_WLPCBUFLEN + PITCH_SUBFRAME_LEN;
|
||||
inp=tmpbuffer + PITCH_WLPCBUFLEN;
|
||||
|
||||
for (n=0; n<PITCH_SUBFRAMES; n++) {
|
||||
/* Windowing */
|
||||
start=endpos-PITCH_WLPCWINLEN;
|
||||
for (k=0; k<PITCH_WLPCWINLEN; k++) {
|
||||
ext[k]=wfdata->window[k]*tmpbuffer[start+k];
|
||||
}
|
||||
|
||||
/* Get LPC polynomial */
|
||||
WebRtcIsac_AutoCorr(corr, ext, PITCH_WLPCWINLEN, PITCH_WLPCORDER);
|
||||
corr[0]=1.01*corr[0]+1.0; /* White noise correction */
|
||||
WebRtcIsac_LevDurb(apol, rc, corr, PITCH_WLPCORDER);
|
||||
WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER+1);
|
||||
|
||||
/* Filtering */
|
||||
WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, weo);
|
||||
WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, who);
|
||||
|
||||
inp+=PITCH_SUBFRAME_LEN;
|
||||
endpos+=PITCH_SUBFRAME_LEN;
|
||||
weo+=PITCH_SUBFRAME_LEN;
|
||||
who+=PITCH_SUBFRAME_LEN;
|
||||
}
|
||||
|
||||
/* Export filter states */
|
||||
for (k=0;k<PITCH_WLPCORDER;k++) {
|
||||
wfdata->weostate[k]=weoutbuf[PITCH_FRAME_LEN+k];
|
||||
wfdata->whostate[k]=whoutbuf[PITCH_FRAME_LEN+k];
|
||||
}
|
||||
|
||||
/* Export output data */
|
||||
memcpy(weiout, weoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN);
|
||||
memcpy(whiout, whoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN);
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order);
|
||||
|
||||
void WebRtcIsac_WeightingFilter(const double* in,
|
||||
double* weiout,
|
||||
double* whiout,
|
||||
WeightFiltstr* wfdata);
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
|
@ -0,0 +1,409 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
/* Resets a pitch-filter state structure: clears ubuf and ystate, and sets the
 * first stored lag to 50.0 and the first stored gain to 0.0. */
void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata) {
  int i;

  for (i = 0; i < PITCH_BUFFSIZE; ++i) {
    pitchfiltdata->ubuf[i] = 0.0;
  }

  /* Element 0 is always cleared, then elements 1 .. PITCH_DAMPORDER - 1. */
  i = 0;
  do {
    pitchfiltdata->ystate[i] = 0.0;
    ++i;
  } while (i < (PITCH_DAMPORDER));

  pitchfiltdata->oldlagp[0] = 50.0;
  pitchfiltdata->oldgainp[0] = 0.0;
}
|
||||
|
||||
/* Resets the weighting-filter state and fills in its analysis window.
 *
 * The history buffer and all three filter memories are cleared. The window
 * has PITCH_WLPCWINLEN points; point k is sin^2 of a phase that mixes a
 * linear and a quadratic term in t = k + 0.5, blended by PITCH_WLPCASYM.
 */
static void WebRtcIsac_InitWeightingFilter(WeightFiltstr* wfdata) {
  const double inv_len = 1.0 / ((double)PITCH_WLPCWINLEN);
  const double inv_len_sq = inv_len * inv_len;
  double pos = 0.5; /* sample position, advanced by one per window point */
  double phase, sine;
  int i;

  for (i = 0; i < PITCH_WLPCBUFLEN; ++i) {
    wfdata->buffer[i] = 0.0;
  }

  for (i = 0; i < PITCH_WLPCORDER; ++i) {
    wfdata->istate[i] = 0.0;
    wfdata->weostate[i] = 0.0;
    wfdata->whostate[i] = 0.0;
  }

  /* The original note suggests this table could be precomputed offline. */
  for (i = 0; i < PITCH_WLPCWINLEN; ++i) {
    phase = PITCH_WLPCASYM * pos * inv_len +
            (1 - PITCH_WLPCASYM) * pos * pos * inv_len_sq;
    phase *= 3.14159265;
    sine = sin(phase);
    wfdata->window[i] = sine * sine;
    pos++;
  }
}
|
||||
|
||||
/* Resets all pitch-analysis state: zeroes the decimation buffer, decimator
 * and high-pass memories, and the whitened/input lookahead buffers, then
 * re-initialises the two pitch filters and the weighting filter it owns. */
void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* State) {
  const int dec_buffer_len = PITCH_CORR_LEN2 + PITCH_CORR_STEP2 +
                             PITCH_MAX_LAG / 2 - PITCH_FRAME_LEN / 2 + 2;
  int i;

  for (i = 0; i < dec_buffer_len; ++i) {
    State->dec_buffer[i] = 0.0;
  }
  for (i = 0; i < 2 * ALLPASSSECTIONS + 1; ++i) {
    State->decimator_state[i] = 0.0;
  }
  for (i = 0; i < 2; ++i) {
    State->hp_state[i] = 0.0;
  }
  for (i = 0; i < QLOOKAHEAD; ++i) {
    State->whitened_buf[i] = 0.0;
  }
  for (i = 0; i < QLOOKAHEAD; ++i) {
    State->inbuf[i] = 0.0;
  }

  /* Nested sub-states. */
  WebRtcIsac_InitPitchFilter(&(State->PFstr_wght));
  WebRtcIsac_InitPitchFilter(&(State->PFstr));
  WebRtcIsac_InitWeightingFilter(&(State->Wghtstr));
}
|
||||
|
||||
/* Resets the pre-filterbank state: lookahead buffers, all-pass filter states
 * and high-pass filter states, in both the plain and the `_float` copies. */
void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata) {
  int i;

  /* Lookahead sample buffers. */
  for (i = 0; i < QLOOKAHEAD; ++i) {
    prefiltdata->INLABUF1[i] = 0;
    prefiltdata->INLABUF1_float[i] = 0;
    prefiltdata->INLABUF2[i] = 0;
    prefiltdata->INLABUF2_float[i] = 0;
  }

  /* All-pass filter states, regular and lookahead paths. */
  for (i = 0; i < 2 * (QORDER - 1); ++i) {
    prefiltdata->INSTAT1[i] = 0;
    prefiltdata->INSTAT1_float[i] = 0;
    prefiltdata->INSTAT2[i] = 0;
    prefiltdata->INSTAT2_float[i] = 0;
    prefiltdata->INSTATLA1[i] = 0;
    prefiltdata->INSTATLA1_float[i] = 0;
    prefiltdata->INSTATLA2[i] = 0;
    prefiltdata->INSTATLA2_float[i] = 0;
  }

  /* High pass filter states. */
  for (i = 0; i < 2; ++i) {
    prefiltdata->HPstates[i] = 0.0;
  }
  for (i = 0; i < 2; ++i) {
    prefiltdata->HPstates_float[i] = 0.0f;
  }
}
|
||||
|
||||
/* Levinson-Durbin recursion.
 *
 * a      - receives order + 1 polynomial coefficients; a[0] is set to 1.0.
 * k      - receives `order` reflection coefficients.
 * r      - order + 1 autocorrelation values, r[0] being lag zero.
 * order  - prediction order.
 *
 * Returns the final value of the recursion's error term. If r[0] is below
 * 1e-10, all coefficients other than a[0] are set to zero and 0 is returned.
 */
double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order) {
  const double LEVINSON_EPS = 1.0e-10;
  double err = 0;
  size_t stage, idx;

  a[0] = 1.0;

  if (r[0] < LEVINSON_EPS) {
    /* No usable signal energy: return an all-zero predictor. */
    for (idx = 0; idx < order; ++idx) {
      k[idx] = 0;
      a[idx + 1] = 0;
    }
    return err;
  }

  /* First-order solution. */
  k[0] = -r[1] / r[0];
  a[1] = k[0];
  err = r[0] + r[1] * k[0];

  for (stage = 1; stage < order; ++stage) {
    const size_t half = (stage + 1) >> 1;
    double acc = r[stage + 1];
    double refl;

    for (idx = 0; idx < stage; ++idx) {
      acc += a[idx + 1] * r[stage - idx];
    }
    refl = -acc / err;
    k[stage] = refl;
    err += refl * acc;

    /* Update the coefficients in mirrored pairs (low index, high index). */
    for (idx = 0; idx < half; ++idx) {
      double* lo = &a[idx + 1];
      double* hi = &a[stage - idx];
      const double new_lo = *lo + refl * *hi;

      *hi += refl * *lo;
      *lo = new_lo;
    }
    a[stage + 1] = refl;
  }

  return err;
}
|
||||
|
||||
/* All-pass section factors for the upper channel. */
const float WebRtcIsac_kUpperApFactorsFloat[2] = {
    0.03470000000000f,
    0.38260000000000f,
};

/* All-pass section factors for the lower channel. */
const float WebRtcIsac_kLowerApFactorsFloat[2] = {
    0.15440000000000f,
    0.74400000000000f,
};
|
||||
|
||||
/* Cascade of first-order all-pass sections, applied in place.
 *
 * InOut             - lengthInOut samples; overwritten with the output.
 * APSectionFactors  - one factor per section.
 * lengthInOut       - number of samples.
 * NumberOfSections  - number of cascaded sections.
 * FilterState       - one state value per section; read on entry and
 *                     updated so the next call continues the filtering.
 *
 * Each section filters the whole buffer before the next section starts.
 */
void WebRtcIsac_AllPassFilter2Float(float* InOut,
                                    const float* APSectionFactors,
                                    int lengthInOut,
                                    int NumberOfSections,
                                    float* FilterState) {
  int section;

  for (section = 0; section < NumberOfSections; ++section) {
    const float factor = APSectionFactors[section];
    float* state = &FilterState[section];
    float* sample = InOut;
    int remaining;

    for (remaining = lengthInOut; remaining > 0; --remaining, ++sample) {
      const float input = *sample;
      const float output = *state + factor * input;

      *state = -factor * output + input;
      *sample = output;
    }
  }
}
|
||||
|
||||
/* The number of composite all-pass filter factors */
|
||||
#define NUMBEROFCOMPOSITEAPSECTIONS 4
|
||||
|
||||
/* Function WebRtcIsac_SplitAndFilter
|
||||
* This function creates low-pass and high-pass decimated versions of part of
|
||||
the input signal, and part of the signal in the input 'lookahead buffer'.
|
||||
|
||||
INPUTS:
|
||||
in: a length FRAMESAMPLES array of input samples
|
||||
prefiltdata: input data structure containing the filterbank states
|
||||
and lookahead samples from the previous encoding
|
||||
iteration.
|
||||
OUTPUTS:
|
||||
LP: a FRAMESAMPLES_HALF array of low-pass filtered samples that
|
||||
have been phase equalized. The first QLOOKAHEAD samples are
|
||||
based on the samples in the two prefiltdata->INLABUFx arrays
|
||||
each of length QLOOKAHEAD.
|
||||
The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based
|
||||
on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input
|
||||
array in[].
|
||||
HP: a FRAMESAMPLES_HALF array of high-pass filtered samples that
|
||||
have been phase equalized. The first QLOOKAHEAD samples are
|
||||
based on the samples in the two prefiltdata->INLABUFx arrays
|
||||
each of length QLOOKAHEAD.
|
||||
The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based
|
||||
on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input
|
||||
array in[].
|
||||
|
||||
LP_la: a FRAMESAMPLES_HALF array of low-pass filtered samples.
|
||||
These samples are not phase equalized. They are computed
|
||||
from the samples in the in[] array.
|
||||
HP_la: a FRAMESAMPLES_HALF array of high-pass filtered samples
|
||||
that are not phase equalized. They are computed from
|
||||
the in[] vector.
|
||||
prefiltdata: this input data structure's filterbank state and
|
||||
lookahead sample buffers are updated for the next
|
||||
encoding iteration.
|
||||
*/
|
||||
void WebRtcIsac_SplitAndFilterFloat(float* pin,
|
||||
float* LP,
|
||||
float* HP,
|
||||
double* LP_la,
|
||||
double* HP_la,
|
||||
PreFiltBankstr* prefiltdata) {
|
||||
int k, n;
|
||||
float CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS];
|
||||
float ForTransform_CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS];
|
||||
float ForTransform_CompositeAPFilterState2[NUMBEROFCOMPOSITEAPSECTIONS];
|
||||
float tempinoutvec[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
|
||||
float tempin_ch1[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
|
||||
float tempin_ch2[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
|
||||
float in[FRAMESAMPLES];
|
||||
float ftmp;
|
||||
|
||||
/* HPstcoeff_in = {a1, a2, b1 - b0 * a1, b2 - b0 * a2}; */
|
||||
static const float kHpStCoefInFloat[4] = {
|
||||
-1.94895953203325f, 0.94984516000000f, -0.05101826139794f,
|
||||
0.05015484000000f};
|
||||
|
||||
/* The composite all-pass filter factors */
|
||||
static const float WebRtcIsac_kCompositeApFactorsFloat[4] = {
|
||||
0.03470000000000f, 0.15440000000000f, 0.38260000000000f,
|
||||
0.74400000000000f};
|
||||
|
||||
// The matrix for transforming the backward composite state to upper channel
|
||||
// state.
|
||||
static const float WebRtcIsac_kTransform1Float[8] = {
|
||||
-0.00158678506084f, 0.00127157815343f, -0.00104805672709f,
|
||||
0.00084837248079f, 0.00134467983258f, -0.00107756549387f,
|
||||
0.00088814793277f, -0.00071893072525f};
|
||||
|
||||
// The matrix for transforming the backward composite state to lower channel
|
||||
// state.
|
||||
static const float WebRtcIsac_kTransform2Float[8] = {
|
||||
-0.00170686041697f, 0.00136780109829f, -0.00112736532350f,
|
||||
0.00091257055385f, 0.00103094281812f, -0.00082615076557f,
|
||||
0.00068092756088f, -0.00055119165484f};
|
||||
|
||||
/* High pass filter */
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES; k++) {
|
||||
in[k] = pin[k] + kHpStCoefInFloat[2] * prefiltdata->HPstates_float[0] +
|
||||
kHpStCoefInFloat[3] * prefiltdata->HPstates_float[1];
|
||||
ftmp = pin[k] - kHpStCoefInFloat[0] * prefiltdata->HPstates_float[0] -
|
||||
kHpStCoefInFloat[1] * prefiltdata->HPstates_float[1];
|
||||
prefiltdata->HPstates_float[1] = prefiltdata->HPstates_float[0];
|
||||
prefiltdata->HPstates_float[0] = ftmp;
|
||||
}
|
||||
|
||||
/* First Channel */
|
||||
|
||||
/*initial state of composite filter is zero */
|
||||
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
|
||||
CompositeAPFilterState[k] = 0.0;
|
||||
}
|
||||
/* put every other sample of input into a temporary vector in reverse
|
||||
* (backward) order*/
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempinoutvec[k] = in[FRAMESAMPLES - 1 - 2 * k];
|
||||
}
|
||||
|
||||
/* now all-pass filter the backwards vector. Output values overwrite the
|
||||
* input vector. */
|
||||
WebRtcIsac_AllPassFilter2Float(
|
||||
tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF,
|
||||
NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
|
||||
|
||||
/* save the backwards filtered output for later forward filtering,
|
||||
but write it in forward order*/
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempin_ch1[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k];
|
||||
}
|
||||
|
||||
/* save the backwards filter state becaue it will be transformed
|
||||
later into a forward state */
|
||||
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
|
||||
ForTransform_CompositeAPFilterState[k] = CompositeAPFilterState[k];
|
||||
}
|
||||
|
||||
/* now backwards filter the samples in the lookahead buffer. The samples were
|
||||
placed there in the encoding of the previous frame. The output samples
|
||||
overwrite the input samples */
|
||||
WebRtcIsac_AllPassFilter2Float(
|
||||
prefiltdata->INLABUF1_float, WebRtcIsac_kCompositeApFactorsFloat,
|
||||
QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
|
||||
|
||||
/* save the output, but write it in forward order */
|
||||
/* write the lookahead samples for the next encoding iteration. Every other
|
||||
sample at the end of the input frame is written in reverse order for the
|
||||
lookahead length. Exported in the prefiltdata structure. */
|
||||
for (k = 0; k < QLOOKAHEAD; k++) {
|
||||
tempin_ch1[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF1_float[k];
|
||||
prefiltdata->INLABUF1_float[k] = in[FRAMESAMPLES - 1 - 2 * k];
|
||||
}
|
||||
|
||||
/* Second Channel. This is exactly like the first channel, except that the
|
||||
even samples are now filtered instead (lower channel). */
|
||||
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
|
||||
CompositeAPFilterState[k] = 0.0;
|
||||
}
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempinoutvec[k] = in[FRAMESAMPLES - 2 - 2 * k];
|
||||
}
|
||||
|
||||
WebRtcIsac_AllPassFilter2Float(
|
||||
tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF,
|
||||
NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempin_ch2[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k];
|
||||
}
|
||||
|
||||
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
|
||||
ForTransform_CompositeAPFilterState2[k] = CompositeAPFilterState[k];
|
||||
}
|
||||
|
||||
WebRtcIsac_AllPassFilter2Float(
|
||||
prefiltdata->INLABUF2_float, WebRtcIsac_kCompositeApFactorsFloat,
|
||||
QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
|
||||
|
||||
for (k = 0; k < QLOOKAHEAD; k++) {
|
||||
tempin_ch2[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF2_float[k];
|
||||
prefiltdata->INLABUF2_float[k] = in[FRAMESAMPLES - 2 - 2 * k];
|
||||
}
|
||||
|
||||
/* Transform filter states from backward to forward */
|
||||
/*At this point, each of the states of the backwards composite filters for the
|
||||
two channels are transformed into forward filtering states for the
|
||||
corresponding forward channel filters. Each channel's forward filtering
|
||||
state from the previous
|
||||
encoding iteration is added to the transformed state to get a proper forward
|
||||
state */
|
||||
|
||||
/* So the existing NUMBEROFCOMPOSITEAPSECTIONS x 1 (4x1) state vector is
|
||||
multiplied by a NUMBEROFCHANNELAPSECTIONSxNUMBEROFCOMPOSITEAPSECTIONS (2x4)
|
||||
transform matrix to get the new state that is added to the previous 2x1
|
||||
input state */
|
||||
|
||||
for (k = 0; k < NUMBEROFCHANNELAPSECTIONS; k++) { /* k is row variable */
|
||||
for (n = 0; n < NUMBEROFCOMPOSITEAPSECTIONS;
|
||||
n++) { /* n is column variable */
|
||||
prefiltdata->INSTAT1_float[k] +=
|
||||
ForTransform_CompositeAPFilterState[n] *
|
||||
WebRtcIsac_kTransform1Float[k * NUMBEROFCHANNELAPSECTIONS + n];
|
||||
prefiltdata->INSTAT2_float[k] +=
|
||||
ForTransform_CompositeAPFilterState2[n] *
|
||||
WebRtcIsac_kTransform2Float[k * NUMBEROFCHANNELAPSECTIONS + n];
|
||||
}
|
||||
}
|
||||
|
||||
/*obtain polyphase components by forward all-pass filtering through each
|
||||
* channel */
|
||||
/* the backward filtered samples are now forward filtered with the
|
||||
* corresponding channel filters */
|
||||
/* The all pass filtering automatically updates the filter states which are
|
||||
exported in the prefiltdata structure */
|
||||
WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat,
|
||||
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
|
||||
prefiltdata->INSTAT1_float);
|
||||
WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat,
|
||||
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
|
||||
prefiltdata->INSTAT2_float);
|
||||
|
||||
/* Now Construct low-pass and high-pass signals as combinations of polyphase
|
||||
* components */
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
LP[k] = 0.5f * (tempin_ch1[k] + tempin_ch2[k]); /* low pass signal*/
|
||||
HP[k] = 0.5f * (tempin_ch1[k] - tempin_ch2[k]); /* high pass signal*/
|
||||
}
|
||||
|
||||
/* Lookahead LP and HP signals */
|
||||
/* now create low pass and high pass signals of the input vector. However, no
|
||||
backwards filtering is performed, and hence no phase equalization is
|
||||
involved. Also, the input contains some samples that are lookahead samples.
|
||||
The high pass and low pass signals that are created are used outside this
|
||||
function for analysis (not encoding) purposes */
|
||||
|
||||
/* set up input */
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempin_ch1[k] = in[2 * k + 1];
|
||||
tempin_ch2[k] = in[2 * k];
|
||||
}
|
||||
|
||||
/* the input filter states are passed in and updated by the all-pass filtering
|
||||
routine and exported in the prefiltdata structure*/
|
||||
WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat,
|
||||
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
|
||||
prefiltdata->INSTATLA1_float);
|
||||
WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat,
|
||||
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
|
||||
prefiltdata->INSTATLA2_float);
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
LP_la[k] = (float)(0.5f * (tempin_ch1[k] + tempin_ch2[k])); /*low pass */
|
||||
HP_la[k] = (double)(0.5f * (tempin_ch1[k] - tempin_ch2[k])); /* high pass */
|
||||
}
|
||||
}
|
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata);
|
||||
void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* state);
|
||||
void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata);
|
||||
|
||||
double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order);
|
||||
|
||||
/* The number of all-pass filter factors in an upper or lower channel*/
|
||||
#define NUMBEROFCHANNELAPSECTIONS 2
|
||||
|
||||
/* The upper channel all-pass filter factors */
|
||||
extern const float WebRtcIsac_kUpperApFactorsFloat[2];
|
||||
|
||||
/* The lower channel all-pass filter factors */
|
||||
extern const float WebRtcIsac_kLowerApFactorsFloat[2];
|
||||
|
||||
void WebRtcIsac_AllPassFilter2Float(float* InOut,
|
||||
const float* APSectionFactors,
|
||||
int lengthInOut,
|
||||
int NumberOfSections,
|
||||
float* FilterState);
|
||||
void WebRtcIsac_SplitAndFilterFloat(float* in,
|
||||
float* LP,
|
||||
float* HP,
|
||||
double* LP_la,
|
||||
double* HP_la,
|
||||
PreFiltBankstr* prefiltdata);
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
|
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
#if defined(WEBRTC_POSIX)
|
||||
#define WebRtcIsac_lrint lrint
|
||||
#elif (defined(WEBRTC_ARCH_X86) && defined(WIN32))
|
||||
static __inline long int WebRtcIsac_lrint(double x_dbl) {
|
||||
long int x_int;
|
||||
|
||||
__asm {
|
||||
fld x_dbl
|
||||
fistp x_int
|
||||
}
|
||||
;
|
||||
|
||||
return x_int;
|
||||
}
|
||||
#else // Do a slow but correct implementation of lrint
|
||||
|
||||
static __inline long int WebRtcIsac_lrint(double x_dbl) {
|
||||
long int x_int;
|
||||
x_int = (long int)floor(x_dbl + 0.499999999999);
|
||||
return x_int;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
|
@ -0,0 +1,695 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
#ifdef WEBRTC_ANDROID
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h"
|
||||
#include "rtc_base/system/ignore_warnings.h"
|
||||
|
||||
/* Symmetric 8-tap interpolation window. */
static const double kInterpolWin[8] = {
    -0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
    0.60086484101160,  -0.12203175715679, 0.02184247643159, -0.00067556028640};

/* Interpolation filter: stores in *intrp the weighted sum of the eight
 * samples data_ptr[-3] .. data_ptr[4], using kInterpolWin as weights. */
__inline static void IntrepolFilter(double *data_ptr, double *intrp)
{
  const double *first = data_ptr - 3;
  int tap;

  *intrp = kInterpolWin[0] * first[0];
  for (tap = 1; tap < 8; ++tap) {
    *intrp += kInterpolWin[tap] * first[tap];
  }
}
|
||||
|
||||
|
||||
/* 2D parabolic interpolation */
|
||||
/* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */
|
||||
__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
|
||||
{
|
||||
double c, b[2], A[2][2];
|
||||
double t1, t2, d;
|
||||
double delta1, delta2;
|
||||
|
||||
|
||||
// double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
|
||||
// should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
|
||||
|
||||
c = T[1][1];
|
||||
b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
|
||||
b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
|
||||
A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
|
||||
t1 = 0.5 * (T[0][0] + T[2][2]) - c;
|
||||
t2 = 0.5 * (T[2][0] + T[0][2]) - c;
|
||||
d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
|
||||
A[0][0] = -t1 - 0.5 * d;
|
||||
A[1][1] = -t2 - 0.5 * d;
|
||||
|
||||
/* deal with singularities or ill-conditioned cases */
|
||||
if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
|
||||
*peak_val = T[1][1];
|
||||
return;
|
||||
}
|
||||
|
||||
/* Cholesky decomposition: replace A by upper-triangular factor */
|
||||
A[0][0] = sqrt(A[0][0]);
|
||||
A[0][1] = A[0][1] / A[0][0];
|
||||
A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
|
||||
|
||||
/* compute [x; y] = -0.5 * inv(A) * b */
|
||||
t1 = b[0] / A[0][0];
|
||||
t2 = (b[1] - t1 * A[0][1]) / A[1][1];
|
||||
delta2 = t2 / A[1][1];
|
||||
delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
|
||||
delta2 *= 0.5;
|
||||
|
||||
/* limit norm */
|
||||
t1 = delta1 * delta1 + delta2 * delta2;
|
||||
if (t1 > 1.0) {
|
||||
delta1 /= t1;
|
||||
delta2 /= t1;
|
||||
}
|
||||
|
||||
*peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
|
||||
|
||||
*x += delta1;
|
||||
*y += delta2;
|
||||
}
|
||||
|
||||
|
||||
static void PCorr(const double *in, double *outcorr)
|
||||
{
|
||||
double sum, ysum, prod;
|
||||
const double *x, *inptr;
|
||||
int k, n;
|
||||
|
||||
//ysum = 1e-6; /* use this with float (i.s.o. double)! */
|
||||
ysum = 1e-13;
|
||||
sum = 0.0;
|
||||
x = in + PITCH_MAX_LAG/2 + 2;
|
||||
for (n = 0; n < PITCH_CORR_LEN2; n++) {
|
||||
ysum += in[n] * in[n];
|
||||
sum += x[n] * in[n];
|
||||
}
|
||||
|
||||
outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */
|
||||
*outcorr = sum / sqrt(ysum);
|
||||
|
||||
for (k = 1; k < PITCH_LAG_SPAN2; k++) {
|
||||
ysum -= in[k-1] * in[k-1];
|
||||
ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
|
||||
sum = 0.0;
|
||||
inptr = &in[k];
|
||||
prod = x[0] * inptr[0];
|
||||
for (n = 1; n < PITCH_CORR_LEN2; n++) {
|
||||
sum += prod;
|
||||
prod = x[n] * inptr[n];
|
||||
}
|
||||
sum += prod;
|
||||
outcorr--;
|
||||
*outcorr = sum / sqrt(ysum);
|
||||
}
|
||||
}
|
||||
|
||||
static void WebRtcIsac_AllpassFilterForDec(double* InOut,
|
||||
const double* APSectionFactors,
|
||||
size_t lengthInOut,
|
||||
double* FilterState) {
|
||||
// This performs all-pass filtering--a series of first order all-pass
|
||||
// sections are used to filter the input in a cascade manner.
|
||||
size_t n, j;
|
||||
double temp;
|
||||
for (j = 0; j < ALLPASSSECTIONS; j++) {
|
||||
for (n = 0; n < lengthInOut; n += 2) {
|
||||
temp = InOut[n]; // store input
|
||||
InOut[n] = FilterState[j] + APSectionFactors[j] * temp;
|
||||
FilterState[j] = -APSectionFactors[j] * InOut[n] + temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void WebRtcIsac_DecimateAllpass(
|
||||
const double* in,
|
||||
double* state_in, // array of size: 2*ALLPASSSECTIONS+1
|
||||
size_t N, // number of input samples
|
||||
double* out) { // array of size N/2
|
||||
|
||||
static const double APupper[ALLPASSSECTIONS] = {0.0347, 0.3826};
|
||||
static const double APlower[ALLPASSSECTIONS] = {0.1544, 0.744};
|
||||
|
||||
size_t n;
|
||||
double data_vec[PITCH_FRAME_LEN];
|
||||
|
||||
/* copy input */
|
||||
memcpy(data_vec + 1, in, sizeof(double) * (N - 1));
|
||||
|
||||
data_vec[0] = state_in[2 * ALLPASSSECTIONS]; // the z^(-1) state
|
||||
state_in[2 * ALLPASSSECTIONS] = in[N - 1];
|
||||
|
||||
WebRtcIsac_AllpassFilterForDec(data_vec + 1, APupper, N, state_in);
|
||||
WebRtcIsac_AllpassFilterForDec(data_vec, APlower, N,
|
||||
state_in + ALLPASSSECTIONS);
|
||||
|
||||
for (n = 0; n < N / 2; n++)
|
||||
out[n] = data_vec[2 * n] + data_vec[2 * n + 1];
|
||||
}
|
||||
|
||||
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
|
||||
|
||||
static void WebRtcIsac_InitializePitch(const double* in,
|
||||
const double old_lag,
|
||||
const double old_gain,
|
||||
PitchAnalysisStruct* State,
|
||||
double* lags) {
|
||||
double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
|
||||
double ratio, log_lag, gain_bias;
|
||||
double bias;
|
||||
double corrvec1[PITCH_LAG_SPAN2];
|
||||
double corrvec2[PITCH_LAG_SPAN2];
|
||||
int m, k;
|
||||
// Allocating 10 extra entries at the begining of the CorrSurf
|
||||
double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
|
||||
double* CorrSurf[2*PITCH_BW+3];
|
||||
double *CorrSurfPtr1, *CorrSurfPtr2;
|
||||
double LagWin[3] = {0.2, 0.5, 0.98};
|
||||
int ind1, ind2, peaks_ind, peak, max_ind;
|
||||
int peaks[PITCH_MAX_NUM_PEAKS];
|
||||
double adj, gain_tmp;
|
||||
double corr, corr_max;
|
||||
double intrp_a, intrp_b, intrp_c, intrp_d;
|
||||
double peak_vals[PITCH_MAX_NUM_PEAKS];
|
||||
double lags1[PITCH_MAX_NUM_PEAKS];
|
||||
double lags2[PITCH_MAX_NUM_PEAKS];
|
||||
double T[3][3];
|
||||
int row;
|
||||
|
||||
for(k = 0; k < 2*PITCH_BW+3; k++)
|
||||
{
|
||||
CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
|
||||
}
|
||||
/* reset CorrSurf matrix */
|
||||
memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
|
||||
|
||||
//warnings -DH
|
||||
max_ind = 0;
|
||||
peak = 0;
|
||||
|
||||
/* copy old values from state buffer */
|
||||
memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
|
||||
|
||||
/* decimation; put result after the old values */
|
||||
WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
|
||||
&buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]);
|
||||
|
||||
/* low-pass filtering */
|
||||
for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++)
|
||||
buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2];
|
||||
|
||||
/* copy end part back into state buffer */
|
||||
memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
|
||||
|
||||
/* compute correlation for first and second half of the frame */
|
||||
PCorr(buf_dec, corrvec1);
|
||||
PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2);
|
||||
|
||||
/* bias towards pitch lag of previous frame */
|
||||
log_lag = log(0.5 * old_lag);
|
||||
gain_bias = 4.0 * old_gain * old_gain;
|
||||
if (gain_bias > 0.8) gain_bias = 0.8;
|
||||
for (k = 0; k < PITCH_LAG_SPAN2; k++)
|
||||
{
|
||||
ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag;
|
||||
bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
|
||||
corrvec1[k] *= bias;
|
||||
}
|
||||
|
||||
/* taper correlation functions */
|
||||
for (k = 0; k < 3; k++) {
|
||||
gain_tmp = LagWin[k];
|
||||
corrvec1[k] *= gain_tmp;
|
||||
corrvec2[k] *= gain_tmp;
|
||||
corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
|
||||
corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
|
||||
}
|
||||
|
||||
corr_max = 0.0;
|
||||
/* fill middle row of correlation surface */
|
||||
ind1 = 0;
|
||||
ind2 = 0;
|
||||
CorrSurfPtr1 = &CorrSurf[PITCH_BW][2];
|
||||
for (k = 0; k < PITCH_LAG_SPAN2; k++) {
|
||||
corr = corrvec1[ind1++] + corrvec2[ind2++];
|
||||
CorrSurfPtr1[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
}
|
||||
/* fill first and last rows of correlation surface */
|
||||
ind1 = 0;
|
||||
ind2 = PITCH_BW;
|
||||
CorrSurfPtr1 = &CorrSurf[0][2];
|
||||
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2];
|
||||
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) {
|
||||
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
|
||||
adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
|
||||
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
|
||||
CorrSurfPtr1[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
|
||||
CorrSurfPtr2[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
}
|
||||
/* fill second and next to last rows of correlation surface */
|
||||
ind1 = 0;
|
||||
ind2 = PITCH_BW-1;
|
||||
CorrSurfPtr1 = &CorrSurf[1][2];
|
||||
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1];
|
||||
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) {
|
||||
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
|
||||
adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
|
||||
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
|
||||
CorrSurfPtr1[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
|
||||
CorrSurfPtr2[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
}
|
||||
/* fill remainder of correlation surface */
|
||||
for (m = 2; m < PITCH_BW; m++) {
|
||||
ind1 = 0;
|
||||
ind2 = PITCH_BW - m; /* always larger than ind1 */
|
||||
CorrSurfPtr1 = &CorrSurf[m][2];
|
||||
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m];
|
||||
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) {
|
||||
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
|
||||
adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
|
||||
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
|
||||
CorrSurfPtr1[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
|
||||
CorrSurfPtr2[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* threshold value to qualify as a peak */
|
||||
corr_max *= 0.6;
|
||||
|
||||
peaks_ind = 0;
|
||||
/* find peaks */
|
||||
for (m = 1; m < PITCH_BW+1; m++) {
|
||||
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
|
||||
CorrSurfPtr1 = &CorrSurf[m][2];
|
||||
for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) {
|
||||
corr = CorrSurfPtr1[k];
|
||||
if (corr > corr_max) {
|
||||
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
|
||||
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
|
||||
/* found a peak; store index into matrix */
|
||||
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) {
|
||||
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
|
||||
CorrSurfPtr1 = &CorrSurf[m][2];
|
||||
for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) {
|
||||
corr = CorrSurfPtr1[k];
|
||||
if (corr > corr_max) {
|
||||
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
|
||||
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
|
||||
/* found a peak; store index into matrix */
|
||||
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (peaks_ind > 0) {
|
||||
/* examine each peak */
|
||||
CorrSurfPtr1 = &CorrSurf[0][0];
|
||||
for (k = 0; k < peaks_ind; k++) {
|
||||
peak = peaks[k];
|
||||
|
||||
/* compute four interpolated values around current peak */
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d);
|
||||
|
||||
/* determine maximum of the interpolated values */
|
||||
corr = CorrSurfPtr1[peak];
|
||||
corr_max = intrp_a;
|
||||
if (intrp_b > corr_max) corr_max = intrp_b;
|
||||
if (intrp_c > corr_max) corr_max = intrp_c;
|
||||
if (intrp_d > corr_max) corr_max = intrp_d;
|
||||
|
||||
/* determine where the peak sits and fill a 3x3 matrix around it */
|
||||
row = peak / (PITCH_LAG_SPAN2+4);
|
||||
lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
|
||||
lags2[k] = (double) (lags1[k] + PITCH_BW - row);
|
||||
if ( corr > corr_max ) {
|
||||
T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
|
||||
T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
|
||||
T[1][1] = corr;
|
||||
T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
|
||||
T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
|
||||
T[1][0] = intrp_a;
|
||||
T[0][1] = intrp_b;
|
||||
T[2][1] = intrp_c;
|
||||
T[1][2] = intrp_d;
|
||||
} else {
|
||||
if (intrp_a == corr_max) {
|
||||
lags1[k] -= 0.5;
|
||||
lags2[k] += 0.5;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]);
|
||||
T[1][1] = intrp_a;
|
||||
T[0][2] = intrp_b;
|
||||
T[2][2] = intrp_c;
|
||||
T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)];
|
||||
T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
|
||||
T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
|
||||
T[1][2] = corr;
|
||||
} else if (intrp_b == corr_max) {
|
||||
lags1[k] -= 0.5;
|
||||
lags2[k] -= 0.5;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]);
|
||||
T[2][0] = intrp_a;
|
||||
T[1][1] = intrp_b;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]);
|
||||
T[2][2] = intrp_d;
|
||||
T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
|
||||
T[0][1] = CorrSurfPtr1[peak - 1];
|
||||
T[2][1] = corr;
|
||||
T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
|
||||
} else if (intrp_c == corr_max) {
|
||||
lags1[k] += 0.5;
|
||||
lags2[k] += 0.5;
|
||||
T[0][0] = intrp_a;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]);
|
||||
T[1][1] = intrp_c;
|
||||
T[0][2] = intrp_d;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]);
|
||||
T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
|
||||
T[0][1] = corr;
|
||||
T[2][1] = CorrSurfPtr1[peak + 1];
|
||||
T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
|
||||
} else {
|
||||
lags1[k] += 0.5;
|
||||
lags2[k] -= 0.5;
|
||||
T[0][0] = intrp_b;
|
||||
T[2][0] = intrp_c;
|
||||
T[1][1] = intrp_d;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]);
|
||||
T[1][0] = corr;
|
||||
T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
|
||||
T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
|
||||
T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)];
|
||||
}
|
||||
}
|
||||
|
||||
/* 2D parabolic interpolation gives more accurate lags and peak value */
|
||||
Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]);
|
||||
}
|
||||
|
||||
/* determine the highest peak, after applying a bias towards short lags */
|
||||
corr_max = 0.0;
|
||||
for (k = 0; k < peaks_ind; k++) {
|
||||
corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k]));
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr;
|
||||
peak = k;
|
||||
}
|
||||
}
|
||||
|
||||
lags1[peak] *= 2.0;
|
||||
lags2[peak] *= 2.0;
|
||||
|
||||
if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG;
|
||||
if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG;
|
||||
if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG;
|
||||
if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG;
|
||||
|
||||
/* store lags of highest peak in output array */
|
||||
lags[0] = lags1[peak];
|
||||
lags[1] = lags1[peak];
|
||||
lags[2] = lags2[peak];
|
||||
lags[3] = lags2[peak];
|
||||
}
|
||||
else
|
||||
{
|
||||
row = max_ind / (PITCH_LAG_SPAN2+4);
|
||||
lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
|
||||
lags2[0] = (double) (lags1[0] + PITCH_BW - row);
|
||||
|
||||
if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG;
|
||||
if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG;
|
||||
if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG;
|
||||
if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG;
|
||||
|
||||
/* store lags of highest peak in output array */
|
||||
lags[0] = lags1[0];
|
||||
lags[1] = lags1[0];
|
||||
lags[2] = lags2[0];
|
||||
lags[3] = lags2[0];
|
||||
}
|
||||
}
|
||||
|
||||
RTC_POP_IGNORING_WFRAME_LARGER_THAN()
|
||||
|
||||
/* Weighting matrix for the gain-fluctuation penalty, created by
 * orthogonalizing a basis of polynomials of increasing order:
 *   t = (0:4)';
 *   A = [t.^0, t.^1, t.^2, t.^3, t.^4];
 *   [Q, dummy] = qr(A);
 *   P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q';
 *
 * The table is symmetric. In WebRtcIsac_PitchAnalysis, index 0 multiplies the
 * previous frame's gain and indices 1..4 multiply the four gains of the
 * current frame. */
static const double kWeight[5][5] = {
  { 0.29714285714286, -0.30857142857143, -0.05714285714286,  0.05142857142857,  0.01714285714286},
  {-0.30857142857143,  0.67428571428571, -0.27142857142857, -0.14571428571429,  0.05142857142857},
  {-0.05714285714286, -0.27142857142857,  0.65714285714286, -0.27142857142857, -0.05714285714286},
  { 0.05142857142857, -0.14571428571429, -0.27142857142857,  0.67428571428571, -0.30857142857143},
  { 0.01714285714286,  0.05142857142857, -0.05714285714286, -0.30857142857143,  0.29714285714286}
};
|
||||
|
||||
/* Second order high-pass filter.
 *
 *   in    : N input samples.
 *   out   : N output samples.
 *   state : two filter state values, read and updated in place so that
 *           consecutive calls continue the same filter.
 *   N     : number of samples to process; N == 0 leaves everything untouched.
 */
static void WebRtcIsac_Highpass(const double* in,
                                double* out,
                                double* state,
                                size_t N) {
  /* create high-pass filter coefficients
   * z = 0.998 * exp(j*2*pi*35/8000);
   * p = 0.94 * exp(j*2*pi*140/8000);
   * HP_b = [1, -2*real(z), abs(z)^2];
   * HP_a = [1, -2*real(p), abs(p)^2]; */
  static const double a_coef[2] = { 1.86864659625574, -0.88360000000000};
  static const double b_coef[2] = {-1.99524591718270,  0.99600400000000};

  size_t k;

  for (k = 0; k < N; k++) {
    /* output first, then push input and output into the two state values */
    out[k] = in[k] + state[1];
    state[1] = state[0] + b_coef[0] * in[k] + a_coef[0] * out[k];
    state[0] = b_coef[1] * in[k] + a_coef[1] * out[k];
  }
}
|
||||
|
||||
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
|
||||
|
||||
void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */
|
||||
double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
|
||||
PitchAnalysisStruct *State,
|
||||
double *lags,
|
||||
double *gains)
|
||||
{
|
||||
double HPin[PITCH_FRAME_LEN];
|
||||
double Weighted[PITCH_FRAME_LEN];
|
||||
double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
|
||||
double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
|
||||
double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead
|
||||
double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
|
||||
double old_lag, old_gain;
|
||||
double nrg_wht, tmp;
|
||||
double Wnrg, Wfluct, Wgain;
|
||||
double H[4][4];
|
||||
double grad[4];
|
||||
double dG[4];
|
||||
int k, m, n, iter;
|
||||
|
||||
/* high pass filtering using second order pole-zero filter */
|
||||
WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
|
||||
|
||||
/* copy from state into buffer */
|
||||
memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
|
||||
|
||||
/* compute weighted and whitened signals */
|
||||
WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
|
||||
|
||||
/* copy from buffer into state */
|
||||
memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
|
||||
|
||||
old_lag = State->PFstr_wght.oldlagp[0];
|
||||
old_gain = State->PFstr_wght.oldgainp[0];
|
||||
|
||||
/* inital pitch estimate */
|
||||
WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
|
||||
|
||||
|
||||
/* Iterative optimization of lags - to be done */
|
||||
|
||||
/* compute energy of whitened signal */
|
||||
nrg_wht = 0.0;
|
||||
for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
|
||||
nrg_wht += Whitened[k] * Whitened[k];
|
||||
|
||||
|
||||
/* Iterative optimization of gains */
|
||||
|
||||
/* set weights for energy, gain fluctiation, and spectral gain penalty functions */
|
||||
Wnrg = 1.0 / nrg_wht;
|
||||
Wgain = 0.005;
|
||||
Wfluct = 3.0;
|
||||
|
||||
/* set initial gains */
|
||||
for (k = 0; k < 4; k++)
|
||||
gains[k] = PITCH_MAX_GAIN_06;
|
||||
|
||||
/* two iterations should be enough */
|
||||
for (iter = 0; iter < 2; iter++) {
|
||||
/* compute Jacobian of pre-filter output towards gains */
|
||||
WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
|
||||
|
||||
/* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
|
||||
for (k = 0; k < 4; k++) {
|
||||
tmp = 0.0;
|
||||
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
|
||||
tmp += out_G[n] * out_dG[k][n];
|
||||
grad[k] = tmp * Wnrg;
|
||||
}
|
||||
for (k = 0; k < 4; k++) {
|
||||
for (m = 0; m <= k; m++) {
|
||||
tmp = 0.0;
|
||||
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
|
||||
tmp += out_dG[m][n] * out_dG[k][n];
|
||||
H[k][m] = tmp * Wnrg;
|
||||
}
|
||||
}
|
||||
|
||||
/* add gradient and Hessian (lower triangle) for dampening fast gain changes */
|
||||
for (k = 0; k < 4; k++) {
|
||||
tmp = kWeight[k+1][0] * old_gain;
|
||||
for (m = 0; m < 4; m++)
|
||||
tmp += kWeight[k+1][m+1] * gains[m];
|
||||
grad[k] += tmp * Wfluct;
|
||||
}
|
||||
for (k = 0; k < 4; k++) {
|
||||
for (m = 0; m <= k; m++) {
|
||||
H[k][m] += kWeight[k+1][m+1] * Wfluct;
|
||||
}
|
||||
}
|
||||
|
||||
/* add gradient and Hessian for dampening gain */
|
||||
for (k = 0; k < 3; k++) {
|
||||
tmp = 1.0 / (1 - gains[k]);
|
||||
grad[k] += tmp * tmp * Wgain;
|
||||
H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
|
||||
}
|
||||
tmp = 1.0 / (1 - gains[3]);
|
||||
grad[3] += 1.33 * (tmp * tmp * Wgain);
|
||||
H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
|
||||
|
||||
|
||||
/* compute Cholesky factorization of Hessian
|
||||
* by overwritting the upper triangle; scale factors on diagonal
|
||||
* (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */
|
||||
H[0][1] = H[1][0] / H[0][0];
|
||||
H[0][2] = H[2][0] / H[0][0];
|
||||
H[0][3] = H[3][0] / H[0][0];
|
||||
H[1][1] -= H[0][0] * H[0][1] * H[0][1];
|
||||
H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
|
||||
H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
|
||||
H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
|
||||
H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
|
||||
H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
|
||||
|
||||
/* Compute update as delta_gains = -inv(H) * grad */
|
||||
/* copy and negate */
|
||||
for (k = 0; k < 4; k++)
|
||||
dG[k] = -grad[k];
|
||||
/* back substitution */
|
||||
dG[1] -= dG[0] * H[0][1];
|
||||
dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
|
||||
dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
|
||||
/* scale */
|
||||
for (k = 0; k < 4; k++)
|
||||
dG[k] /= H[k][k];
|
||||
/* back substitution */
|
||||
dG[2] -= dG[3] * H[2][3];
|
||||
dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
|
||||
dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
|
||||
|
||||
/* update gains and check range */
|
||||
for (k = 0; k < 4; k++) {
|
||||
gains[k] += dG[k];
|
||||
if (gains[k] > PITCH_MAX_GAIN)
|
||||
gains[k] = PITCH_MAX_GAIN;
|
||||
else if (gains[k] < 0.0)
|
||||
gains[k] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* update state for next frame */
|
||||
WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
|
||||
|
||||
/* concatenate previous input's end and current input */
|
||||
memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
|
||||
memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
|
||||
|
||||
/* lookahead pitch filtering for masking analysis */
|
||||
WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
|
||||
|
||||
/* store last part of input */
|
||||
for (k = 0; k < QLOOKAHEAD; k++)
|
||||
State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
|
||||
}
|
||||
|
||||
RTC_POP_IGNORING_WFRAME_LARGER_THAN()
|
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* pitch_estimator.h
|
||||
*
|
||||
* Pitch functions
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
void WebRtcIsac_PitchAnalysis(
|
||||
const double* in, /* PITCH_FRAME_LEN samples */
|
||||
double* out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
|
||||
PitchAnalysisStruct* State,
|
||||
double* lags,
|
||||
double* gains);
|
||||
|
||||
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ */
|
@ -0,0 +1,388 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"
|
||||
#include "rtc_base/compile_assert_c.h"
|
||||
|
||||
/*
|
||||
* We are implementing the following filters;
|
||||
*
|
||||
* Pre-filtering:
|
||||
* y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
|
||||
*
|
||||
* Post-filtering:
|
||||
* y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
|
||||
*
|
||||
* Note that `lag` is a floating number so we perform an interpolation to
|
||||
* obtain the correct `lag`.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Taps of the damping filter applied to the gain-scaled, lag-interpolated
 * signal in FilterSegment(); symmetric, and the taps sum to 1.0. */
static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
                                                    -0.07};
|
||||
|
||||
/* interpolation coefficients; generated by design_pitch_filter.m */
|
||||
static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
|
||||
{-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125,
|
||||
0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109,
|
||||
0.01754159521746},
|
||||
{-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643,
|
||||
0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084,
|
||||
0.01654127246314},
|
||||
{-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787,
|
||||
0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311,
|
||||
0.01295781500709},
|
||||
{-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393,
|
||||
0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166,
|
||||
0.00719783432422},
|
||||
{-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001,
|
||||
0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000,
|
||||
-0.00000000000000},
|
||||
{0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562,
|
||||
0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377,
|
||||
-0.00764851320885},
|
||||
{0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650,
|
||||
0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059,
|
||||
-0.01463300534216},
|
||||
{0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190,
|
||||
0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865,
|
||||
-0.01985640750433}
|
||||
};
|
||||
|
||||
/*
 * Enumerating the operation of the filter.
 * iSAC has 4 different pitch-filters which are very similar in their
 * structure.
 *
 * kPitchFilterPre     : In this mode the filter is operating as pitch
 *                       pre-filter. This is used at the encoder.
 * kPitchFilterPost    : In this mode the filter is operating as pitch
 *                       post-filter. This is the inverse of pre-filter and
 *                       used in the decoder.
 * kPitchFilterPreLa   : This is, in structure, similar to pre-filtering but
 *                       utilizing 3 millisecond lookahead. It is used to
 *                       obtain the signal for LPC analysis.
 * kPitchFilterPreGain : This is, in structure, similar to pre-filtering but
 *                       differential changes in gain are considered. This is
 *                       used to find the optimal gain.
 */
typedef enum {
  kPitchFilterPre,
  kPitchFilterPost,
  kPitchFilterPreLa,
  kPitchFilterPreGain
} PitchFilterOperation;
|
||||
|
||||
/*
|
||||
* Structure with parameters used for pitch-filtering.
|
||||
* buffer : a buffer where the sum of previous inputs and outputs
|
||||
* are stored.
|
||||
* damper_state : the state of the damping filter. The filter is defined by
|
||||
* `kDampFilter`.
|
||||
* interpol_coeff : pointer to a set of coefficient which are used to utilize
|
||||
* fractional pitch by interpolation.
|
||||
* gain : pitch-gain to be applied to the current segment of input.
|
||||
* lag : pitch-lag for the current segment of input.
|
||||
* lag_offset : the offset of lag w.r.t. current sample.
|
||||
* sub_frame : sub-frame index, there are 4 pitch sub-frames in an iSAC
|
||||
* frame.
|
||||
* This specifies the usage of the filter. See
|
||||
* 'PitchFilterOperation' for operational modes.
|
||||
* num_samples : number of samples to be processed in each segment.
|
||||
* index : index of the input and output sample.
|
||||
* damper_state_dg : state of damping filter for different trial gains.
|
||||
* gain_mult : differential changes to gain.
|
||||
*/
|
||||
typedef struct {
|
||||
double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
|
||||
double damper_state[PITCH_DAMPORDER];
|
||||
const double *interpol_coeff;
|
||||
double gain;
|
||||
double lag;
|
||||
int lag_offset;
|
||||
|
||||
int sub_frame;
|
||||
PitchFilterOperation mode;
|
||||
int num_samples;
|
||||
int index;
|
||||
|
||||
double damper_state_dg[4][PITCH_DAMPORDER];
|
||||
double gain_mult[4];
|
||||
} PitchFilterParam;
|
||||
|
||||
/**********************************************************************
|
||||
* FilterSegment()
|
||||
* Filter one segment, a quarter of a frame.
|
||||
*
|
||||
* Inputs
|
||||
* in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
|
||||
* filter_param : pitch filter parameters.
|
||||
*
|
||||
* Outputs
|
||||
* out_data : pointer to a buffer where the filtered signal is written to.
|
||||
* out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
|
||||
* where the output of different gain values (differential
|
||||
* change to gain) is written.
|
||||
*/
|
||||
static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
|
||||
double* out_data,
|
||||
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
|
||||
int n;
|
||||
int m;
|
||||
int j;
|
||||
double sum;
|
||||
double sum2;
|
||||
/* Index of `parameters->buffer` where the output is written to. */
|
||||
int pos = parameters->index + PITCH_BUFFSIZE;
|
||||
/* Index of `parameters->buffer` where samples are read for fractional-lag
|
||||
* computation. */
|
||||
int pos_lag = pos - parameters->lag_offset;
|
||||
|
||||
for (n = 0; n < parameters->num_samples; ++n) {
|
||||
/* Shift low pass filter states. */
|
||||
for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
|
||||
parameters->damper_state[m] = parameters->damper_state[m - 1];
|
||||
}
|
||||
/* Filter to get fractional pitch. */
|
||||
sum = 0.0;
|
||||
for (m = 0; m < PITCH_FRACORDER; ++m) {
|
||||
sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
|
||||
}
|
||||
/* Multiply with gain. */
|
||||
parameters->damper_state[0] = parameters->gain * sum;
|
||||
|
||||
if (parameters->mode == kPitchFilterPreGain) {
|
||||
int lag_index = parameters->index - parameters->lag_offset;
|
||||
int m_tmp = (lag_index < 0) ? -lag_index : 0;
|
||||
/* Update the damper state for the new sample. */
|
||||
for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
|
||||
for (j = 0; j < 4; ++j) {
|
||||
parameters->damper_state_dg[j][m] =
|
||||
parameters->damper_state_dg[j][m - 1];
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < parameters->sub_frame + 1; ++j) {
|
||||
/* Filter for fractional pitch. */
|
||||
sum2 = 0.0;
|
||||
for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
|
||||
/* `lag_index + m` is always larger than or equal to zero, see how
|
||||
* m_tmp is computed. This is equivalent to assume samples outside
|
||||
* `out_dg[j]` are zero. */
|
||||
sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
|
||||
}
|
||||
/* Add the contribution of differential gain change. */
|
||||
parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
|
||||
parameters->gain * sum2;
|
||||
}
|
||||
|
||||
/* Filter with damping filter, and store the results. */
|
||||
for (j = 0; j < parameters->sub_frame + 1; ++j) {
|
||||
sum = 0.0;
|
||||
for (m = 0; m < PITCH_DAMPORDER; ++m) {
|
||||
sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
|
||||
}
|
||||
out_dg[j][parameters->index] = sum;
|
||||
}
|
||||
}
|
||||
/* Filter with damping filter. */
|
||||
sum = 0.0;
|
||||
for (m = 0; m < PITCH_DAMPORDER; ++m) {
|
||||
sum += parameters->damper_state[m] * kDampFilter[m];
|
||||
}
|
||||
|
||||
/* Subtract from input and update buffer. */
|
||||
out_data[parameters->index] = in_data[parameters->index] - sum;
|
||||
parameters->buffer[pos] = in_data[parameters->index] +
|
||||
out_data[parameters->index];
|
||||
|
||||
++parameters->index;
|
||||
++pos;
|
||||
++pos_lag;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Update filter parameters based on the pitch-gains and pitch-lags. */
|
||||
static void Update(PitchFilterParam* parameters) {
|
||||
double fraction;
|
||||
int fraction_index;
|
||||
/* Compute integer lag-offset. */
|
||||
parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
|
||||
0.5);
|
||||
/* Find correct set of coefficients for computing fractional pitch. */
|
||||
fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
|
||||
fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
|
||||
parameters->interpol_coeff = kIntrpCoef[fraction_index];
|
||||
|
||||
if (parameters->mode == kPitchFilterPreGain) {
|
||||
/* If in this mode make a differential change to pitch gain. */
|
||||
parameters->gain_mult[parameters->sub_frame] += 0.2;
|
||||
if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
|
||||
parameters->gain_mult[parameters->sub_frame] = 1.0;
|
||||
}
|
||||
if (parameters->sub_frame > 0) {
|
||||
parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* FilterFrame()
|
||||
* Filter a frame of 30 millisecond, given pitch-lags and pitch-gains.
|
||||
*
|
||||
* Inputs
|
||||
* in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
|
||||
* lags : pointer to pitch-lags, 4 lags per frame.
|
||||
* gains : pointer to pitch-gians, 4 gains per frame.
|
||||
* mode : defining the functionality of the filter. It takes the
|
||||
* following values.
|
||||
* kPitchFilterPre: Pitch pre-filter, used at encoder.
|
||||
* kPitchFilterPost: Pitch post-filter, used at decoder.
|
||||
* kPitchFilterPreLa: Pitch pre-filter with lookahead.
|
||||
* kPitchFilterPreGain: Pitch pre-filter used to otain optimal
|
||||
* pitch-gains.
|
||||
*
|
||||
* Outputs
|
||||
* out_data : pointer to a buffer where the filtered signal is written to.
|
||||
* out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
|
||||
* where the output of different gain values (differential
|
||||
* change to gain) is written.
|
||||
*/
|
||||
static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
|
||||
double* lags, double* gains, PitchFilterOperation mode,
|
||||
double* out_data,
|
||||
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
|
||||
PitchFilterParam filter_parameters;
|
||||
double gain_delta, lag_delta;
|
||||
double old_lag, old_gain;
|
||||
int n;
|
||||
int m;
|
||||
const double kEnhancer = 1.3;
|
||||
|
||||
/* Set up buffer and states. */
|
||||
filter_parameters.index = 0;
|
||||
filter_parameters.lag_offset = 0;
|
||||
filter_parameters.mode = mode;
|
||||
/* Copy states to local variables. */
|
||||
memcpy(filter_parameters.buffer, filter_state->ubuf,
|
||||
sizeof(filter_state->ubuf));
|
||||
RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >=
|
||||
sizeof(filter_state->ubuf));
|
||||
memset(filter_parameters.buffer +
|
||||
sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]),
|
||||
0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf));
|
||||
memcpy(filter_parameters.damper_state, filter_state->ystate,
|
||||
sizeof(filter_state->ystate));
|
||||
|
||||
if (mode == kPitchFilterPreGain) {
|
||||
/* Clear buffers. */
|
||||
memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult));
|
||||
memset(filter_parameters.damper_state_dg, 0,
|
||||
sizeof(filter_parameters.damper_state_dg));
|
||||
for (n = 0; n < PITCH_SUBFRAMES; ++n) {
|
||||
//memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
|
||||
memset(out_dg[n], 0, sizeof(out_dg[n]));
|
||||
}
|
||||
} else if (mode == kPitchFilterPost) {
|
||||
/* Make output more periodic. Negative sign is to change the structure
|
||||
* of the filter. */
|
||||
for (n = 0; n < PITCH_SUBFRAMES; ++n) {
|
||||
gains[n] *= -kEnhancer;
|
||||
}
|
||||
}
|
||||
|
||||
old_lag = *filter_state->oldlagp;
|
||||
old_gain = *filter_state->oldgainp;
|
||||
|
||||
/* No interpolation if pitch lag step is big. */
|
||||
if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
|
||||
(lags[0] < (PITCH_DOWNSTEP * old_lag))) {
|
||||
old_lag = lags[0];
|
||||
old_gain = gains[0];
|
||||
|
||||
if (mode == kPitchFilterPreGain) {
|
||||
filter_parameters.gain_mult[0] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
filter_parameters.num_samples = PITCH_UPDATE;
|
||||
for (m = 0; m < PITCH_SUBFRAMES; ++m) {
|
||||
/* Set the sub-frame value. */
|
||||
filter_parameters.sub_frame = m;
|
||||
/* Calculate interpolation steps for pitch-lag and pitch-gain. */
|
||||
lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
|
||||
filter_parameters.lag = old_lag;
|
||||
gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
|
||||
filter_parameters.gain = old_gain;
|
||||
/* Store for the next sub-frame. */
|
||||
old_lag = lags[m];
|
||||
old_gain = gains[m];
|
||||
|
||||
for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
|
||||
/* Step-wise interpolation of pitch gains and lags. As pitch-lag changes,
|
||||
* some parameters of filter need to be update. */
|
||||
filter_parameters.gain += gain_delta;
|
||||
filter_parameters.lag += lag_delta;
|
||||
/* Update parameters according to new lag value. */
|
||||
Update(&filter_parameters);
|
||||
/* Filter a segment of input. */
|
||||
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
|
||||
}
|
||||
}
|
||||
|
||||
if (mode != kPitchFilterPreGain) {
|
||||
/* Export buffer and states. */
|
||||
memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
|
||||
sizeof(filter_state->ubuf));
|
||||
memcpy(filter_state->ystate, filter_parameters.damper_state,
|
||||
sizeof(filter_state->ystate));
|
||||
|
||||
/* Store for the next frame. */
|
||||
*filter_state->oldlagp = old_lag;
|
||||
*filter_state->oldgainp = old_gain;
|
||||
}
|
||||
|
||||
if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
|
||||
/* Filter the lookahead segment, this is treated as the last sub-frame. So
|
||||
* set `pf_param` to last sub-frame. */
|
||||
filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
|
||||
filter_parameters.num_samples = QLOOKAHEAD;
|
||||
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
|
||||
}
|
||||
}
|
||||
|
||||
void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data,
|
||||
PitchFiltstr* pf_state, double* lags,
|
||||
double* gains) {
|
||||
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
|
||||
}
|
||||
|
||||
void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data,
|
||||
PitchFiltstr* pf_state, double* lags,
|
||||
double* gains) {
|
||||
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
|
||||
NULL);
|
||||
}
|
||||
|
||||
void WebRtcIsac_PitchfilterPre_gains(
|
||||
double* in_data, double* out_data,
|
||||
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state,
|
||||
double* lags, double* gains) {
|
||||
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
|
||||
out_dg);
|
||||
}
|
||||
|
||||
void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data,
|
||||
PitchFiltstr* pf_state, double* lags,
|
||||
double* gains) {
|
||||
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
|
||||
}
|
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
void WebRtcIsac_PitchfilterPre(double* indat,
|
||||
double* outdat,
|
||||
PitchFiltstr* pfp,
|
||||
double* lags,
|
||||
double* gains);
|
||||
|
||||
void WebRtcIsac_PitchfilterPost(double* indat,
|
||||
double* outdat,
|
||||
PitchFiltstr* pfp,
|
||||
double* lags,
|
||||
double* gains);
|
||||
|
||||
void WebRtcIsac_PitchfilterPre_la(double* indat,
|
||||
double* outdat,
|
||||
PitchFiltstr* pfp,
|
||||
double* lags,
|
||||
double* gains);
|
||||
|
||||
void WebRtcIsac_PitchfilterPre_gains(
|
||||
double* indat,
|
||||
double* outdat,
|
||||
double out_dG[][PITCH_FRAME_LEN + QLOOKAHEAD],
|
||||
PitchFiltstr* pfp,
|
||||
double* lags,
|
||||
double* gains);
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
|
@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* settings.h
|
||||
*
|
||||
* Declaration of #defines used in the iSAC codec
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
|
||||
|
||||
/* Sampling frequency in Hz. */
#define FS                   16000

/* Initial number of samples per frame; a frame is either 320 (20 ms),
 * 480 (30 ms) or 960 (60 ms) samples. */
#define INITIAL_FRAMESAMPLES 960

/* Do not modify the following; this will have to be modified if we
 * have a 20 ms frame-size option. */
/**********************************************************************/
/* Frame size in milliseconds. */
#define FRAMESIZE            30
/* Number of samples per frame processed in the encoder:
 * (FRAMESIZE * FS) / 1000 = 480, plus its half and quarter. */
#define FRAMESAMPLES         480
#define FRAMESAMPLES_HALF    240
#define FRAMESAMPLES_QUARTER 120
/**********************************************************************/
|
||||
|
||||
/* Maximum number of samples per frame (= 60 ms frame), and the same for the
 * super-wideband case. */
#define MAX_FRAMESAMPLES     960
#define MAX_SWBFRAMESAMPLES  (MAX_FRAMESAMPLES * 2)

/* Number of samples per 10 ms frame. */
#define FRAMESAMPLES_10ms    ((10 * FS) / 1000)
#define SWBFRAMESAMPLES_10ms (FRAMESAMPLES_10ms * 2)

/* Number of samples in a 30 ms frame. */
#define FRAMESAMPLES_30ms    480

/* Number of subframes. */
#define SUBFRAMES            6
/* Length of a subframe. */
#define UPDATE               80
/* Length of half a subframe (low/high band). */
#define HALF_SUBFRAMELEN     (UPDATE / 2)

/* Samples of look ahead (in a half-band, so actually half the samples of
 * look ahead @ FS). */
#define QLOOKAHEAD           24 /* 3 ms */

/* Order of AR model in spectral entropy coder. */
#define AR_ORDER             6
/* Order of LP model in spectral entropy coder. */
#define LP_ORDER             0
|
||||
|
||||
/* Window length (masking analysis). */
#define WINLEN                 256
/* Order of the low-band pole filter used to approximate the masking curve. */
#define ORDERLO                12
/* Order of the hi-band pole filter used to approximate the masking curve. */
#define ORDERHI                6

/* Upper-band LPC dimensions. */
#define UB_LPC_ORDER           4
#define UB_LPC_VEC_PER_FRAME   2
#define UB16_LPC_VEC_PER_FRAME 4
#define UB_ACTIVE_SUBFRAMES    2
#define UB_MAX_LPC_ORDER       6
#define UB_INTERPOL_SEGMENTS   1
#define UB16_INTERPOL_SEGMENTS 3
#define LB_TOTAL_DELAY_SAMPLES 48
|
||||
/* Audio bandwidth; each enumerator's value is the bandwidth in kHz. */
enum ISACBandwidth {
  isac8kHz = 8,
  isac12kHz = 12,
  isac16kHz = 16
};
|
||||
/* Identifies the lower band or one of the two upper-band variants. */
enum ISACBand {
  kIsacLowerBand   = 0,
  kIsacUpperBand12 = 1,
  kIsacUpperBand16 = 2
};
|
||||
/* Sampling rate selector; each enumerator's value is the rate in kHz. */
enum IsacSamplingRate {
  kIsacWideband = 16,
  kIsacSuperWideband = 32
};
|
||||
/* Dimension of the upper-band LPC gain vector: one gain per subframe. */
#define UB_LPC_GAIN_DIM      SUBFRAMES
#define FB_STATE_SIZE_WORD32 6

/* Order for post_filter_bank. */
#define POSTQORDER           3
/* Order for pre-filterbank. */
#define QORDER               3
/* Combined order of the two filter banks above. */
#define QORDER_ALL           (POSTQORDER + QORDER - 1)
/* Number of all-pass sections, for the decimator. */
#define ALLPASSSECTIONS      2
|
||||
|
||||
/* Array size for the byte stream, in number of bytes. */
/* The old maximum size is still needed for the decoding. */
#define STREAM_SIZE_MAX    600
#define STREAM_SIZE_MAX_30 200 /* 200 bytes = about 53.3 kbps @ 30 ms frame length */
#define STREAM_SIZE_MAX_60 400 /* 400 bytes = about 53.3 kbps @ 60 ms frame length */

/* Storage size for bit counts. */
#define BIT_COUNTER_SIZE   30
/* Maximum order of any AR model or filter. */
#define MAX_AR_MODEL_ORDER 12 // 50
|
||||
|
||||
/* ---- Pitch analysis ---- */
#define PITCH_FRAME_LEN         (FRAMESAMPLES_HALF) /* 30 ms */
#define PITCH_MAX_LAG           140 /* 57 Hz */
#define PITCH_MIN_LAG           20 /* 400 Hz */
#define PITCH_MAX_GAIN          0.45
#define PITCH_MAX_GAIN_06       0.27 /* PITCH_MAX_GAIN * 0.6 */
#define PITCH_MAX_GAIN_Q12      1843 /* PITCH_MAX_GAIN * 4096, truncated */
#define PITCH_LAG_SPAN2         (PITCH_MAX_LAG / 2 - PITCH_MIN_LAG / 2 + 5)
#define PITCH_CORR_LEN2         60 /* 15 ms */
#define PITCH_CORR_STEP2        (PITCH_FRAME_LEN / 4)
#define PITCH_BW                11 /* half the band width of correlation surface */
#define PITCH_SUBFRAMES         4
#define PITCH_GRAN_PER_SUBFRAME 5
#define PITCH_SUBFRAME_LEN      (PITCH_FRAME_LEN / PITCH_SUBFRAMES)
#define PITCH_UPDATE            (PITCH_SUBFRAME_LEN / PITCH_GRAN_PER_SUBFRAME)
/* Maximum number of peaks to be examined in the correlation surface. */
#define PITCH_MAX_NUM_PEAKS     10
#define PITCH_PEAK_DECAY        0.85

/* ---- Weighting filter ---- */
#define PITCH_WLPCORDER         6
#define PITCH_WLPCWINLEN        PITCH_FRAME_LEN
#define PITCH_WLPCASYM          0.3 /* asymmetry parameter */
#define PITCH_WLPCBUFLEN        PITCH_WLPCWINLEN

/* ---- Pitch filter ---- */
/* History length: the maximum lag plus an extra 50 for the fraction and LP
 * filters. */
#define PITCH_BUFFSIZE          (PITCH_MAX_LAG + 50)
#define PITCH_INTBUFFSIZE       (PITCH_FRAME_LEN + PITCH_BUFFSIZE)
/* Max relative upward lag step for which the lag is still interpolated. */
#define PITCH_UPSTEP            1.5
/* Max relative downward lag step for which the lag is still interpolated. */
#define PITCH_DOWNSTEP          0.67
#define PITCH_FRACS             8
#define PITCH_FRACORDER         9
#define PITCH_DAMPORDER         5
#define PITCH_FILTDELAY         1.5f
/* Step size for quantization of the pitch gain. */
#define PITCH_GAIN_STEPSIZE     0.125
|
||||
|
||||
/* Order of the high pass filter. */
#define HPORDER                          2

/* Some mathematical constants. */
/* log2(e) */
#define LOG2EXP                          1.44269504088896
#define PI                               3.14159265358979

/* Maximum number of iterations allowed to limit the payload size. */
#define MAX_PAYLOAD_LIMIT_ITERATION      5

/* Redundant coding. Each *_INVERSE is the reciprocal of the scale above it. */
#define RCU_BOTTLENECK_BPS               16000
#define RCU_TRANSCODING_SCALE            0.40f
#define RCU_TRANSCODING_SCALE_INVERSE    2.5f

#define RCU_TRANSCODING_SCALE_UB         0.50f
#define RCU_TRANSCODING_SCALE_UB_INVERSE 2.0f
|
||||
|
||||
/* Error codes, grouped in ranges by the component that reports them. */

/* 6000 General */
#define ISAC_MEMORY_ALLOCATION_FAILED          6010
#define ISAC_MODE_MISMATCH                     6020
#define ISAC_DISALLOWED_BOTTLENECK             6030
#define ISAC_DISALLOWED_FRAME_LENGTH           6040
#define ISAC_UNSUPPORTED_SAMPLING_FREQUENCY    6050

/* 6200 Bandwidth estimator */
#define ISAC_RANGE_ERROR_BW_ESTIMATOR          6240

/* 6400 Encoder */
#define ISAC_ENCODER_NOT_INITIATED             6410
#define ISAC_DISALLOWED_CODING_MODE            6420
#define ISAC_DISALLOWED_FRAME_MODE_ENCODER     6430
#define ISAC_DISALLOWED_BITSTREAM_LENGTH       6440
#define ISAC_PAYLOAD_LARGER_THAN_LIMIT         6450
#define ISAC_DISALLOWED_ENCODER_BANDWIDTH      6460

/* 6600 Decoder */
#define ISAC_DECODER_NOT_INITIATED             6610
#define ISAC_EMPTY_PACKET                      6620
#define ISAC_DISALLOWED_FRAME_MODE_DECODER     6630
#define ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH   6640
#define ISAC_RANGE_ERROR_DECODE_BANDWIDTH      6650
#define ISAC_RANGE_ERROR_DECODE_PITCH_GAIN     6660
#define ISAC_RANGE_ERROR_DECODE_PITCH_LAG      6670
#define ISAC_RANGE_ERROR_DECODE_LPC            6680
#define ISAC_RANGE_ERROR_DECODE_SPECTRUM       6690
#define ISAC_LENGTH_MISMATCH                   6730
/* The spelling "BANDWITH" is part of the existing name; it is a separate
 * code from ISAC_RANGE_ERROR_DECODE_BANDWIDTH above. */
#define ISAC_RANGE_ERROR_DECODE_BANDWITH       6740
#define ISAC_DISALLOWED_BANDWIDTH_MODE_DECODER 6750
#define ISAC_DISALLOWED_LPC_MODEL              6760

/* 6800 Call setup formats */
#define ISAC_INCOMPATIBLE_FORMATS              6810
|
||||
|
||||
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ */
|
@ -0,0 +1,448 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* structs.h
|
||||
*
|
||||
* This header file contains all the structs used in the ISAC codec
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/bandwidth_info.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/settings.h"
|
||||
#include "modules/third_party/fft/fft.h"
|
||||
|
||||
typedef struct Bitstreamstruct {
|
||||
uint8_t stream[STREAM_SIZE_MAX];
|
||||
uint32_t W_upper;
|
||||
uint32_t streamval;
|
||||
uint32_t stream_index;
|
||||
|
||||
} Bitstr;
|
||||
|
||||
typedef struct {
|
||||
double DataBufferLo[WINLEN];
|
||||
double DataBufferHi[WINLEN];
|
||||
|
||||
double CorrBufLo[ORDERLO + 1];
|
||||
double CorrBufHi[ORDERHI + 1];
|
||||
|
||||
float PreStateLoF[ORDERLO + 1];
|
||||
float PreStateLoG[ORDERLO + 1];
|
||||
float PreStateHiF[ORDERHI + 1];
|
||||
float PreStateHiG[ORDERHI + 1];
|
||||
float PostStateLoF[ORDERLO + 1];
|
||||
float PostStateLoG[ORDERLO + 1];
|
||||
float PostStateHiF[ORDERHI + 1];
|
||||
float PostStateHiG[ORDERHI + 1];
|
||||
|
||||
double OldEnergy;
|
||||
|
||||
} MaskFiltstr;
|
||||
|
||||
typedef struct {
|
||||
// state vectors for each of the two analysis filters
|
||||
double INSTAT1[2 * (QORDER - 1)];
|
||||
double INSTAT2[2 * (QORDER - 1)];
|
||||
double INSTATLA1[2 * (QORDER - 1)];
|
||||
double INSTATLA2[2 * (QORDER - 1)];
|
||||
double INLABUF1[QLOOKAHEAD];
|
||||
double INLABUF2[QLOOKAHEAD];
|
||||
|
||||
float INSTAT1_float[2 * (QORDER - 1)];
|
||||
float INSTAT2_float[2 * (QORDER - 1)];
|
||||
float INSTATLA1_float[2 * (QORDER - 1)];
|
||||
float INSTATLA2_float[2 * (QORDER - 1)];
|
||||
float INLABUF1_float[QLOOKAHEAD];
|
||||
float INLABUF2_float[QLOOKAHEAD];
|
||||
|
||||
/* High pass filter */
|
||||
double HPstates[HPORDER];
|
||||
float HPstates_float[HPORDER];
|
||||
|
||||
} PreFiltBankstr;
|
||||
|
||||
typedef struct {
|
||||
// state vectors for each of the two analysis filters
|
||||
double STATE_0_LOWER[2 * POSTQORDER];
|
||||
double STATE_0_UPPER[2 * POSTQORDER];
|
||||
|
||||
/* High pass filter */
|
||||
double HPstates1[HPORDER];
|
||||
double HPstates2[HPORDER];
|
||||
|
||||
float STATE_0_LOWER_float[2 * POSTQORDER];
|
||||
float STATE_0_UPPER_float[2 * POSTQORDER];
|
||||
|
||||
float HPstates1_float[HPORDER];
|
||||
float HPstates2_float[HPORDER];
|
||||
|
||||
} PostFiltBankstr;
|
||||
|
||||
typedef struct {
|
||||
// data buffer for pitch filter
|
||||
double ubuf[PITCH_BUFFSIZE];
|
||||
|
||||
// low pass state vector
|
||||
double ystate[PITCH_DAMPORDER];
|
||||
|
||||
// old lag and gain
|
||||
double oldlagp[1];
|
||||
double oldgainp[1];
|
||||
|
||||
} PitchFiltstr;
|
||||
|
||||
typedef struct {
|
||||
// data buffer
|
||||
double buffer[PITCH_WLPCBUFLEN];
|
||||
|
||||
// state vectors
|
||||
double istate[PITCH_WLPCORDER];
|
||||
double weostate[PITCH_WLPCORDER];
|
||||
double whostate[PITCH_WLPCORDER];
|
||||
|
||||
// LPC window -> should be a global array because constant
|
||||
double window[PITCH_WLPCWINLEN];
|
||||
|
||||
} WeightFiltstr;
|
||||
|
||||
typedef struct {
|
||||
// for inital estimator
|
||||
double dec_buffer[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
|
||||
PITCH_FRAME_LEN / 2 + 2];
|
||||
double decimator_state[2 * ALLPASSSECTIONS + 1];
|
||||
double hp_state[2];
|
||||
|
||||
double whitened_buf[QLOOKAHEAD];
|
||||
|
||||
double inbuf[QLOOKAHEAD];
|
||||
|
||||
PitchFiltstr PFstr_wght;
|
||||
PitchFiltstr PFstr;
|
||||
WeightFiltstr Wghtstr;
|
||||
|
||||
} PitchAnalysisStruct;
|
||||
|
||||
/* Have instance of struct together with other iSAC structs */
|
||||
typedef struct {
|
||||
/* Previous frame length (in ms) */
|
||||
int32_t prev_frame_length;
|
||||
|
||||
/* Previous RTP timestamp from received
|
||||
packet (in samples relative beginning) */
|
||||
int32_t prev_rec_rtp_number;
|
||||
|
||||
/* Send timestamp for previous packet (in ms using timeGetTime()) */
|
||||
uint32_t prev_rec_send_ts;
|
||||
|
||||
/* Arrival time for previous packet (in ms using timeGetTime()) */
|
||||
uint32_t prev_rec_arr_ts;
|
||||
|
||||
/* rate of previous packet, derived from RTP timestamps (in bits/s) */
|
||||
float prev_rec_rtp_rate;
|
||||
|
||||
/* Time sinse the last update of the BN estimate (in ms) */
|
||||
uint32_t last_update_ts;
|
||||
|
||||
/* Time sinse the last reduction (in ms) */
|
||||
uint32_t last_reduction_ts;
|
||||
|
||||
/* How many times the estimate was update in the beginning */
|
||||
int32_t count_tot_updates_rec;
|
||||
|
||||
/* The estimated bottle neck rate from there to here (in bits/s) */
|
||||
int32_t rec_bw;
|
||||
float rec_bw_inv;
|
||||
float rec_bw_avg;
|
||||
float rec_bw_avg_Q;
|
||||
|
||||
/* The estimated mean absolute jitter value,
|
||||
as seen on this side (in ms) */
|
||||
float rec_jitter;
|
||||
float rec_jitter_short_term;
|
||||
float rec_jitter_short_term_abs;
|
||||
float rec_max_delay;
|
||||
float rec_max_delay_avg_Q;
|
||||
|
||||
/* (assumed) bitrate for headers (bps) */
|
||||
float rec_header_rate;
|
||||
|
||||
/* The estimated bottle neck rate from here to there (in bits/s) */
|
||||
float send_bw_avg;
|
||||
|
||||
/* The estimated mean absolute jitter value, as seen on
|
||||
the other siee (in ms) */
|
||||
float send_max_delay_avg;
|
||||
|
||||
// number of packets received since last update
|
||||
int num_pkts_rec;
|
||||
|
||||
int num_consec_rec_pkts_over_30k;
|
||||
|
||||
// flag for marking that a high speed network has been
|
||||
// detected downstream
|
||||
int hsn_detect_rec;
|
||||
|
||||
int num_consec_snt_pkts_over_30k;
|
||||
|
||||
// flag for marking that a high speed network has
|
||||
// been detected upstream
|
||||
int hsn_detect_snd;
|
||||
|
||||
uint32_t start_wait_period;
|
||||
|
||||
int in_wait_period;
|
||||
|
||||
int change_to_WB;
|
||||
|
||||
uint32_t senderTimestamp;
|
||||
uint32_t receiverTimestamp;
|
||||
// enum IsacSamplingRate incomingStreamSampFreq;
|
||||
uint16_t numConsecLatePkts;
|
||||
float consecLatency;
|
||||
int16_t inWaitLatePkts;
|
||||
|
||||
IsacBandwidthInfo external_bw_info;
|
||||
} BwEstimatorstr;
|
||||
|
||||
/* State of the rate model. */
typedef struct {
  int PrevExceed;       /* boolean, flags if previous packet exceeded B.N. */
  int ExceedAgo;        /* ms */
  int BurstCounter;     /* packets left to send in current burst */
  int InitCounter;      /* packets */
  double StillBuffered; /* ms remaining in buffer when next packet will be sent */
} RateModel;
|
||||
|
||||
/* The following strutc is used to store data from encoding, to make it
|
||||
fast and easy to construct a new bitstream with a different Bandwidth
|
||||
estimate. All values (except framelength and minBytes) is double size to
|
||||
handle 60 ms of data.
|
||||
*/
|
||||
typedef struct {
|
||||
/* Used to keep track of if it is first or second part of 60 msec packet */
|
||||
int startIdx;
|
||||
|
||||
/* Frame length in samples */
|
||||
int16_t framelength;
|
||||
|
||||
/* Pitch Gain */
|
||||
int pitchGain_index[2];
|
||||
|
||||
/* Pitch Lag */
|
||||
double meanGain[2];
|
||||
int pitchIndex[PITCH_SUBFRAMES * 2];
|
||||
|
||||
/* LPC */
|
||||
int LPCindex_s[108 * 2]; /* KLT_ORDER_SHAPE = 108 */
|
||||
int LPCindex_g[12 * 2]; /* KLT_ORDER_GAIN = 12 */
|
||||
double LPCcoeffs_lo[(ORDERLO + 1) * SUBFRAMES * 2];
|
||||
double LPCcoeffs_hi[(ORDERHI + 1) * SUBFRAMES * 2];
|
||||
|
||||
/* Encode Spec */
|
||||
int16_t fre[FRAMESAMPLES];
|
||||
int16_t fim[FRAMESAMPLES];
|
||||
int16_t AvgPitchGain[2];
|
||||
|
||||
/* Used in adaptive mode only */
|
||||
int minBytes;
|
||||
|
||||
} IsacSaveEncoderData;
|
||||
|
||||
typedef struct {
|
||||
int indexLPCShape[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
|
||||
double lpcGain[SUBFRAMES << 1];
|
||||
int lpcGainIndex[SUBFRAMES << 1];
|
||||
|
||||
Bitstr bitStreamObj;
|
||||
|
||||
int16_t realFFT[FRAMESAMPLES_HALF];
|
||||
int16_t imagFFT[FRAMESAMPLES_HALF];
|
||||
} ISACUBSaveEncDataStruct;
|
||||
|
||||
typedef struct {
|
||||
Bitstr bitstr_obj;
|
||||
MaskFiltstr maskfiltstr_obj;
|
||||
PreFiltBankstr prefiltbankstr_obj;
|
||||
PitchFiltstr pitchfiltstr_obj;
|
||||
PitchAnalysisStruct pitchanalysisstr_obj;
|
||||
FFTstr fftstr_obj;
|
||||
IsacSaveEncoderData SaveEnc_obj;
|
||||
|
||||
int buffer_index;
|
||||
int16_t current_framesamples;
|
||||
|
||||
float data_buffer_float[FRAMESAMPLES_30ms];
|
||||
|
||||
int frame_nb;
|
||||
double bottleneck;
|
||||
int16_t new_framelength;
|
||||
double s2nr;
|
||||
|
||||
/* Maximum allowed number of bits for a 30 msec packet */
|
||||
int16_t payloadLimitBytes30;
|
||||
/* Maximum allowed number of bits for a 30 msec packet */
|
||||
int16_t payloadLimitBytes60;
|
||||
/* Maximum allowed number of bits for both 30 and 60 msec packet */
|
||||
int16_t maxPayloadBytes;
|
||||
/* Maximum allowed rate in bytes per 30 msec packet */
|
||||
int16_t maxRateInBytes;
|
||||
|
||||
/*---
|
||||
If set to 1 iSAC will not adapt the frame-size, if used in
|
||||
channel-adaptive mode. The initial value will be used for all rates.
|
||||
---*/
|
||||
int16_t enforceFrameSize;
|
||||
|
||||
/*-----
|
||||
This records the BWE index the encoder injected into the bit-stream.
|
||||
It will be used in RCU. The same BWE index of main payload will be in
|
||||
the redundant payload. We can not retrieve it from BWE because it is
|
||||
a recursive procedure (WebRtcIsac_GetDownlinkBwJitIndexImpl) and has to be
|
||||
called only once per each encode.
|
||||
-----*/
|
||||
int16_t lastBWIdx;
|
||||
} ISACLBEncStruct;
|
||||
|
||||
typedef struct {
|
||||
Bitstr bitstr_obj;
|
||||
MaskFiltstr maskfiltstr_obj;
|
||||
PreFiltBankstr prefiltbankstr_obj;
|
||||
FFTstr fftstr_obj;
|
||||
ISACUBSaveEncDataStruct SaveEnc_obj;
|
||||
|
||||
int buffer_index;
|
||||
float data_buffer_float[MAX_FRAMESAMPLES + LB_TOTAL_DELAY_SAMPLES];
|
||||
double bottleneck;
|
||||
/* Maximum allowed number of bits for a 30 msec packet */
|
||||
// int16_t payloadLimitBytes30;
|
||||
/* Maximum allowed number of bits for both 30 and 60 msec packet */
|
||||
// int16_t maxPayloadBytes;
|
||||
int16_t maxPayloadSizeBytes;
|
||||
|
||||
double lastLPCVec[UB_LPC_ORDER];
|
||||
int16_t numBytesUsed;
|
||||
int16_t lastJitterInfo;
|
||||
} ISACUBEncStruct;
|
||||
|
||||
typedef struct {
|
||||
Bitstr bitstr_obj;
|
||||
MaskFiltstr maskfiltstr_obj;
|
||||
PostFiltBankstr postfiltbankstr_obj;
|
||||
PitchFiltstr pitchfiltstr_obj;
|
||||
FFTstr fftstr_obj;
|
||||
|
||||
} ISACLBDecStruct;
|
||||
|
||||
typedef struct {
|
||||
Bitstr bitstr_obj;
|
||||
MaskFiltstr maskfiltstr_obj;
|
||||
PostFiltBankstr postfiltbankstr_obj;
|
||||
FFTstr fftstr_obj;
|
||||
|
||||
} ISACUBDecStruct;
|
||||
|
||||
typedef struct {
|
||||
ISACLBEncStruct ISACencLB_obj;
|
||||
ISACLBDecStruct ISACdecLB_obj;
|
||||
} ISACLBStruct;
|
||||
|
||||
typedef struct {
|
||||
ISACUBEncStruct ISACencUB_obj;
|
||||
ISACUBDecStruct ISACdecUB_obj;
|
||||
} ISACUBStruct;
|
||||
|
||||
/*
|
||||
This struct is used to take a snapshot of the entropy coder and LPC gains
|
||||
right before encoding LPC gains. This allows us to go back to that state
|
||||
if we like to limit the payload size.
|
||||
*/
|
||||
typedef struct {
|
||||
/* 6 lower-band & 6 upper-band */
|
||||
double loFiltGain[SUBFRAMES];
|
||||
double hiFiltGain[SUBFRAMES];
|
||||
/* Upper boundary of interval W */
|
||||
uint32_t W_upper;
|
||||
uint32_t streamval;
|
||||
/* Index to the current position in bytestream */
|
||||
uint32_t stream_index;
|
||||
uint8_t stream[3];
|
||||
} transcode_obj;
|
||||
|
||||
typedef struct {
|
||||
// TODO(kwiberg): The size of these tables could be reduced by storing floats
|
||||
// instead of doubles, and by making use of the identity cos(x) =
|
||||
// sin(x+pi/2). They could also be made global constants that we fill in at
|
||||
// compile time.
|
||||
double costab1[FRAMESAMPLES_HALF];
|
||||
double sintab1[FRAMESAMPLES_HALF];
|
||||
double costab2[FRAMESAMPLES_QUARTER];
|
||||
double sintab2[FRAMESAMPLES_QUARTER];
|
||||
} TransformTables;
|
||||
|
||||
typedef struct {
|
||||
// lower-band codec instance
|
||||
ISACLBStruct instLB;
|
||||
// upper-band codec instance
|
||||
ISACUBStruct instUB;
|
||||
|
||||
// Bandwidth Estimator and model for the rate.
|
||||
BwEstimatorstr bwestimator_obj;
|
||||
RateModel rate_data_obj;
|
||||
double MaxDelay;
|
||||
|
||||
/* 0 = adaptive; 1 = instantaneous */
|
||||
int16_t codingMode;
|
||||
|
||||
// overall bottleneck of the codec
|
||||
int32_t bottleneck;
|
||||
|
||||
// QMF Filter state
|
||||
int32_t analysisFBState1[FB_STATE_SIZE_WORD32];
|
||||
int32_t analysisFBState2[FB_STATE_SIZE_WORD32];
|
||||
int32_t synthesisFBState1[FB_STATE_SIZE_WORD32];
|
||||
int32_t synthesisFBState2[FB_STATE_SIZE_WORD32];
|
||||
|
||||
// Error Code
|
||||
int16_t errorCode;
|
||||
|
||||
// bandwidth of the encoded audio 8, 12 or 16 kHz
|
||||
enum ISACBandwidth bandwidthKHz;
|
||||
// Sampling rate of audio, encoder and decode, 8 or 16 kHz
|
||||
enum IsacSamplingRate encoderSamplingRateKHz;
|
||||
enum IsacSamplingRate decoderSamplingRateKHz;
|
||||
// Flag to keep track of initializations, lower & upper-band
|
||||
// encoder and decoder.
|
||||
int16_t initFlag;
|
||||
|
||||
// Flag to to indicate signal bandwidth switch
|
||||
int16_t resetFlag_8kHz;
|
||||
|
||||
// Maximum allowed rate, measured in Bytes per 30 ms.
|
||||
int16_t maxRateBytesPer30Ms;
|
||||
// Maximum allowed payload-size, measured in Bytes.
|
||||
int16_t maxPayloadSizeBytes;
|
||||
/* The expected sampling rate of the input signal. Valid values are 16000
|
||||
* and 32000. This is not the operation sampling rate of the codec. */
|
||||
uint16_t in_sample_rate_hz;
|
||||
|
||||
// Trig tables for WebRtcIsac_Time2Spec and WebRtcIsac_Spec2time.
|
||||
TransformTables transform_tables;
|
||||
} ISACMainStruct;
|
||||
|
||||
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ */
|
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "absl/base/nullability.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "api/task_queue/task_queue_base.h"
|
||||
#include "modules/audio_processing/include/aec_dump.h"
|
||||
#include "rtc_base/system/file_wrapper.h"
|
||||
#include "rtc_base/system/rtc_export.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class RTC_EXPORT AecDumpFactory {
|
||||
public:
|
||||
// The `worker_queue` must outlive the created AecDump instance.
|
||||
// `max_log_size_bytes == -1` means the log size will be unlimited.
|
||||
// The AecDump takes responsibility for `handle` and closes it in the
|
||||
// destructor. A non-null return value indicates that the file has been
|
||||
// sucessfully opened.
|
||||
static absl::Nullable<std::unique_ptr<AecDump>> Create(
|
||||
FileWrapper file,
|
||||
int64_t max_log_size_bytes,
|
||||
absl::Nonnull<TaskQueueBase*> worker_queue);
|
||||
static absl::Nullable<std::unique_ptr<AecDump>> Create(
|
||||
absl::string_view file_name,
|
||||
int64_t max_log_size_bytes,
|
||||
absl::Nonnull<TaskQueueBase*> worker_queue);
|
||||
static absl::Nullable<std::unique_ptr<AecDump>> Create(
|
||||
absl::Nonnull<FILE*> handle,
|
||||
int64_t max_log_size_bytes,
|
||||
absl::Nonnull<TaskQueueBase*> worker_queue);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
|
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Starts with previous and target gain both equal to `initial_gain`, so the
// first Process() call applies it without any ramp.
AudioSamplesScaler::AudioSamplesScaler(float initial_gain)
    : previous_gain_(initial_gain),
      target_gain_(initial_gain) {}
|
||||
|
||||
// Scales every sample of `audio_buffer` in place. A constant gain is applied
// when the target gain equals the previously applied gain; otherwise the gain
// is moved linearly from the previous gain to the target gain across the
// frame. The result is clamped to the S16 range.
void AudioSamplesScaler::Process(AudioBuffer& audio_buffer) {
  const int frame_length = static_cast<int>(audio_buffer.num_frames());
  if (frame_length != samples_per_channel_) {
    // Frame size changed: refresh the cached length and its reciprocal.
    RTC_DCHECK_GT(audio_buffer.num_frames(), 0);
    samples_per_channel_ = frame_length;
    one_by_samples_per_channel_ = 1.f / samples_per_channel_;
  }

  const bool gain_is_steady = previous_gain_ == target_gain_;
  if (gain_is_steady && target_gain_ == 1.f) {
    // A steady unity gain leaves the samples untouched.
    return;
  }

  const size_t num_channels = audio_buffer.num_channels();

  if (gain_is_steady) {
    // Constant gain over the whole frame.
    const float steady_gain = previous_gain_;
    for (size_t ch = 0; ch < num_channels; ++ch) {
      rtc::ArrayView<float> samples(audio_buffer.channels()[ch],
                                    samples_per_channel_);
      for (float& s : samples) {
        s *= steady_gain;
      }
    }
  } else {
    // Per-sample step of the linear ramp. The ramp is restarted for each
    // channel and never passes the target gain.
    const float step =
        (target_gain_ - previous_gain_) * one_by_samples_per_channel_;
    const bool ramp_up = step > 0.f;
    for (size_t ch = 0; ch < num_channels; ++ch) {
      float ramp_gain = previous_gain_;
      rtc::ArrayView<float> samples(audio_buffer.channels()[ch],
                                    samples_per_channel_);
      for (float& s : samples) {
        ramp_gain = ramp_up ? std::min(ramp_gain + step, target_gain_)
                            : std::max(ramp_gain + step, target_gain_);
        s *= ramp_gain;
      }
    }
  }
  previous_gain_ = target_gain_;

  // Saturate the samples to be in the S16 range.
  constexpr float kMinFloatS16Value = -32768.f;
  constexpr float kMaxFloatS16Value = 32767.f;
  for (size_t ch = 0; ch < num_channels; ++ch) {
    rtc::ArrayView<float> samples(audio_buffer.channels()[ch],
                                  samples_per_channel_);
    for (float& s : samples) {
      s = rtc::SafeClamp(s, kMinFloatS16Value, kMaxFloatS16Value);
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Handles and applies a gain to the samples in an audio buffer.
|
||||
// The gain is applied for each sample and any changes in the gain take effect
|
||||
// gradually (in a linear manner) over one frame.
|
||||
class AudioSamplesScaler {
|
||||
public:
|
||||
// C-tor. The supplied `initial_gain` is used immediately at the first call to
|
||||
// Process(), i.e., in contrast to the gain supplied by SetGain(...) there is
|
||||
// no gradual change to the `initial_gain`.
|
||||
explicit AudioSamplesScaler(float initial_gain);
|
||||
AudioSamplesScaler(const AudioSamplesScaler&) = delete;
|
||||
AudioSamplesScaler& operator=(const AudioSamplesScaler&) = delete;
|
||||
|
||||
// Applies the specified gain to the audio in `audio_buffer`.
|
||||
void Process(AudioBuffer& audio_buffer);
|
||||
|
||||
// Sets the gain to apply to each sample.
|
||||
void SetGain(float gain) { target_gain_ = gain; }
|
||||
|
||||
private:
|
||||
float previous_gain_ = 1.f;
|
||||
float target_gain_ = 1.f;
|
||||
int samples_per_channel_ = -1;
|
||||
float one_by_samples_per_channel_ = -1.f;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_
|
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h"
|
||||
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Inclusive bounds of the emulated analog mic gain level.
constexpr int kMinAnalogMicGainLevel = 0;
constexpr int kMaxAnalogMicGainLevel = 255;
|
||||
|
||||
// Converts an emulated analog mic gain level into a linear gain factor,
// computed as level / kMaxAnalogMicGainLevel.
//
// `emulated_analog_mic_gain_level` is expected to lie in
// [kMinAnalogMicGainLevel, kMaxAnalogMicGainLevel]; this is only verified in
// DCHECK-enabled builds.
float ComputeLevelBasedGain(int emulated_analog_mic_gain_level) {
  static_assert(
      kMinAnalogMicGainLevel == 0,
      "The minimum gain level must be 0 for the maths below to work.");
  // This assertion guards the division below, so its message refers to the
  // maximum level.
  static_assert(kMaxAnalogMicGainLevel > 0,
                "The maximum gain level must be larger than 0 for the maths "
                "below to work.");
  constexpr float kGainToLevelMultiplier = 1.f / kMaxAnalogMicGainLevel;

  RTC_DCHECK_GE(emulated_analog_mic_gain_level, kMinAnalogMicGainLevel);
  RTC_DCHECK_LE(emulated_analog_mic_gain_level, kMaxAnalogMicGainLevel);
  return kGainToLevelMultiplier * emulated_analog_mic_gain_level;
}
|
||||
|
||||
float ComputePreGain(float pre_gain,
|
||||
int emulated_analog_mic_gain_level,
|
||||
bool emulated_analog_mic_gain_enabled) {
|
||||
return emulated_analog_mic_gain_enabled
|
||||
? pre_gain * ComputeLevelBasedGain(emulated_analog_mic_gain_level)
|
||||
: pre_gain;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stores the settings, derives the combined pre-adjustment gain from them and
// seeds the two scalers with their initial gains.
CaptureLevelsAdjuster::CaptureLevelsAdjuster(
    bool emulated_analog_mic_gain_enabled,
    int emulated_analog_mic_gain_level,
    float pre_gain,
    float post_gain)
    : emulated_analog_mic_gain_enabled_(emulated_analog_mic_gain_enabled),
      emulated_analog_mic_gain_level_(emulated_analog_mic_gain_level),
      pre_gain_(pre_gain),
      // Computed from the members initialized above.
      pre_adjustment_gain_(
          ComputePreGain(pre_gain_,
                         emulated_analog_mic_gain_level_,
                         emulated_analog_mic_gain_enabled_)),
      pre_scaler_(pre_adjustment_gain_),
      post_scaler_(post_gain) {}
|
||||
|
||||
// Runs the pre-scaler over `audio_buffer`.
void CaptureLevelsAdjuster::ApplyPreLevelAdjustment(
    AudioBuffer& audio_buffer) {
  pre_scaler_.Process(audio_buffer);
}
|
||||
|
||||
// Runs the post-scaler over `audio_buffer`.
void CaptureLevelsAdjuster::ApplyPostLevelAdjustment(
    AudioBuffer& audio_buffer) {
  post_scaler_.Process(audio_buffer);
}
|
||||
|
||||
// Stores the new pre-gain and recomputes the combined pre-adjustment gain.
void CaptureLevelsAdjuster::SetPreGain(float pre_gain) {
  pre_gain_ = pre_gain;
  UpdatePreAdjustmentGain();
}
|
||||
|
||||
// Forwards the new post-gain to the post-scaler.
void CaptureLevelsAdjuster::SetPostGain(float post_gain) {
  post_scaler_.SetGain(post_gain);
}
|
||||
|
||||
void CaptureLevelsAdjuster::SetAnalogMicGainLevel(int level) {
|
||||
RTC_DCHECK_GE(level, kMinAnalogMicGainLevel);
|
||||
RTC_DCHECK_LE(level, kMaxAnalogMicGainLevel);
|
||||
int clamped_level =
|
||||
rtc::SafeClamp(level, kMinAnalogMicGainLevel, kMaxAnalogMicGainLevel);
|
||||
|
||||
emulated_analog_mic_gain_level_ = clamped_level;
|
||||
UpdatePreAdjustmentGain();
|
||||
}
|
||||
|
||||
// Recomputes the combined pre-adjustment gain from the current settings and
// hands it to the pre-scaler as its new target gain.
void CaptureLevelsAdjuster::UpdatePreAdjustmentGain() {
  const float combined_gain =
      ComputePreGain(pre_gain_, emulated_analog_mic_gain_level_,
                     emulated_analog_mic_gain_enabled_);
  pre_adjustment_gain_ = combined_gain;
  pre_scaler_.SetGain(pre_adjustment_gain_);
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Adjusts the level of the capture signal before and after all capture-side
|
||||
// processing is done using a combination of explicitly specified gains
|
||||
// and an emulated analog gain functionality where a specified analog level
|
||||
// results in an additional gain. The pre-adjustment is achieved by combining
|
||||
// the gain value `pre_gain` and the level `emulated_analog_mic_gain_level` to
|
||||
// form a combined gain of `pre_gain`*`emulated_analog_mic_gain_level`/255 which
|
||||
// is multiplied to each sample. The intention of the
|
||||
// `emulated_analog_mic_gain_level` is to be controlled by the analog AGC
|
||||
// functionality and to produce an emulated analog mic gain equal to
|
||||
// `emulated_analog_mic_gain_level`/255. The post level adjustment is achieved
|
||||
// by multiplying each sample with the value of `post_gain`. Any changes in the
|
||||
// gains take are done smoothly over one frame and the scaled samples are
|
||||
// clamped to fit into the allowed S16 sample range.
|
||||
class CaptureLevelsAdjuster {
|
||||
public:
|
||||
// C-tor. The values for the level and the gains must fulfill
|
||||
// 0 <= emulated_analog_mic_gain_level <= 255.
|
||||
// 0.f <= pre_gain.
|
||||
// 0.f <= post_gain.
|
||||
CaptureLevelsAdjuster(bool emulated_analog_mic_gain_enabled,
|
||||
int emulated_analog_mic_gain_level,
|
||||
float pre_gain,
|
||||
float post_gain);
|
||||
CaptureLevelsAdjuster(const CaptureLevelsAdjuster&) = delete;
|
||||
CaptureLevelsAdjuster& operator=(const CaptureLevelsAdjuster&) = delete;
|
||||
|
||||
// Adjusts the level of the signal. This should be called before any of the
|
||||
// other processing is performed.
|
||||
void ApplyPreLevelAdjustment(AudioBuffer& audio_buffer);
|
||||
|
||||
// Adjusts the level of the signal. This should be called after all of the
|
||||
// other processing have been performed.
|
||||
void ApplyPostLevelAdjustment(AudioBuffer& audio_buffer);
|
||||
|
||||
// Sets the gain to apply to each sample before any of the other processing is
|
||||
// performed.
|
||||
void SetPreGain(float pre_gain);
|
||||
|
||||
// Returns the total pre-adjustment gain applied, comprising both the pre_gain
|
||||
// as well as the gain from the emulated analog mic, to each sample before any
|
||||
// of the other processing is performed.
|
||||
float GetPreAdjustmentGain() const { return pre_adjustment_gain_; }
|
||||
|
||||
// Sets the gain to apply to each sample after all of the other processing
|
||||
// have been performed.
|
||||
void SetPostGain(float post_gain);
|
||||
|
||||
// Sets the analog gain level to use for the emulated analog gain.
|
||||
// `level` must be in the range [0...255].
|
||||
void SetAnalogMicGainLevel(int level);
|
||||
|
||||
// Returns the current analog gain level used for the emulated analog gain.
|
||||
int GetAnalogMicGainLevel() const { return emulated_analog_mic_gain_level_; }
|
||||
|
||||
private:
|
||||
// Updates the value of `pre_adjustment_gain_` based on the supplied values
|
||||
// for `pre_gain` and `emulated_analog_mic_gain_level_`.
|
||||
void UpdatePreAdjustmentGain();
|
||||
|
||||
const bool emulated_analog_mic_gain_enabled_;
|
||||
int emulated_analog_mic_gain_level_;
|
||||
float pre_gain_;
|
||||
float pre_adjustment_gain_;
|
||||
AudioSamplesScaler pre_scaler_;
|
||||
AudioSamplesScaler post_scaler_;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
|
@ -0,0 +1,287 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/echo_control_mobile_impl.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "api/audio/audio_processing.h"
|
||||
#include "modules/audio_processing/aecm/echo_control_mobile.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
// Translates a RoutingMode into the numeric echo mode stored in AecmConfig.
// Returns -1 (after a DCHECK failure in DCHECK-enabled builds) for a value
// that is not one of the enumerators.
int16_t MapSetting(EchoControlMobileImpl::RoutingMode mode) {
  int16_t echo_mode = -1;
  switch (mode) {
    case EchoControlMobileImpl::kQuietEarpieceOrHeadset:
      echo_mode = 0;
      break;
    case EchoControlMobileImpl::kEarpiece:
      echo_mode = 1;
      break;
    case EchoControlMobileImpl::kLoudEarpiece:
      echo_mode = 2;
      break;
    case EchoControlMobileImpl::kSpeakerphone:
      echo_mode = 3;
      break;
    case EchoControlMobileImpl::kLoudSpeakerphone:
      echo_mode = 4;
      break;
  }
  if (echo_mode == -1) {
    RTC_DCHECK_NOTREACHED();
  }
  return echo_mode;
}
|
||||
|
||||
// Translates an AECM error code into the matching AudioProcessing::Error.
// Codes without a dedicated mapping (e.g. AECM_UNSPECIFIED_ERROR and
// AECM_UNINITIALIZED_ERROR) become kUnspecifiedError.
AudioProcessing::Error MapError(int err) {
  if (err == AECM_UNSUPPORTED_FUNCTION_ERROR) {
    return AudioProcessing::kUnsupportedFunctionError;
  }
  if (err == AECM_NULL_POINTER_ERROR) {
    return AudioProcessing::kNullPointerError;
  }
  if (err == AECM_BAD_PARAMETER_ERROR) {
    return AudioProcessing::kBadParameterError;
  }
  if (err == AECM_BAD_PARAMETER_WARNING) {
    return AudioProcessing::kBadStreamParameterWarning;
  }
  return AudioProcessing::kUnspecifiedError;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stream parameters handed to Initialize(), kept for later processing calls.
struct EchoControlMobileImpl::StreamProperties {
  // All three values are required; there is no default state.
  StreamProperties() = delete;

  StreamProperties(int rate_hz, size_t reverse_channels, size_t output_channels)
      : sample_rate_hz(rate_hz),
        num_reverse_channels(reverse_channels),
        num_output_channels(output_channels) {}

  int sample_rate_hz;
  size_t num_reverse_channels;
  size_t num_output_channels;
};
|
||||
|
||||
// Owns one AECM instance: created in the constructor, freed in the destructor.
class EchoControlMobileImpl::Canceller {
 public:
  // Creation failure is fatal (RTC_CHECK).
  Canceller() : state_(WebRtcAecm_Create()) { RTC_CHECK(state_); }

  ~Canceller() {
    RTC_DCHECK(state_);
    WebRtcAecm_Free(state_);
  }

  // Not copyable.
  Canceller(const Canceller&) = delete;
  Canceller& operator=(const Canceller&) = delete;

  // Returns the opaque AECM handle for use with the WebRtcAecm_* functions.
  void* state() {
    RTC_DCHECK(state_);
    return state_;
  }

  // (Re)initializes the AECM instance for `sample_rate_hz`. A failure is only
  // detected in DCHECK-enabled builds.
  void Initialize(int sample_rate_hz) {
    RTC_DCHECK(state_);
    const int init_result = WebRtcAecm_Init(state_, sample_rate_hz);
    RTC_DCHECK_EQ(AudioProcessing::kNoError, init_result);
  }

 private:
  void* state_;
};
|
||||
|
||||
// Defaults: speakerphone routing, comfort noise disabled.
EchoControlMobileImpl::EchoControlMobileImpl()
    : routing_mode_(kSpeakerphone),
      comfort_noise_enabled_(false) {}
|
||||
|
||||
// Defined out of line, where Canceller and StreamProperties are complete.
EchoControlMobileImpl::~EchoControlMobileImpl() = default;
|
||||
|
||||
// Feeds packed render (far-end) audio to the cancellers.
//
// `packed_render_audio` is split into equally sized consecutive segments, one
// per (output channel, reverse channel) pair, and segment i is buffered into
// canceller i.
void EchoControlMobileImpl::ProcessRenderAudio(
    rtc::ArrayView<const int16_t> packed_render_audio) {
  RTC_DCHECK(stream_properties_);

  const size_t num_segments = stream_properties_->num_output_channels *
                              stream_properties_->num_reverse_channels;
  if (num_segments == 0) {
    // Initialize() sizes `cancellers_` to this same product, so there is
    // nothing to feed. Returning here also avoids dividing by zero below.
    return;
  }
  const size_t num_frames_per_band = packed_render_audio.size() / num_segments;

  size_t buffer_index = 0;
  for (auto& canceller : cancellers_) {
    WebRtcAecm_BufferFarend(canceller->state(),
                            &packed_render_audio[buffer_index],
                            num_frames_per_band);
    buffer_index += num_frames_per_band;
  }
}
|
||||
|
||||
void EchoControlMobileImpl::PackRenderAudioBuffer(
|
||||
const AudioBuffer* audio,
|
||||
size_t num_output_channels,
|
||||
size_t num_channels,
|
||||
std::vector<int16_t>* packed_buffer) {
|
||||
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
|
||||
audio->num_frames_per_band());
|
||||
RTC_DCHECK_EQ(num_channels, audio->num_channels());
|
||||
|
||||
// The ordering convention must be followed to pass to the correct AECM.
|
||||
packed_buffer->clear();
|
||||
int render_channel = 0;
|
||||
for (size_t i = 0; i < num_output_channels; i++) {
|
||||
for (size_t j = 0; j < audio->num_channels(); j++) {
|
||||
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> data_to_buffer;
|
||||
FloatS16ToS16(audio->split_bands_const(render_channel)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(), data_to_buffer.data());
|
||||
|
||||
// Buffer the samples in the render queue.
|
||||
packed_buffer->insert(
|
||||
packed_buffer->end(), data_to_buffer.data(),
|
||||
data_to_buffer.data() + audio->num_frames_per_band());
|
||||
render_channel = (render_channel + 1) % audio->num_channels();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// One canceller is needed per (output channel, reverse channel) pair.
size_t EchoControlMobileImpl::NumCancellersRequired(
    size_t num_output_channels,
    size_t num_reverse_channels) {
  const size_t num_pairs = num_output_channels * num_reverse_channels;
  return num_pairs;
}
|
||||
|
||||
// Runs AECM on the lowest band (kBand0To8kHz) of every capture channel of
// `audio`, writes the result back into that band and zeroes all higher bands.
//
// Returns AudioProcessing::kNoError on success. On the first AECM failure the
// mapped AudioProcessing error is returned and processing stops.
int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
                                               int stream_delay_ms) {
  RTC_DCHECK(stream_properties_);
  RTC_DCHECK_GE(160, audio->num_frames_per_band());
  RTC_DCHECK_EQ(audio->num_channels(), stream_properties_->num_output_channels);
  RTC_DCHECK_GE(cancellers_.size(), stream_properties_->num_reverse_channels *
                                        audio->num_channels());

  // The ordering convention must be followed to pass to the correct AECM.
  size_t canceller_index = 0;
  for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
    // TODO(ajm): improve how this works, possibly inside AECM.
    //            This is kind of hacked up.
    RTC_DCHECK_LT(capture, low_pass_reference_.size());
    const int16_t* noisy =
        reference_copied_ ? low_pass_reference_[capture].data() : nullptr;

    RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
                  audio->num_frames_per_band());

    // S16 scratch copy of the lowest band. It is both AECM's input and its
    // output, and stays unused when the band pointer is null.
    std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> band0_s16;
    int16_t* output = nullptr;
    const int16_t* clean = nullptr;
    if (audio->split_bands(capture)[kBand0To8kHz]) {
      FloatS16ToS16(audio->split_bands(capture)[kBand0To8kHz],
                    audio->num_frames_per_band(), band0_s16.data());
      output = band0_s16.data();
      clean = band0_s16.data();
    }

    // Without a separate reference, the band itself is passed as the noisy
    // input and no clean input is supplied.
    if (noisy == nullptr) {
      noisy = clean;
      clean = nullptr;
    }

    for (size_t render = 0; render < stream_properties_->num_reverse_channels;
         ++render) {
      const int err = WebRtcAecm_Process(
          cancellers_[canceller_index]->state(), noisy, clean, output,
          audio->num_frames_per_band(), stream_delay_ms);

      // The output is copied back even when AECM reported an error.
      if (output) {
        S16ToFloatS16(output, audio->num_frames_per_band(),
                      audio->split_bands(capture)[kBand0To8kHz]);
      }

      if (err != AudioProcessing::kNoError) {
        return MapError(err);
      }

      ++canceller_index;
    }

    // Bands above the lowest one are zeroed.
    for (size_t band = 1u; band < audio->num_bands(); ++band) {
      memset(audio->split_bands_f(capture)[band], 0,
             audio->num_frames_per_band() *
                 sizeof(audio->split_bands_f(capture)[band][0]));
    }
  }
  return AudioProcessing::kNoError;
}
|
||||
|
||||
// Stores `mode` and pushes the updated configuration to all cancellers.
// Returns kBadParameterError, without changing any state, when `mode` has no
// AECM mapping; otherwise returns the result of Configure().
int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
  const bool mode_is_mappable = MapSetting(mode) != -1;
  if (!mode_is_mappable) {
    return AudioProcessing::kBadParameterError;
  }
  routing_mode_ = mode;
  return Configure();
}
|
||||
|
||||
// Returns the currently stored routing mode.
EchoControlMobileImpl::RoutingMode EchoControlMobileImpl::routing_mode()
    const {
  return routing_mode_;
}
|
||||
|
||||
int EchoControlMobileImpl::enable_comfort_noise(bool enable) {
|
||||
comfort_noise_enabled_ = enable;
|
||||
return Configure();
|
||||
}
|
||||
|
||||
bool EchoControlMobileImpl::is_comfort_noise_enabled() const {
|
||||
return comfort_noise_enabled_;
|
||||
}
|
||||
|
||||
void EchoControlMobileImpl::Initialize(int sample_rate_hz,
|
||||
size_t num_reverse_channels,
|
||||
size_t num_output_channels) {
|
||||
low_pass_reference_.resize(num_output_channels);
|
||||
for (auto& reference : low_pass_reference_) {
|
||||
reference.fill(0);
|
||||
}
|
||||
|
||||
stream_properties_.reset(new StreamProperties(
|
||||
sample_rate_hz, num_reverse_channels, num_output_channels));
|
||||
|
||||
// AECM only supports 16 kHz or lower sample rates.
|
||||
RTC_DCHECK_LE(stream_properties_->sample_rate_hz,
|
||||
AudioProcessing::kSampleRate16kHz);
|
||||
|
||||
cancellers_.resize(
|
||||
NumCancellersRequired(stream_properties_->num_output_channels,
|
||||
stream_properties_->num_reverse_channels));
|
||||
|
||||
for (auto& canceller : cancellers_) {
|
||||
if (!canceller) {
|
||||
canceller.reset(new Canceller());
|
||||
}
|
||||
canceller->Initialize(sample_rate_hz);
|
||||
}
|
||||
Configure();
|
||||
}
|
||||
|
||||
int EchoControlMobileImpl::Configure() {
|
||||
AecmConfig config;
|
||||
config.cngMode = comfort_noise_enabled_;
|
||||
config.echoMode = MapSetting(routing_mode_);
|
||||
int error = AudioProcessing::kNoError;
|
||||
for (auto& canceller : cancellers_) {
|
||||
int handle_error = WebRtcAecm_set_config(canceller->state(), config);
|
||||
if (handle_error != AudioProcessing::kNoError) {
|
||||
error = handle_error;
|
||||
}
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
|
||||
|
||||
#include <stddef.h>
#include <stdint.h>

#include <array>
#include <memory>
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioBuffer;
|
||||
|
||||
// The acoustic echo control for mobile (AECM) component is a low complexity
|
||||
// robust option intended for use on mobile devices.
|
||||
class EchoControlMobileImpl {
|
||||
public:
|
||||
EchoControlMobileImpl();
|
||||
|
||||
~EchoControlMobileImpl();
|
||||
|
||||
// Recommended settings for particular audio routes. In general, the louder
|
||||
// the echo is expected to be, the higher this value should be set. The
|
||||
// preferred setting may vary from device to device.
|
||||
enum RoutingMode {
|
||||
kQuietEarpieceOrHeadset,
|
||||
kEarpiece,
|
||||
kLoudEarpiece,
|
||||
kSpeakerphone,
|
||||
kLoudSpeakerphone
|
||||
};
|
||||
|
||||
// Sets echo control appropriate for the audio routing `mode` on the device.
|
||||
// It can and should be updated during a call if the audio routing changes.
|
||||
int set_routing_mode(RoutingMode mode);
|
||||
RoutingMode routing_mode() const;
|
||||
|
||||
// Comfort noise replaces suppressed background noise to maintain a
|
||||
// consistent signal level.
|
||||
int enable_comfort_noise(bool enable);
|
||||
bool is_comfort_noise_enabled() const;
|
||||
|
||||
void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio);
|
||||
int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
|
||||
|
||||
void Initialize(int sample_rate_hz,
|
||||
size_t num_reverse_channels,
|
||||
size_t num_output_channels);
|
||||
|
||||
static void PackRenderAudioBuffer(const AudioBuffer* audio,
|
||||
size_t num_output_channels,
|
||||
size_t num_channels,
|
||||
std::vector<int16_t>* packed_buffer);
|
||||
|
||||
static size_t NumCancellersRequired(size_t num_output_channels,
|
||||
size_t num_reverse_channels);
|
||||
|
||||
private:
|
||||
class Canceller;
|
||||
struct StreamProperties;
|
||||
|
||||
int Configure();
|
||||
|
||||
RoutingMode routing_mode_;
|
||||
bool comfort_noise_enabled_;
|
||||
|
||||
std::vector<std::unique_ptr<Canceller>> cancellers_;
|
||||
std::unique_ptr<StreamProperties> stream_properties_;
|
||||
std::vector<std::array<int16_t, 160>> low_pass_reference_;
|
||||
bool reference_copied_ = false;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
|
41
VocieProcess/modules/audio_processing/include/aec_dump.cc
Normal file
41
VocieProcess/modules/audio_processing/include/aec_dump.cc
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/include/aec_dump.h"
|
||||
|
||||
namespace webrtc {
|
||||
InternalAPMConfig::InternalAPMConfig() = default;
|
||||
InternalAPMConfig::InternalAPMConfig(const InternalAPMConfig&) = default;
|
||||
InternalAPMConfig::InternalAPMConfig(InternalAPMConfig&&) = default;
|
||||
InternalAPMConfig& InternalAPMConfig::operator=(const InternalAPMConfig&) =
|
||||
default;
|
||||
|
||||
// Member-wise equality. Returns true only when every setting in `other`,
// including the experiments description string, equals the corresponding
// setting in this object.
bool InternalAPMConfig::operator==(const InternalAPMConfig& other) const {
  // Echo canceller (AEC) settings.
  if (aec_enabled != other.aec_enabled ||
      aec_delay_agnostic_enabled != other.aec_delay_agnostic_enabled ||
      aec_drift_compensation_enabled != other.aec_drift_compensation_enabled ||
      aec_extended_filter_enabled != other.aec_extended_filter_enabled ||
      aec_suppression_level != other.aec_suppression_level) {
    return false;
  }

  // Mobile echo canceller (AECM) settings.
  if (aecm_enabled != other.aecm_enabled ||
      aecm_comfort_noise_enabled != other.aecm_comfort_noise_enabled ||
      aecm_routing_mode != other.aecm_routing_mode) {
    return false;
  }

  // Gain control settings.
  if (agc_enabled != other.agc_enabled || agc_mode != other.agc_mode ||
      agc_limiter_enabled != other.agc_limiter_enabled) {
    return false;
  }

  // High-pass filter and noise suppression settings.
  if (hpf_enabled != other.hpf_enabled || ns_enabled != other.ns_enabled ||
      ns_level != other.ns_level) {
    return false;
  }

  // Remaining toggles and the pre-amplifier settings.
  if (transient_suppression_enabled != other.transient_suppression_enabled ||
      noise_robust_agc_enabled != other.noise_robust_agc_enabled ||
      pre_amplifier_enabled != other.pre_amplifier_enabled ||
      pre_amplifier_fixed_gain_factor !=
          other.pre_amplifier_fixed_gain_factor) {
    return false;
  }

  return experiments_description == other.experiments_description;
}
|
||||
} // namespace webrtc
|
116
VocieProcess/modules/audio_processing/include/aec_dump.h
Normal file
116
VocieProcess/modules/audio_processing/include/aec_dump.h
Normal file
@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
|
||||
#define MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/base/attributes.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/audio_processing.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Plain snapshot of the current APM configuration. It exists so the config
// can be handed over without having to include protobuf headers.
struct InternalAPMConfig {
  // Special members are declared here and defined out of line.
  InternalAPMConfig();
  InternalAPMConfig(const InternalAPMConfig&);
  InternalAPMConfig(InternalAPMConfig&&);

  InternalAPMConfig& operator=(const InternalAPMConfig&);
  // Move assignment is explicitly disabled.
  InternalAPMConfig& operator=(InternalAPMConfig&&) = delete;

  // Member-wise comparison of all settings below.
  bool operator==(const InternalAPMConfig& other) const;

  // AEC settings.
  bool aec_enabled = false;
  bool aec_delay_agnostic_enabled = false;
  bool aec_drift_compensation_enabled = false;
  bool aec_extended_filter_enabled = false;
  int aec_suppression_level = 0;

  // AECM settings.
  bool aecm_enabled = false;
  bool aecm_comfort_noise_enabled = false;
  int aecm_routing_mode = 0;

  // AGC settings.
  bool agc_enabled = false;
  int agc_mode = 0;
  bool agc_limiter_enabled = false;

  // High-pass filter and noise suppression settings.
  bool hpf_enabled = false;
  bool ns_enabled = false;
  int ns_level = 0;

  // Miscellaneous toggles.
  bool transient_suppression_enabled = false;
  bool noise_robust_agc_enabled = false;

  // Pre-amplifier settings; the gain factor defaults to 1.
  bool pre_amplifier_enabled = false;
  float pre_amplifier_fixed_gain_factor = 1.f;

  // Free-form description of active experiments; empty by default.
  std::string experiments_description = "";
};
|
||||
|
||||
// An interface for recording configuration and input/output streams
|
||||
// of the Audio Processing Module. The recordings are called
|
||||
// 'aec-dumps' and are stored in a protobuf format defined in
|
||||
// debug.proto.
|
||||
// The Write* methods are always safe to call concurrently or
|
||||
// otherwise for all implementing subclasses. The intended mode of
|
||||
// operation is to create a protobuf object from the input, and send
|
||||
// it away to be written to file asynchronously.
|
||||
class AecDump {
|
||||
public:
|
||||
struct AudioProcessingState {
|
||||
int delay;
|
||||
int drift;
|
||||
absl::optional<int> applied_input_volume;
|
||||
bool keypress;
|
||||
};
|
||||
|
||||
virtual ~AecDump() = default;
|
||||
|
||||
// Logs Event::Type INIT message.
|
||||
virtual void WriteInitMessage(const ProcessingConfig& api_format,
|
||||
int64_t time_now_ms) = 0;
|
||||
ABSL_DEPRECATED("")
|
||||
void WriteInitMessage(const ProcessingConfig& api_format) {
|
||||
WriteInitMessage(api_format, 0);
|
||||
}
|
||||
|
||||
// Logs Event::Type STREAM message. To log an input/output pair,
|
||||
// call the AddCapture* and AddAudioProcessingState methods followed
|
||||
// by a WriteCaptureStreamMessage call.
|
||||
virtual void AddCaptureStreamInput(
|
||||
const AudioFrameView<const float>& src) = 0;
|
||||
virtual void AddCaptureStreamOutput(
|
||||
const AudioFrameView<const float>& src) = 0;
|
||||
virtual void AddCaptureStreamInput(const int16_t* const data,
|
||||
int num_channels,
|
||||
int samples_per_channel) = 0;
|
||||
virtual void AddCaptureStreamOutput(const int16_t* const data,
|
||||
int num_channels,
|
||||
int samples_per_channel) = 0;
|
||||
virtual void AddAudioProcessingState(const AudioProcessingState& state) = 0;
|
||||
virtual void WriteCaptureStreamMessage() = 0;
|
||||
|
||||
// Logs Event::Type REVERSE_STREAM message.
|
||||
virtual void WriteRenderStreamMessage(const int16_t* const data,
|
||||
int num_channels,
|
||||
int samples_per_channel) = 0;
|
||||
virtual void WriteRenderStreamMessage(
|
||||
const AudioFrameView<const float>& src) = 0;
|
||||
|
||||
virtual void WriteRuntimeSetting(
|
||||
const AudioProcessing::RuntimeSetting& runtime_setting) = 0;
|
||||
|
||||
// Logs Event::Type CONFIG message.
|
||||
virtual void WriteConfig(const InternalAPMConfig& config) = 0;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
|
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/include/audio_frame_proxies.h"
|
||||
|
||||
#include "api/audio/audio_frame.h"
|
||||
#include "api/audio/audio_processing.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Runs `frame` through `ap->ProcessStream()` in place and, when the
// statistics report a voice-detection result, copies it into
// `frame->vad_activity_`. Returns kNullPointerError if either pointer is
// null, otherwise the ProcessStream() result.
int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
  if (ap == nullptr || frame == nullptr) {
    return AudioProcessing::Error::kNullPointerError;
  }

  // Input and output use the same format because the frame is processed in
  // place.
  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_);
  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_);
  RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());

  const int result = ap->ProcessStream(frame->data(), input_config,
                                       output_config, frame->mutable_data());

  // The VAD field is only touched when a detection result is available.
  const AudioProcessingStats stats = ap->GetStatistics();
  if (stats.voice_detected) {
    if (*stats.voice_detected) {
      frame->vad_activity_ = AudioFrame::VADActivity::kVadActive;
    } else {
      frame->vad_activity_ = AudioFrame::VADActivity::kVadPassive;
    }
  }

  return result;
}
|
||||
|
||||
// Runs `frame` through `ap->ProcessReverseStream()` in place. Returns
// kNullPointerError for null arguments, kBadSampleRateError when the frame's
// rate is not one of the native rates, kBadNumberChannelsError when the frame
// has no channels, and otherwise the ProcessReverseStream() result.
int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
  if (ap == nullptr || frame == nullptr) {
    return AudioProcessing::Error::kNullPointerError;
  }

  // Only the native rates (8, 16, 32 and 48 kHz) are accepted.
  const bool is_native_rate =
      frame->sample_rate_hz_ == AudioProcessing::NativeRate::kSampleRate8kHz ||
      frame->sample_rate_hz_ == AudioProcessing::NativeRate::kSampleRate16kHz ||
      frame->sample_rate_hz_ == AudioProcessing::NativeRate::kSampleRate32kHz ||
      frame->sample_rate_hz_ == AudioProcessing::NativeRate::kSampleRate48kHz;
  if (!is_native_rate) {
    return AudioProcessing::Error::kBadSampleRateError;
  }

  if (frame->num_channels_ <= 0) {
    return AudioProcessing::Error::kBadNumberChannelsError;
  }

  // Input and output use the same format because the frame is processed in
  // place.
  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_);
  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_);

  return ap->ProcessReverseStream(frame->data(), input_config, output_config,
                                  frame->mutable_data());
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
|
||||
#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class AudioProcessing;
|
||||
|
||||
// Processes a 10 ms `frame` of the primary audio stream using the provided
|
||||
// AudioProcessing object. On the client-side, this is the near-end (or
|
||||
// captured) audio. The `sample_rate_hz_`, `num_channels_`, and
|
||||
// `samples_per_channel_` members of `frame` must be valid. If changed from the
|
||||
// previous call to this function, it will trigger an initialization of the
|
||||
// provided AudioProcessing object.
|
||||
// The function returns any error codes passed from the AudioProcessing
|
||||
// ProcessStream method.
|
||||
int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame);
|
||||
|
||||
// Processes a 10 ms `frame` of the reverse direction audio stream using the
|
||||
// provided AudioProcessing object. The frame may be modified. On the
|
||||
// client-side, this is the far-end (or to be rendered) audio. The
|
||||
// `sample_rate_hz_`, `num_channels_`, and `samples_per_channel_` members of
|
||||
// `frame` must be valid. If changed from the previous call to this function, it
|
||||
// will trigger an initialization of the provided AudioProcessing object.
|
||||
// The function returns any error codes passed from the AudioProcessing
|
||||
// ProcessReverseStream method.
|
||||
int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
|
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
|
||||
#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
|
||||
|
||||
#include "api/audio/audio_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class to pass audio data in T** format, where T is a numeric type.
|
||||
template <class T>
|
||||
class AudioFrameView {
|
||||
public:
|
||||
// `num_channels` and `channel_size` describe the T**
|
||||
// `audio_samples`. `audio_samples` is assumed to point to a
|
||||
// two-dimensional |num_channels * channel_size| array of floats.
|
||||
//
|
||||
// Note: The implementation now only requires the first channel pointer.
|
||||
// The previous implementation retained a pointer to externally owned array
|
||||
// of channel pointers, but since the channel size and count are provided
|
||||
// and the array is assumed to be a single two-dimensional array, the other
|
||||
// channel pointers can be calculated based on that (which is what the class
|
||||
// now uses `DeinterleavedView<>` internally for).
|
||||
AudioFrameView(T* const* audio_samples, int num_channels, int channel_size)
|
||||
: view_(num_channels && channel_size ? audio_samples[0] : nullptr,
|
||||
channel_size,
|
||||
num_channels) {
|
||||
RTC_DCHECK_GE(view_.num_channels(), 0);
|
||||
RTC_DCHECK_GE(view_.samples_per_channel(), 0);
|
||||
}
|
||||
|
||||
// Implicit cast to allow converting AudioFrameView<float> to
|
||||
// AudioFrameView<const float>.
|
||||
template <class U>
|
||||
AudioFrameView(AudioFrameView<U> other) : view_(other.view()) {}
|
||||
|
||||
// Allow constructing AudioFrameView from a DeinterleavedView.
|
||||
template <class U>
|
||||
explicit AudioFrameView(DeinterleavedView<U> view) : view_(view) {}
|
||||
|
||||
AudioFrameView() = delete;
|
||||
|
||||
int num_channels() const { return view_.num_channels(); }
|
||||
int samples_per_channel() const { return view_.samples_per_channel(); }
|
||||
MonoView<T> channel(int idx) { return view_[idx]; }
|
||||
MonoView<const T> channel(int idx) const { return view_[idx]; }
|
||||
MonoView<T> operator[](int idx) { return view_[idx]; }
|
||||
MonoView<const T> operator[](int idx) const { return view_[idx]; }
|
||||
|
||||
DeinterleavedView<T> view() { return view_; }
|
||||
DeinterleavedView<const T> view() const { return view_; }
|
||||
|
||||
private:
|
||||
DeinterleavedView<T> view_;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
|
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Functor to supply as the verifier function for queue item verification.
// An item (a std::vector<T>) passes when its capacity is at least the minimum
// capacity given at construction.
template <typename T>
class RenderQueueItemVerifier {
 public:
  explicit RenderQueueItemVerifier(size_t minimum_capacity)
      : minimum_capacity_(minimum_capacity) {}

  // Returns true when `v` has capacity for at least `minimum_capacity_`
  // elements. Only the capacity is inspected, not the size or contents.
  bool operator()(const std::vector<T>& v) const {
    const bool too_small = v.capacity() < minimum_capacity_;
    return !too_small;
  }

 private:
  size_t minimum_capacity_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
|
138
VocieProcess/modules/audio_processing/rms_level.cc
Normal file
138
VocieProcess/modules/audio_processing/rms_level.cc
Normal file
@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/rms_level.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
static constexpr float kMaxSquaredLevel = 32768 * 32768;
|
||||
// kMinLevel is the level corresponding to kMinLevelDb, that is 10^(-127/10).
|
||||
static constexpr float kMinLevel = 1.995262314968883e-13f;
|
||||
|
||||
// Calculates the normalized RMS value from a mean square value. The input
|
||||
// should be the sum of squared samples divided by the number of samples. The
|
||||
// value will be normalized to full range before computing the RMS, wich is
|
||||
// returned as a negated dBfs. That is, 0 is full amplitude while 127 is very
|
||||
// faint.
|
||||
int ComputeRms(float mean_square) {
|
||||
if (mean_square <= kMinLevel * kMaxSquaredLevel) {
|
||||
// Very faint; simply return the minimum value.
|
||||
return RmsLevel::kMinLevelDb;
|
||||
}
|
||||
// Normalize by the max level.
|
||||
const float mean_square_norm = mean_square / kMaxSquaredLevel;
|
||||
RTC_DCHECK_GT(mean_square_norm, kMinLevel);
|
||||
// 20log_10(x^0.5) = 10log_10(x)
|
||||
const float rms = 10.f * std::log10(mean_square_norm);
|
||||
RTC_DCHECK_LE(rms, 0.f);
|
||||
RTC_DCHECK_GT(rms, -RmsLevel::kMinLevelDb);
|
||||
// Return the negated value.
|
||||
return static_cast<int>(-rms + 0.5f);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// The members have no in-class initializers, so construction goes through
// Reset() to put every accumulator into its initial state.
RmsLevel::RmsLevel() {
  Reset();
}

// Nothing to release; the destructor is defaulted out of line.
RmsLevel::~RmsLevel() = default;
|
||||
|
||||
// Clears all accumulated state: the running and peak sums of squares, the
// sample counter, and the remembered block size.
void RmsLevel::Reset() {
  block_size_.reset();
  sample_count_ = 0;
  sum_square_ = 0.f;
  max_sum_square_ = 0.f;
}
|
||||
|
||||
// Accumulates the energy of one block of 16-bit samples. Empty input is
// ignored. A block whose size differs from the previous one resets the
// accumulated state first (see CheckBlockSize()).
void RmsLevel::Analyze(rtc::ArrayView<const int16_t> data) {
  if (data.empty()) {
    return;
  }

  CheckBlockSize(data.size());

  // Sum of squared samples for this block, accumulated in float. Each square
  // is computed in integer arithmetic before being added.
  float block_energy = 0.f;
  for (int16_t sample : data) {
    block_energy = block_energy + sample * sample;
  }
  RTC_DCHECK_GE(block_energy, 0.f);

  sum_square_ += block_energy;
  sample_count_ += data.size();

  // Track the most energetic block seen so far for the peak level.
  max_sum_square_ = std::max(max_sum_square_, block_energy);
}
|
||||
|
||||
// Accumulates the energy of one block of float samples. Each sample is
// clamped to [-32768, 32767] and converted to int16_t before squaring.
// Empty input is ignored. A block whose size differs from the previous one
// resets the accumulated state first (see CheckBlockSize()).
void RmsLevel::Analyze(rtc::ArrayView<const float> data) {
  if (data.empty()) {
    return;
  }

  CheckBlockSize(data.size());

  float block_energy = 0.f;
  for (size_t i = 0; i < data.size(); ++i) {
    // Clamp to the 16-bit range, then truncate to an integer sample.
    const float lower_bounded = std::max(data[i], -32768.f);
    const float clamped = std::min(lower_bounded, 32767.f);
    const int16_t quantized = static_cast<int16_t>(clamped);
    block_energy += quantized * quantized;
  }
  RTC_DCHECK_GE(block_energy, 0.f);

  sum_square_ += block_energy;
  sample_count_ += data.size();

  // Track the most energetic block seen so far for the peak level.
  max_sum_square_ = std::max(max_sum_square_, block_energy);
}
|
||||
|
||||
// Accounts for `length` samples without adding any energy: only the sample
// counter grows, so the sums of squares stay unchanged. A block size that
// differs from the previous one resets the accumulated state first.
void RmsLevel::AnalyzeMuted(size_t length) {
  CheckBlockSize(length);
  sample_count_ += length;
}
|
||||
|
||||
int RmsLevel::Average() {
|
||||
const bool have_samples = (sample_count_ != 0);
|
||||
int rms = have_samples ? ComputeRms(sum_square_ / sample_count_)
|
||||
: RmsLevel::kMinLevelDb;
|
||||
|
||||
// To ensure that kMinLevelDb represents digital silence (muted audio
|
||||
// sources) we'll check here if the sum_square is actually 0. If it's not
|
||||
// we'll bump up the return value to `kInaudibleButNotMuted`.
|
||||
// https://datatracker.ietf.org/doc/html/rfc6464
|
||||
if (have_samples && rms == RmsLevel::kMinLevelDb && sum_square_ != 0.0f) {
|
||||
rms = kInaudibleButNotMuted;
|
||||
}
|
||||
|
||||
Reset();
|
||||
return rms;
|
||||
}
|
||||
|
||||
// Returns both the average level and the level of the most energetic block
// analyzed since the last reset, then resets the state. With no samples both
// values are kMinLevelDb.
RmsLevel::Levels RmsLevel::AverageAndPeak() {
  Levels levels{RmsLevel::kMinLevelDb, RmsLevel::kMinLevelDb};

  if (sample_count_ != 0) {
    // By design block_size_ always holds a value when sample_count_ != 0;
    // dereferencing the absl::optional additionally enforces this with a
    // DCHECK.
    levels.average = ComputeRms(sum_square_ / sample_count_);
    levels.peak = ComputeRms(max_sum_square_ / *block_size_);
  }

  Reset();
  return levels;
}
|
||||
|
||||
void RmsLevel::CheckBlockSize(size_t block_size) {
|
||||
if (block_size_ != block_size) {
|
||||
Reset();
|
||||
block_size_ = block_size;
|
||||
}
|
||||
}
|
||||
} // namespace webrtc
|
77
VocieProcess/modules/audio_processing/rms_level.h
Normal file
77
VocieProcess/modules/audio_processing/rms_level.h
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Computes the root mean square (RMS) level in dBFs (decibels from digital
|
||||
// full-scale) of audio data. The computation follows RFC 6465:
|
||||
// https://tools.ietf.org/html/rfc6465
|
||||
// with the intent that it can provide the RTP audio level indication.
|
||||
//
|
||||
// The expected approach is to provide constant-sized chunks of audio to
|
||||
// Analyze(). When enough chunks have been accumulated to form a packet, call
|
||||
// Average() to get the audio level indicator for the RTP header.
|
||||
class RmsLevel {
|
||||
public:
|
||||
struct Levels {
|
||||
int average;
|
||||
int peak;
|
||||
};
|
||||
|
||||
enum : int { kMinLevelDb = 127, kInaudibleButNotMuted = 126 };
|
||||
|
||||
RmsLevel();
|
||||
~RmsLevel();
|
||||
|
||||
// Can be called to reset internal states, but is not required during normal
|
||||
// operation.
|
||||
void Reset();
|
||||
|
||||
// Pass each chunk of audio to Analyze() to accumulate the level.
|
||||
void Analyze(rtc::ArrayView<const int16_t> data);
|
||||
void Analyze(rtc::ArrayView<const float> data);
|
||||
|
||||
// If all samples with the given `length` have a magnitude of zero, this is
|
||||
// a shortcut to avoid some computation.
|
||||
void AnalyzeMuted(size_t length);
|
||||
|
||||
// Computes the RMS level over all data passed to Analyze() since the last
|
||||
// call to Average(). The returned value is positive but should be interpreted
|
||||
// as negative as per the RFC. It is constrained to [0, 127]. Resets the
|
||||
// internal state to start a new measurement period.
|
||||
int Average();
|
||||
|
||||
// Like Average() above, but also returns the RMS peak value. Resets the
|
||||
// internal state to start a new measurement period.
|
||||
Levels AverageAndPeak();
|
||||
|
||||
private:
|
||||
// Compares `block_size` with `block_size_`. If they are different, calls
|
||||
// Reset() and stores the new size.
|
||||
void CheckBlockSize(size_t block_size);
|
||||
|
||||
float sum_square_;
|
||||
size_t sample_count_;
|
||||
float max_sum_square_;
|
||||
absl::optional<size_t> block_size_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
|
29
VocieProcess/modules/audio_processing/vad/common.h
Normal file
29
VocieProcess/modules/audio_processing/vad/common.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
// Sample rate these constants are derived from.
static const int kSampleRateHz = 16000;
// Number of samples in 10 ms at kSampleRateHz (16000 / 100 = 160).
static const size_t kLength10Ms = kSampleRateHz / 100;
// Capacity of each per-frame array in AudioFeatures.
static const size_t kMaxNumFrames = 4;

// Bundle of per-frame audio features. Each array has room for kMaxNumFrames
// entries.
// NOTE(review): presumably `num_frames` is the number of valid entries in the
// arrays and `silence` flags a silent input; confirm against the producers.
struct AudioFeatures {
  double log_pitch_gain[kMaxNumFrames];
  double pitch_lag_hz[kMaxNumFrames];
  double spectral_peak[kMaxNumFrames];
  double rms[kMaxNumFrames];
  size_t num_frames;
  bool silence;
};
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
61
VocieProcess/modules/audio_processing/vad/gmm.cc
Normal file
61
VocieProcess/modules/audio_processing/vad/gmm.cc
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/vad/gmm.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kMaxDimension = 10;
|
||||
|
||||
// Writes the element-wise difference `in[k] - mean_vec[k]` into `out[k]` for
// the first `dimension` elements. Nothing is written when `dimension` is not
// positive.
static void RemoveMean(const double* in,
                       const double* mean_vec,
                       int dimension,
                       double* out) {
  int k = 0;
  while (k < dimension) {
    out[k] = in[k] - mean_vec[k];
    ++k;
  }
}
|
||||
|
||||
// Returns -0.5 * in^T * covar_inv * in, where `covar_inv` is read as a
// row-major `dimension` x `dimension` matrix and `in` as a vector of length
// `dimension`.
static double ComputeExponent(const double* in,
                              const double* covar_inv,
                              int dimension) {
  double quad_form = 0;
  for (int row = 0; row < dimension; ++row) {
    // Dot product of the current matrix row with the input vector.
    const double* matrix_row = covar_inv + row * dimension;
    double row_dot = 0;
    for (int col = 0; col < dimension; ++col) {
      row_dot += matrix_row[col] * in[col];
    }
    quad_form += row_dot * in[row];
  }
  return quad_form * -0.5;
}
|
||||
|
||||
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) {
|
||||
if (gmm_parameters.dimension > kMaxDimension) {
|
||||
return -1; // This is invalid pdf so the caller can check this.
|
||||
}
|
||||
double f = 0;
|
||||
double v[kMaxDimension];
|
||||
const double* mean_vec = gmm_parameters.mean;
|
||||
const double* covar_inv = gmm_parameters.covar_inverse;
|
||||
|
||||
for (int n = 0; n < gmm_parameters.num_mixtures; n++) {
|
||||
RemoveMean(x, mean_vec, gmm_parameters.dimension, v);
|
||||
double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) +
|
||||
gmm_parameters.weight[n];
|
||||
f += exp(q);
|
||||
mean_vec += gmm_parameters.dimension;
|
||||
covar_inv += gmm_parameters.dimension * gmm_parameters.dimension;
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
45
VocieProcess/modules/audio_processing/vad/gmm.h
Normal file
45
VocieProcess/modules/audio_processing/vad/gmm.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Parameters of a Gaussian mixture model (GMM). A GMM is formulated as
//   f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... +
//          w[num_mixtures - 1] * mixture[num_mixtures - 1];
// where each 'mixture' is a Gaussian density.
struct GmmParameters {
  // Per-mixture weight term:
  //   weight[n] = log(w[n]) - `dimension`/2 * log(2*pi)
  //               - 1/2 * log(det(cov[n]));
  // where cov[n] is the covariance matrix of mixture n.
  const double* weight;

  // Points to the first element of a `num_mixtures` x `dimension` matrix
  // whose kth row is the mean of the kth mixture.
  const double* mean;

  // Points to the first element of a `num_mixtures` x `dimension` x
  // `dimension` 3D matrix whose kth 2D matrix is the inverse of the
  // covariance matrix of the kth mixture.
  const double* covar_inverse;

  // Dimensionality of the mixtures.
  int dimension;

  // Number of mixtures.
  int num_mixtures;
};
|
||||
|
||||
// Evaluate the given GMM, according to `gmm_parameters`, at the given point
|
||||
// `x`. If the dimensionality of the given GMM is larger than the maximum
|
||||
// acceptable dimension by the following function -1 is returned.
|
||||
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
82
VocieProcess/modules/audio_processing/vad/noise_gmm_tables.h
Normal file
82
VocieProcess/modules/audio_processing/vad/noise_gmm_tables.h
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// GMM tables for inactive segments. Generated by MakeGmmTables.m.
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kNoiseGmmNumMixtures = 12;
|
||||
static const int kNoiseGmmDim = 3;
|
||||
|
||||
static const double
|
||||
kNoiseGmmCovarInverse[kNoiseGmmNumMixtures][kNoiseGmmDim][kNoiseGmmDim] = {
|
||||
{{7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02},
|
||||
{4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04},
|
||||
{1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}},
|
||||
{{8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03},
|
||||
{-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04},
|
||||
{5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}},
|
||||
{{4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03},
|
||||
{-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05},
|
||||
{-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}},
|
||||
{{9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03},
|
||||
{-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07},
|
||||
{-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}},
|
||||
{{7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02},
|
||||
{-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06},
|
||||
{2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}},
|
||||
{{8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02},
|
||||
{-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06},
|
||||
{-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}},
|
||||
{{9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03},
|
||||
{5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07},
|
||||
{-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}},
|
||||
{{8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03},
|
||||
{5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07},
|
||||
{6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}},
|
||||
{{6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03},
|
||||
{-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05},
|
||||
{5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}},
|
||||
{{6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03},
|
||||
{4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08},
|
||||
{-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}},
|
||||
{{1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02},
|
||||
{-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07},
|
||||
{-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}},
|
||||
{{4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03},
|
||||
{-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07},
|
||||
{5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}};
|
||||
|
||||
// Mean vectors of the noise GMM: one row per mixture component
// (kNoiseGmmNumMixtures rows), one column per feature (kNoiseGmmDim columns).
// PitchBasedVad's constructor exposes this table as `noise_gmm_.mean`.
// NOTE(review): the column order presumably follows the feature vector built
// in PitchBasedVad::VoicingProbability (log pitch gain, spectral peak, pitch
// lag in Hz) -- confirm against the model-training scripts.
static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = {
|
||||
{-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01},
|
||||
{-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02},
|
||||
{-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02},
|
||||
{-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02},
|
||||
{-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01},
|
||||
{-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02},
|
||||
{-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02},
|
||||
{-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02},
|
||||
{-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02},
|
||||
{-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02},
|
||||
{-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02},
|
||||
{-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}};
|
||||
|
||||
// Per-mixture weights of the noise GMM, one entry per mixture component.
// PitchBasedVad's constructor exposes this table as `noise_gmm_.weight`.
// NOTE(review): every entry is negative, so these are presumably stored in
// the log domain (possibly with the Gaussian normalisation folded in) --
// confirm against the GMM evaluation code before editing any value.
static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = {
|
||||
-1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01,
|
||||
-1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01,
|
||||
-1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01,
|
||||
-1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
120
VocieProcess/modules/audio_processing/vad/pitch_based_vad.cc
Normal file
120
VocieProcess/modules/audio_processing/vad/pitch_based_vad.cc
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/vad/pitch_based_vad.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "modules/audio_processing/vad/common.h"
|
||||
#include "modules/audio_processing/vad/noise_gmm_tables.h"
|
||||
#include "modules/audio_processing/vad/vad_circular_buffer.h"
|
||||
#include "modules/audio_processing/vad/voice_gmm_tables.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Both models are evaluated on the same feature vector, so their dimensions
// must agree.
static_assert(kNoiseGmmDim == kVoiceGmmDim,
|
||||
"noise and voice gmm dimension not equal");
|
||||
|
||||
// These values should match MATLAB counterparts for unit-tests to pass.
|
||||
// Capacity handed to VadCircularBuffer::Create() for the posterior history.
static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.
|
||||
// Value `p_prior_` starts from before any frame has been processed.
static const double kInitialPriorProbability = 0.3;
|
||||
// First argument passed to VadCircularBuffer::RemoveTransient().
static const int kTransientWidthThreshold = 7;
|
||||
// Second argument passed to VadCircularBuffer::RemoveTransient().
static const double kLowProbabilityThreshold = 0.2;
|
||||
|
||||
// Clamps a probability to the closed interval [0.01, 0.99] so that it can
// never saturate at exactly 0 or 1. Values already inside the interval are
// returned unchanged.
static double LimitProbability(double p) {
  const double kFloor = 0.01;
  const double kCeiling = 0.99;

  if (p > kCeiling) {
    return kCeiling;
  }
  if (p < kFloor) {
    return kFloor;
  }
  return p;
}
|
||||
|
||||
// Seeds the voicing prior, allocates the posterior-history buffer and points
// both GMM descriptors at their constant parameter tables. The tables are
// referenced, not copied.
PitchBasedVad::PitchBasedVad()
    : p_prior_(kInitialPriorProbability),
      circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
  // Voice model.
  voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
  voice_gmm_.mean = &kVoiceGmmMean[0][0];
  voice_gmm_.weight = kVoiceGmmWeights;
  voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
  voice_gmm_.dimension = kVoiceGmmDim;

  // Noise model.
  noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
  noise_gmm_.mean = &kNoiseGmmMean[0][0];
  noise_gmm_.weight = kNoiseGmmWeights;
  noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
  noise_gmm_.dimension = kNoiseGmmDim;
}
|
||||
|
||||
// Defined out of line so that the std::unique_ptr<VadCircularBuffer> member is
// destroyed where VadCircularBuffer is a complete type.
PitchBasedVad::~PitchBasedVad() = default;
|
||||
|
||||
int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
|
||||
double* p_combined) {
|
||||
double p;
|
||||
double gmm_features[3];
|
||||
double pdf_features_given_voice;
|
||||
double pdf_features_given_noise;
|
||||
// These limits are the same in matlab implementation 'VoicingProbGMM().'
|
||||
const double kLimLowLogPitchGain = -2.0;
|
||||
const double kLimHighLogPitchGain = -0.9;
|
||||
const double kLimLowSpectralPeak = 200;
|
||||
const double kLimHighSpectralPeak = 2000;
|
||||
const double kEps = 1e-12;
|
||||
for (size_t n = 0; n < features.num_frames; n++) {
|
||||
gmm_features[0] = features.log_pitch_gain[n];
|
||||
gmm_features[1] = features.spectral_peak[n];
|
||||
gmm_features[2] = features.pitch_lag_hz[n];
|
||||
|
||||
pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
|
||||
pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
|
||||
|
||||
if (features.spectral_peak[n] < kLimLowSpectralPeak ||
|
||||
features.spectral_peak[n] > kLimHighSpectralPeak ||
|
||||
features.log_pitch_gain[n] < kLimLowLogPitchGain) {
|
||||
pdf_features_given_voice = kEps * pdf_features_given_noise;
|
||||
} else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
|
||||
pdf_features_given_noise = kEps * pdf_features_given_voice;
|
||||
}
|
||||
|
||||
p = p_prior_ * pdf_features_given_voice /
|
||||
(pdf_features_given_voice * p_prior_ +
|
||||
pdf_features_given_noise * (1 - p_prior_));
|
||||
|
||||
p = LimitProbability(p);
|
||||
|
||||
// Combine pitch-based probability with standalone probability, before
|
||||
// updating prior probabilities.
|
||||
double prod_active = p * p_combined[n];
|
||||
double prod_inactive = (1 - p) * (1 - p_combined[n]);
|
||||
p_combined[n] = prod_active / (prod_active + prod_inactive);
|
||||
|
||||
if (UpdatePrior(p_combined[n]) < 0)
|
||||
return -1;
|
||||
// Limit prior probability. With a zero prior probability the posterior
|
||||
// probability is always zero.
|
||||
p_prior_ = LimitProbability(p_prior_);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PitchBasedVad::UpdatePrior(double p) {
|
||||
circular_buffer_->Insert(p);
|
||||
if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
|
||||
kLowProbabilityThreshold) < 0)
|
||||
return -1;
|
||||
p_prior_ = circular_buffer_->Mean();
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
57
VocieProcess/modules/audio_processing/vad/pitch_based_vad.h
Normal file
57
VocieProcess/modules/audio_processing/vad/pitch_based_vad.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/vad/common.h"
|
||||
#include "modules/audio_processing/vad/gmm.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class VadCircularBuffer;
|
||||
|
||||
// Computes the probability of the input audio frame to be active given
|
||||
// the corresponding pitch-gain and lag of the frame.
|
||||
class PitchBasedVad {
|
||||
public:
|
||||
// Sets the initial prior and points the GMM descriptors at their tables.
PitchBasedVad();
|
||||
~PitchBasedVad();
|
||||
|
||||
// Compute pitch-based voicing probability, given the features.
|
||||
// features: a structure containing features required for computing voicing
|
||||
// probabilities.
|
||||
//
|
||||
// p_combined: an array which contains the combined activity probabilities
|
||||
// computed prior to the call of this function. The method,
|
||||
// then, computes the voicing probabilities and combines them
|
||||
// with the given values. The results are returned in `p_combined`.
|
||||
// Returns 0 on success and -1 if updating the prior probability fails.
int VoicingProbability(const AudioFeatures& features, double* p_combined);
|
||||
|
||||
private:
|
||||
// Inserts `p` into the posterior history and recomputes `p_prior_` as the
// mean of that history. Returns 0 on success, -1 on error.
int UpdatePrior(double p);
|
||||
|
||||
// TODO(turajs): maybe defining this at a higher level (maybe enum) so that
|
||||
// all the code recognize it as "no-error."
|
||||
static const int kNoError = 0;
|
||||
|
||||
// Descriptors of the noise and voice Gaussian mixture models; they point at
// constant parameter tables.
GmmParameters noise_gmm_;
|
||||
GmmParameters voice_gmm_;
|
||||
|
||||
// Running prior probability of voicing, updated after every frame.
double p_prior_;
|
||||
|
||||
// History of combined posterior probabilities used to derive the prior.
std::unique_ptr<VadCircularBuffer> circular_buffer_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
55
VocieProcess/modules/audio_processing/vad/pitch_internal.cc
Normal file
55
VocieProcess/modules/audio_processing/vad/pitch_internal.cc
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/vad/pitch_internal.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Linearly interpolates four input pitch parameters down to three outputs.
//
// Where the weights come from: the inputs are updated every 7.5 ms, while
// within a 30-ms interval the values wanted are those for 0-5 ms, 10-15 ms and
// 20-25 ms. That is equivalent to interpolating 4-to-6 and keeping the odd
// samples. Those positions are used because LPC coefficients are computed for
// the first half of each 10-ms interval.
//
// `old_val` is the last input of the previous call, `in` must hold four
// values and `out` receives three.
static void PitchInterpolation(double old_val, const double* in, double* out) {
  const double kOneSixth = 1. / 6.;
  const double kFiveSixths = 5. / 6.;

  out[0] = kOneSixth * old_val + kFiveSixths * in[0];
  out[1] = kFiveSixths * in[1] + kOneSixth * in[2];
  out[2] = 0.5 * in[2] + 0.5 * in[3];
}
|
||||
|
||||
// Converts per-input-frame pitch gains and lags into per-output-subframe log
// pitch gains and pitch frequencies.
//
// `gains` is overwritten in place with log(gain + 1e-12). `*log_old_gain` and
// `*old_lag` supply the last values of the previous call and are updated to
// the last values of this call. The interpolation always consumes four inputs
// and produces three outputs. `sampling_rate_hz` is divided by the
// interpolated lag (in samples) to obtain `pitch_lag_hz`.
void GetSubframesPitchParameters(int sampling_rate_hz,
                                 double* gains,
                                 double* lags,
                                 int num_in_frames,
                                 int num_out_frames,
                                 double* log_old_gain,
                                 double* old_lag,
                                 double* log_pitch_gain,
                                 double* pitch_lag_hz) {
  // Gain interpolation is in log-domain, also returned in log-domain. The
  // small offset keeps the logarithm finite for a zero gain.
  const double kLogOffset = 1e-12;
  for (int i = 0; i < num_in_frames; ++i) {
    gains[i] = log(gains[i] + kLogOffset);
  }

  const int last_in = num_in_frames - 1;

  // Interpolate the gains, then remember the newest one for the next call.
  PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
  *log_old_gain = gains[last_in];

  // Same for the lags.
  PitchInterpolation(*old_lag, lags, pitch_lag_hz);
  *old_lag = lags[last_in];

  // Convert pitch-lags (samples) to Hertz.
  for (int i = 0; i < num_out_frames; ++i) {
    pitch_lag_hz[i] = (sampling_rate_hz) / (pitch_lag_hz[i]);
  }
}
|
||||
|
||||
} // namespace webrtc
|
30
VocieProcess/modules/audio_processing/vad/pitch_internal.h
Normal file
30
VocieProcess/modules/audio_processing/vad/pitch_internal.h
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Converts pitch gains and lags of the input frames into log pitch gains and
// pitch frequencies (Hz) of the output sub-frames.
//
// sampling_rate_hz: sampling rate used to convert lags to Hertz.
// gains: `num_in_frames` pitch gains; overwritten in place with their
//        logarithm.
// lags: `num_in_frames` pitch lags.
// log_old_gain, old_lag: in: last log-gain / lag of the previous call;
//                        out: last log-gain / lag of this call.
// log_pitch_gain, pitch_lag_hz: outputs, `num_out_frames` values each.
//
// NOTE(review): the implementation's interpolation reads four inputs and
// writes three outputs regardless of `num_in_frames` / `num_out_frames`, so
// callers should pass at least that many -- confirm before relying on others.
// TODO(turajs): Write a description of this function. Also be consistent with
|
||||
// usage of `sampling_rate_hz` vs `kSamplingFreqHz`.
|
||||
void GetSubframesPitchParameters(int sampling_rate_hz,
|
||||
double* gains,
|
||||
double* lags,
|
||||
int num_in_frames,
|
||||
int num_out_frames,
|
||||
double* log_old_gain,
|
||||
double* old_lag,
|
||||
double* log_pitch_gain,
|
||||
double* pitch_lag_hz);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
107
VocieProcess/modules/audio_processing/vad/pole_zero_filter.cc
Normal file
107
VocieProcess/modules/audio_processing/vad/pole_zero_filter.cc
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/vad/pole_zero_filter.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Validates the arguments and, if they are usable, returns a newly allocated
// filter that the caller owns.
//
// Returns NULL when either coefficient array is NULL, when either order
// exceeds kMaxFilterOrder, or when the leading denominator coefficient is zero
// (the filter is normalised by that coefficient).
PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
                                       size_t order_numerator,
                                       const float* denominator_coefficients,
                                       size_t order_denominator) {
  // The pointers must be checked before denominator_coefficients[0] is read.
  if (numerator_coefficients == NULL || denominator_coefficients == NULL)
    return NULL;
  if (order_numerator > kMaxFilterOrder ||
      order_denominator > kMaxFilterOrder)
    return NULL;
  if (denominator_coefficients[0] == 0)
    return NULL;
  return new PoleZeroFilter(numerator_coefficients, order_numerator,
                            denominator_coefficients, order_denominator);
}
|
||||
|
||||
// Copies the coefficient sets, clears the filter history and normalises both
// coefficient sets so that the leading denominator coefficient becomes 1.
//
// The arguments are expected to have been validated by Create(): non-NULL
// arrays of `order + 1` elements, orders not above kMaxFilterOrder and a
// non-zero denominator_coefficients[0].
PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
                               size_t order_numerator,
                               const float* denominator_coefficients,
                               size_t order_denominator)
    : past_input_(),
      past_output_(),
      numerator_coefficients_(),
      denominator_coefficients_(),
      order_numerator_(order_numerator),
      order_denominator_(order_denominator),
      highest_order_(std::max(order_denominator, order_numerator)) {
  memcpy(numerator_coefficients_, numerator_coefficients,
         sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
  memcpy(denominator_coefficients_, denominator_coefficients,
         sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));

  // Take a copy of the divisor: dividing by denominator_coefficients_[0]
  // directly would turn it into 1 on the first iteration and leave every
  // remaining denominator coefficient unscaled.
  const float leading_coefficient = denominator_coefficients_[0];
  if (leading_coefficient != 1) {
    for (size_t n = 0; n <= order_numerator_; n++)
      numerator_coefficients_[n] /= leading_coefficient;
    for (size_t n = 0; n <= order_denominator_; n++)
      denominator_coefficients_[n] /= leading_coefficient;
  }
}
|
||||
|
||||
// Returns sum over k = 1..order of coefficients[k] * past[order - k], i.e. the
// contribution of the `order` previous samples to the current output.
// `past[order - 1]` is the most recent sample and `past[0]` the oldest;
// coefficients[0] is not used.
template <typename T>
static float FilterArPast(const T* past,
                          size_t order,
                          const float* coefficients) {
  float acc = 0.0f;
  for (size_t k = 1; k <= order; ++k) {
    acc += coefficients[k] * past[order - k];
  }
  return acc;
}
|
||||
|
||||
int PoleZeroFilter::Filter(const int16_t* in,
|
||||
size_t num_input_samples,
|
||||
float* output) {
|
||||
if (in == NULL || output == NULL)
|
||||
return -1;
|
||||
// This is the typical case, just a memcpy.
|
||||
const size_t k = std::min(num_input_samples, highest_order_);
|
||||
size_t n;
|
||||
for (n = 0; n < k; n++) {
|
||||
output[n] = in[n] * numerator_coefficients_[0];
|
||||
output[n] += FilterArPast(&past_input_[n], order_numerator_,
|
||||
numerator_coefficients_);
|
||||
output[n] -= FilterArPast(&past_output_[n], order_denominator_,
|
||||
denominator_coefficients_);
|
||||
|
||||
past_input_[n + order_numerator_] = in[n];
|
||||
past_output_[n + order_denominator_] = output[n];
|
||||
}
|
||||
if (highest_order_ < num_input_samples) {
|
||||
for (size_t m = 0; n < num_input_samples; n++, m++) {
|
||||
output[n] = in[n] * numerator_coefficients_[0];
|
||||
output[n] +=
|
||||
FilterArPast(&in[m], order_numerator_, numerator_coefficients_);
|
||||
output[n] -= FilterArPast(&output[m], order_denominator_,
|
||||
denominator_coefficients_);
|
||||
}
|
||||
// Record into the past signal.
|
||||
memcpy(past_input_, &in[num_input_samples - order_numerator_],
|
||||
sizeof(in[0]) * order_numerator_);
|
||||
memcpy(past_output_, &output[num_input_samples - order_denominator_],
|
||||
sizeof(output[0]) * order_denominator_);
|
||||
} else {
|
||||
// Odd case that the length of the input is shorter that filter order.
|
||||
memmove(past_input_, &past_input_[num_input_samples],
|
||||
order_numerator_ * sizeof(past_input_[0]));
|
||||
memmove(past_output_, &past_output_[num_input_samples],
|
||||
order_denominator_ * sizeof(past_output_[0]));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
51
VocieProcess/modules/audio_processing/vad/pole_zero_filter.h
Normal file
51
VocieProcess/modules/audio_processing/vad/pole_zero_filter.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Pole-zero filter with 16-bit integer input and float output that keeps its
// history between calls to Filter().
class PoleZeroFilter {
|
||||
public:
|
||||
~PoleZeroFilter() {}
|
||||
|
||||
// Returns a heap-allocated filter, or NULL if an order exceeds
// kMaxFilterOrder, a coefficient pointer is NULL or
// denominator_coefficients[0] is zero. `order + 1` coefficients are copied
// from each array.
static PoleZeroFilter* Create(const float* numerator_coefficients,
|
||||
size_t order_numerator,
|
||||
const float* denominator_coefficients,
|
||||
size_t order_denominator);
|
||||
|
||||
// Filters `num_input_samples` samples from `in` into `output`. Returns -1 if
// either pointer is NULL and 0 otherwise.
int Filter(const int16_t* in, size_t num_input_samples, float* output);
|
||||
|
||||
private:
|
||||
// Use Create(); it validates the arguments before construction.
PoleZeroFilter(const float* numerator_coefficients,
|
||||
size_t order_numerator,
|
||||
const float* denominator_coefficients,
|
||||
size_t order_denominator);
|
||||
|
||||
// Largest numerator / denominator order accepted by Create().
static const int kMaxFilterOrder = 24;
|
||||
|
||||
// Input / output history. Twice the maximum order because Filter()
// temporarily appends up to `highest_order_` new samples behind the stored
// history.
int16_t past_input_[kMaxFilterOrder * 2];
|
||||
float past_output_[kMaxFilterOrder * 2];
|
||||
|
||||
// Private copies of the coefficients (`order + 1` entries are used).
float numerator_coefficients_[kMaxFilterOrder + 1];
|
||||
float denominator_coefficients_[kMaxFilterOrder + 1];
|
||||
|
||||
size_t order_numerator_;
|
||||
size_t order_denominator_;
|
||||
// max(order_numerator_, order_denominator_).
size_t highest_order_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
91
VocieProcess/modules/audio_processing/vad/standalone_vad.cc
Normal file
91
VocieProcess/modules/audio_processing/vad/standalone_vad.cc
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/vad/standalone_vad.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "common_audio/vad/include/webrtc_vad.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Mode applied to the underlying VAD in Create() and reported by mode() until
// set_mode() succeeds; 3 is the upper end of the range set_mode() accepts.
static const int kDefaultStandaloneVadMode = 3;
|
||||
|
||||
// Takes ownership of `vad` (it is released in the destructor), starts with an
// empty, zeroed audio buffer and records the default mode.
StandaloneVad::StandaloneVad(VadInst* vad)
    : vad_(vad),
      buffer_(),
      index_(0),
      mode_(kDefaultStandaloneVadMode) {
}
|
||||
|
||||
// Releases the VAD instance handed to the constructor.
StandaloneVad::~StandaloneVad() { WebRtcVad_Free(vad_); }
|
||||
|
||||
// Creates and initialises a VAD instance, puts it in the default mode and
// wraps it in a StandaloneVad. Returns nullptr (after releasing the instance)
// if creation, initialisation or mode selection fails. The caller owns the
// returned object.
StandaloneVad* StandaloneVad::Create() {
  VadInst* const handle = WebRtcVad_Create();
  if (handle == nullptr) {
    return nullptr;
  }

  // Both calls are always made; a non-zero result from either is an error.
  const int init_status = WebRtcVad_Init(handle);
  const int mode_status =
      WebRtcVad_set_mode(handle, kDefaultStandaloneVadMode);
  if ((init_status | mode_status) == 0) {
    return new StandaloneVad(handle);
  }

  WebRtcVad_Free(handle);
  return nullptr;
}
|
||||
|
||||
int StandaloneVad::AddAudio(const int16_t* data, size_t length) {
|
||||
if (length != kLength10Ms)
|
||||
return -1;
|
||||
|
||||
if (index_ + length > kLength10Ms * kMaxNum10msFrames)
|
||||
// Reset the buffer if it's full.
|
||||
// TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
|
||||
// can forgo the buffering.
|
||||
index_ = 0;
|
||||
|
||||
memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
|
||||
index_ += length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int StandaloneVad::GetActivity(double* p, size_t length_p) {
|
||||
if (index_ == 0)
|
||||
return -1;
|
||||
|
||||
const size_t num_frames = index_ / kLength10Ms;
|
||||
if (num_frames > length_p)
|
||||
return -1;
|
||||
RTC_DCHECK_EQ(0, WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_));
|
||||
|
||||
int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
|
||||
if (activity < 0)
|
||||
return -1;
|
||||
else if (activity == 0)
|
||||
p[0] = 0.01; // Arbitrary but small and non-zero.
|
||||
else
|
||||
p[0] = 0.5; // 0.5 is neutral values when combinned by other probabilities.
|
||||
for (size_t n = 1; n < num_frames; n++)
|
||||
p[n] = p[0];
|
||||
// Reset the buffer to start from the beginning.
|
||||
index_ = 0;
|
||||
return activity;
|
||||
}
|
||||
|
||||
int StandaloneVad::set_mode(int mode) {
|
||||
if (mode < 0 || mode > 3)
|
||||
return -1;
|
||||
if (WebRtcVad_set_mode(vad_, mode) != 0)
|
||||
return -1;
|
||||
|
||||
mode_ = mode;
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
69
VocieProcess/modules/audio_processing/vad/standalone_vad.h
Normal file
69
VocieProcess/modules/audio_processing/vad/standalone_vad.h
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "common_audio/vad/include/webrtc_vad.h"
|
||||
#include "modules/audio_processing/vad/common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class StandaloneVad {
|
||||
public:
|
||||
static StandaloneVad* Create();
|
||||
~StandaloneVad();
|
||||
|
||||
// Outputs
|
||||
// p: a buffer where probabilities are written to.
|
||||
// length_p: number of elements of `p`.
|
||||
//
|
||||
// return value:
|
||||
// -1: if no audio is stored or VAD returns error.
|
||||
// 0: in success.
|
||||
// In case of error the content of `activity` is unchanged.
|
||||
//
|
||||
// Note that due to a high false-positive (VAD decision is active while the
|
||||
// processed audio is just background noise) rate, stand-alone VAD is used as
|
||||
// a one-sided indicator. The activity probability is 0.5 if the frame is
|
||||
// classified as active, and the probability is 0.01 if the audio is
|
||||
// classified as passive. In this way, when probabilities are combined, the
|
||||
// effect of the stand-alone VAD is neutral if the input is classified as
|
||||
// active.
|
||||
int GetActivity(double* p, size_t length_p);
|
||||
|
||||
// Expecting 10 ms of 16 kHz audio to be pushed in.
|
||||
int AddAudio(const int16_t* data, size_t length);
|
||||
|
||||
// Set aggressiveness of VAD, 0 is the least aggressive and 3 is the most
|
||||
// aggressive mode. Returns -1 if the input is less than 0 or larger than 3,
|
||||
// otherwise 0 is returned.
|
||||
int set_mode(int mode);
|
||||
// Get the agressiveness of the current VAD.
|
||||
int mode() const { return mode_; }
|
||||
|
||||
private:
|
||||
explicit StandaloneVad(VadInst* vad);
|
||||
|
||||
static const size_t kMaxNum10msFrames = 3;
|
||||
|
||||
// TODO(turajs): Is there a way to use scoped-pointer here?
|
||||
VadInst* vad_;
|
||||
int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
|
||||
size_t index_;
|
||||
int mode_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
275
VocieProcess/modules/audio_processing/vad/vad_audio_proc.cc
Normal file
275
VocieProcess/modules/audio_processing/vad/vad_audio_proc.cc
Normal file
@ -0,0 +1,275 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/vad/vad_audio_proc.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
|
||||
#include "modules/audio_processing/vad/pitch_internal.h"
|
||||
#include "modules/audio_processing/vad/pole_zero_filter.h"
|
||||
#include "modules/audio_processing/vad/vad_audio_proc_internal.h"
|
||||
#include "rtc_base/checks.h"
|
||||
extern "C" {
|
||||
#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
}
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// The following structures are declared anonymous in iSAC's structs.h. To
|
||||
// forward declare them, we use this derived class trick.
|
||||
// Empty wrapper deriving from the global-namespace iSAC pitch-analysis state;
// it adds no members of its own.
struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
|
||||
// Empty wrapper deriving from the global-namespace iSAC pre-filterbank state;
// it adds no members of its own.
struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
|
||||
|
||||
// Sample rate divided by the DFT size, i.e. the spacing in Hz between two
// adjacent DFT bins.
static constexpr float kFrequencyResolution =
|
||||
kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize);
|
||||
// ExtractFeatures() flags the input as silence, and skips pitch analysis, as
// soon as the RMS of any 10 ms sub-frame is below this value.
static constexpr int kSilenceRms = 5;
|
||||
|
||||
// TODO(turajs): Make a Create or Init for VadAudioProc.
|
||||
// Sets up the audio buffer (pre-filled with kNumPastSignalSamples zeroed
// "past" samples), the interpolation history, the iSAC pitch-analysis and
// pre-filterbank states, the high-pass filter and the FFT work tables.
VadAudioProc::VadAudioProc()
    : audio_buffer_(),
      num_buffer_samples_(kNumPastSignalSamples),
      log_old_gain_(-2),
      old_lag_(50),  // Arbitrary but valid as pitch-lag (in samples).
      pitch_analysis_handle_(new PitchAnalysisStruct),
      pre_filter_handle_(new PreFiltBankstr),
      high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator,
                                               kFilterOrder,
                                               kCoeffDenominator,
                                               kFilterOrder)) {
  static_assert(kNumPastSignalSamples + kNumSubframeSamples ==
                    sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
                "lpc analysis window incorrect size");
  static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
                "correlation weight incorrect size");

  // TODO(turajs): Are we doing too much in the constructor?
  // Run one FFT with ip_[0] == 0 so that the work tables get initialised. The
  // scratch input is zeroed because the transform also reads it; leaving it
  // uninitialised would mean reading indeterminate values.
  float data[kDftSize] = {};
  ip_[0] = 0;
  WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
  // TODO(turajs): Need to initialize high-pass filter.

  // Initialize iSAC components.
  WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
  WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
}
|
||||
|
||||
// Defined in this file, where the wrapper structs held by the handle members
// are complete types.
VadAudioProc::~VadAudioProc() = default;
|
||||
|
||||
// Keeps the kNumPastSignalSamples samples that start at offset
// kNumSamplesToProcess by copying them to the front of the buffer, and marks
// the buffer as holding only those samples.
void VadAudioProc::ResetBuffer() {
  const size_t bytes_to_keep =
      sizeof(audio_buffer_[0]) * kNumPastSignalSamples;
  memcpy(audio_buffer_, audio_buffer_ + kNumSamplesToProcess, bytes_to_keep);
  num_buffer_samples_ = kNumPastSignalSamples;
}
|
||||
|
||||
int VadAudioProc::ExtractFeatures(const int16_t* frame,
|
||||
size_t length,
|
||||
AudioFeatures* features) {
|
||||
features->num_frames = 0;
|
||||
if (length != kNumSubframeSamples) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// High-pass filter to remove the DC component and very low frequency content.
|
||||
// We have experienced that this high-pass filtering improves voice/non-voiced
|
||||
// classification.
|
||||
if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
|
||||
&audio_buffer_[num_buffer_samples_]) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
num_buffer_samples_ += kNumSubframeSamples;
|
||||
if (num_buffer_samples_ < kBufferLength) {
|
||||
return 0;
|
||||
}
|
||||
RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength);
|
||||
features->num_frames = kNum10msSubframes;
|
||||
features->silence = false;
|
||||
|
||||
Rms(features->rms, kMaxNumFrames);
|
||||
for (size_t i = 0; i < kNum10msSubframes; ++i) {
|
||||
if (features->rms[i] < kSilenceRms) {
|
||||
// PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
|
||||
// Bail out here instead.
|
||||
features->silence = true;
|
||||
ResetBuffer();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
|
||||
kMaxNumFrames);
|
||||
FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
|
||||
ResetBuffer();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Computes |kLpcOrder + 1| correlation coefficients.
|
||||
void VadAudioProc::SubframeCorrelation(double* corr,
|
||||
size_t length_corr,
|
||||
size_t subframe_index) {
|
||||
RTC_DCHECK_GE(length_corr, kLpcOrder + 1);
|
||||
double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
|
||||
size_t buffer_index = subframe_index * kNumSubframeSamples;
|
||||
|
||||
for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
|
||||
windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
|
||||
|
||||
WebRtcIsac_AutoCorr(corr, windowed_audio,
|
||||
kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
|
||||
}
|
||||
|
||||
// Compute `kNum10msSubframes` sets of LPC coefficients, one per 10 ms input.
|
||||
// The analysis window is 15 ms long and it is centered on the first half of
|
||||
// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
|
||||
// first half of each 10 ms subframe.
|
||||
void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) {
|
||||
RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1));
|
||||
double corr[kLpcOrder + 1];
|
||||
double reflec_coeff[kLpcOrder];
|
||||
for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes;
|
||||
i++, offset_lpc += kLpcOrder + 1) {
|
||||
SubframeCorrelation(corr, kLpcOrder + 1, i);
|
||||
corr[0] *= 1.0001;
|
||||
// This makes Lev-Durb a bit more stable.
|
||||
for (size_t k = 0; k < kLpcOrder + 1; k++) {
|
||||
corr[k] *= kCorrWeight[k];
|
||||
}
|
||||
WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
|
||||
}
|
||||
}
|
||||
|
||||
// Fit a second order curve to these 3 points and find the location of the
|
||||
// extremum. The points are inverted before curve fitting.
|
||||
static float QuadraticInterpolation(float prev_val,
|
||||
float curr_val,
|
||||
float next_val) {
|
||||
// Doing the interpolation in |1 / A(z)|^2.
|
||||
float fractional_index = 0;
|
||||
next_val = 1.0f / next_val;
|
||||
prev_val = 1.0f / prev_val;
|
||||
curr_val = 1.0f / curr_val;
|
||||
|
||||
fractional_index =
|
||||
-(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
|
||||
RTC_DCHECK_LT(fabs(fractional_index), 1);
|
||||
return fractional_index;
|
||||
}
|
||||
|
||||
// 1 / A(z), where A(z) is defined by `lpc` is a model of the spectral envelope
|
||||
// of the input signal. The local maximum of the spectral envelope corresponds
|
||||
// with the local minimum of A(z). It saves complexity, as we save one
|
||||
// inversion. Furthermore, we find the first local maximum of magnitude squared,
|
||||
// to save on one square root.
|
||||
void VadAudioProc::FindFirstSpectralPeaks(double* f_peak,
|
||||
size_t length_f_peak) {
|
||||
RTC_DCHECK_GE(length_f_peak, kNum10msSubframes);
|
||||
double lpc[kNum10msSubframes * (kLpcOrder + 1)];
|
||||
// For all sub-frames.
|
||||
GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
|
||||
|
||||
const size_t kNumDftCoefficients = kDftSize / 2 + 1;
|
||||
float data[kDftSize];
|
||||
|
||||
for (size_t i = 0; i < kNum10msSubframes; i++) {
|
||||
// Convert to float with zero pad.
|
||||
memset(data, 0, sizeof(data));
|
||||
for (size_t n = 0; n < kLpcOrder + 1; n++) {
|
||||
data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
|
||||
}
|
||||
// Transform to frequency domain.
|
||||
WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
|
||||
|
||||
size_t index_peak = 0;
|
||||
float prev_magn_sqr = data[0] * data[0];
|
||||
float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
|
||||
float next_magn_sqr;
|
||||
bool found_peak = false;
|
||||
for (size_t n = 2; n < kNumDftCoefficients - 1; n++) {
|
||||
next_magn_sqr =
|
||||
data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1];
|
||||
if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
|
||||
found_peak = true;
|
||||
index_peak = n - 1;
|
||||
break;
|
||||
}
|
||||
prev_magn_sqr = curr_magn_sqr;
|
||||
curr_magn_sqr = next_magn_sqr;
|
||||
}
|
||||
float fractional_index = 0;
|
||||
if (!found_peak) {
|
||||
// Checking if |kNumDftCoefficients - 1| is the local minimum.
|
||||
next_magn_sqr = data[1] * data[1];
|
||||
if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
|
||||
index_peak = kNumDftCoefficients - 1;
|
||||
}
|
||||
} else {
|
||||
// A peak is found, do a simple quadratic interpolation to get a more
|
||||
// accurate estimate of the peak location.
|
||||
fractional_index =
|
||||
QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr);
|
||||
}
|
||||
f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
|
||||
}
|
||||
}
|
||||
|
||||
// Using iSAC functions to estimate pitch gains & lags.
|
||||
void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
|
||||
double* pitch_lags_hz,
|
||||
size_t length) {
|
||||
// TODO(turajs): This can be "imported" from iSAC & and the next two
|
||||
// constants.
|
||||
RTC_DCHECK_GE(length, kNum10msSubframes);
|
||||
const int kNumPitchSubframes = 4;
|
||||
double gains[kNumPitchSubframes];
|
||||
double lags[kNumPitchSubframes];
|
||||
|
||||
const int kNumSubbandFrameSamples = 240;
|
||||
const int kNumLookaheadSamples = 24;
|
||||
|
||||
float lower[kNumSubbandFrameSamples];
|
||||
float upper[kNumSubbandFrameSamples];
|
||||
double lower_lookahead[kNumSubbandFrameSamples];
|
||||
double upper_lookahead[kNumSubbandFrameSamples];
|
||||
double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
|
||||
kNumLookaheadSamples];
|
||||
|
||||
// Split signal to lower and upper bands
|
||||
WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
|
||||
upper, lower_lookahead, upper_lookahead,
|
||||
pre_filter_handle_.get());
|
||||
WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
|
||||
pitch_analysis_handle_.get(), lags, gains);
|
||||
|
||||
// Lags are computed on lower-band signal with sampling rate half of the
|
||||
// input signal.
|
||||
GetSubframesPitchParameters(
|
||||
kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
|
||||
&log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
|
||||
}
|
||||
|
||||
void VadAudioProc::Rms(double* rms, size_t length_rms) {
|
||||
RTC_DCHECK_GE(length_rms, kNum10msSubframes);
|
||||
size_t offset = kNumPastSignalSamples;
|
||||
for (size_t i = 0; i < kNum10msSubframes; i++) {
|
||||
rms[i] = 0;
|
||||
for (size_t n = 0; n < kNumSubframeSamples; n++, offset++)
|
||||
rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
|
||||
rms[i] = sqrt(rms[i] / kNumSubframeSamples);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
88
VocieProcess/modules/audio_processing/vad/vad_audio_proc.h
Normal file
88
VocieProcess/modules/audio_processing/vad/vad_audio_proc.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR...
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class PoleZeroFilter;
|
||||
|
||||
class VadAudioProc {
|
||||
public:
|
||||
// Forward declare iSAC structs.
|
||||
struct PitchAnalysisStruct;
|
||||
struct PreFiltBankstr;
|
||||
|
||||
VadAudioProc();
|
||||
~VadAudioProc();
|
||||
|
||||
int ExtractFeatures(const int16_t* audio_frame,
|
||||
size_t length,
|
||||
AudioFeatures* audio_features);
|
||||
|
||||
static constexpr size_t kDftSize = 512;
|
||||
|
||||
private:
|
||||
void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
|
||||
void SubframeCorrelation(double* corr,
|
||||
size_t length_corr,
|
||||
size_t subframe_index);
|
||||
void GetLpcPolynomials(double* lpc, size_t length_lpc);
|
||||
void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
|
||||
void Rms(double* rms, size_t length_rms);
|
||||
void ResetBuffer();
|
||||
|
||||
// To compute spectral peak we perform LPC analysis to get spectral envelope.
|
||||
// For every 30 ms we compute 3 spectral peak there for 3 LPC analysis.
|
||||
// LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
|
||||
// we need 5 ms of past signal to create the input of LPC analysis.
|
||||
static constexpr size_t kNumPastSignalSamples = size_t{kSampleRateHz / 200};
|
||||
|
||||
// TODO(turajs): maybe defining this at a higher level (maybe enum) so that
|
||||
// all the code recognize it as "no-error."
|
||||
static constexpr int kNoError = 0;
|
||||
|
||||
static constexpr size_t kNum10msSubframes = 3;
|
||||
static constexpr size_t kNumSubframeSamples = size_t{kSampleRateHz / 100};
|
||||
// Samples in 30 ms @ given sampling rate.
|
||||
static constexpr size_t kNumSamplesToProcess =
|
||||
kNum10msSubframes * kNumSubframeSamples;
|
||||
static constexpr size_t kBufferLength =
|
||||
kNumPastSignalSamples + kNumSamplesToProcess;
|
||||
static constexpr size_t kIpLength = kDftSize >> 1;
|
||||
static constexpr size_t kWLength = kDftSize >> 1;
|
||||
static constexpr size_t kLpcOrder = 16;
|
||||
|
||||
size_t ip_[kIpLength];
|
||||
float w_fft_[kWLength];
|
||||
|
||||
// A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ).
|
||||
float audio_buffer_[kBufferLength];
|
||||
size_t num_buffer_samples_;
|
||||
|
||||
double log_old_gain_;
|
||||
double old_lag_;
|
||||
|
||||
std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
|
||||
std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
|
||||
std::unique_ptr<PoleZeroFilter> high_pass_filter_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// These values should match MATLAB counterparts for unit-tests to pass.
//
// Per-lag weights applied to the autocorrelation before the LPC recursion.
// Entry k equals 0.985^k rounded to six decimals; there is one entry for
// each lag 0..16.
static const double kCorrWeight[] = {
    1.000000, 0.985000, 0.970225, 0.955672,
    0.941337, 0.927217, 0.913308, 0.899609,
    0.886115, 0.872823, 0.859730, 0.846834,
    0.834132, 0.821620, 0.809296, 0.797156,
    0.785199};
|
||||
|
||||
static const double kLpcAnalWin[] = {
|
||||
0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639,
|
||||
0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883,
|
||||
0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547,
|
||||
0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438,
|
||||
0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222,
|
||||
0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713,
|
||||
0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164,
|
||||
0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546,
|
||||
0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810,
|
||||
0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148,
|
||||
0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233,
|
||||
0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442,
|
||||
0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069,
|
||||
0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512,
|
||||
0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447,
|
||||
0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979,
|
||||
0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773,
|
||||
0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158,
|
||||
0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215,
|
||||
0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840,
|
||||
0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778,
|
||||
0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639,
|
||||
0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889,
|
||||
0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814,
|
||||
0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465,
|
||||
0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574,
|
||||
0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451,
|
||||
0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858,
|
||||
0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862,
|
||||
0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664,
|
||||
0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416,
|
||||
0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008,
|
||||
0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853,
|
||||
0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642,
|
||||
0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093,
|
||||
0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687,
|
||||
0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387,
|
||||
0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358,
|
||||
0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670,
|
||||
0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000};
|
||||
|
||||
// Second-order pole-zero filter coefficients (numerator and denominator,
// `kFilterOrder + 1` taps each). The numerator is close to
// 0.974827 * (1 - 2 z^-1 + z^-2).
// NOTE(review): presumably the high-pass filter applied to the input audio -
// confirm against the code that instantiates the filter.
static const size_t kFilterOrder = 2;
static const float kCoeffNumerator[kFilterOrder + 1] = {
    0.974827f, -1.949650f, 0.974827f};
static const float kCoeffDenominator[kFilterOrder + 1] = {
    1.0f, -1.971999f, 0.972457f};

// Both coefficient arrays must hold exactly `kFilterOrder + 1` entries.
static_assert(sizeof(kCoeffNumerator) / sizeof(*kCoeffNumerator) ==
                  kFilterOrder + 1,
              "numerator coefficients incorrect size");
static_assert(sizeof(kCoeffDenominator) / sizeof(*kCoeffDenominator) ==
                  kFilterOrder + 1,
              "denominator coefficients incorrect size");
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
135
VocieProcess/modules/audio_processing/vad/vad_circular_buffer.cc
Normal file
135
VocieProcess/modules/audio_processing/vad/vad_circular_buffer.cc
Normal file
@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/vad/vad_circular_buffer.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Allocates storage for `buffer_size` values and starts out empty.
// The storage is value-initialized (all zeros) so that reading a slot that
// has never been written - e.g. Oldest() before the first Insert() - yields
// 0.0 instead of an indeterminate value.
VadCircularBuffer::VadCircularBuffer(int buffer_size)
    : buffer_(new double[buffer_size]()),
      is_full_(false),
      index_(0),
      buffer_size_(buffer_size),
      sum_(0) {}
|
||||
|
||||
// Nothing to release by hand: the storage is owned by a smart pointer.
VadCircularBuffer::~VadCircularBuffer() = default;
|
||||
|
||||
// Forgets everything inserted so far. Only the bookkeeping (running sum,
// write position, fill flag) is cleared; the stored values are left as is.
void VadCircularBuffer::Reset() {
  sum_ = 0;
  index_ = 0;
  is_full_ = false;
}
|
||||
|
||||
// Factory. Returns a new buffer holding `buffer_size` values, or a null
// pointer when `buffer_size` is not positive. The caller owns the result.
VadCircularBuffer* VadCircularBuffer::Create(int buffer_size) {
  return buffer_size > 0 ? new VadCircularBuffer(buffer_size) : nullptr;
}
|
||||
|
||||
double VadCircularBuffer::Oldest() const {
|
||||
if (!is_full_)
|
||||
return buffer_[0];
|
||||
else
|
||||
return buffer_[index_];
|
||||
}
|
||||
|
||||
double VadCircularBuffer::Mean() {
|
||||
double m;
|
||||
if (is_full_) {
|
||||
m = sum_ / buffer_size_;
|
||||
} else {
|
||||
if (index_ > 0)
|
||||
m = sum_ / index_;
|
||||
else
|
||||
m = 0;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
void VadCircularBuffer::Insert(double value) {
|
||||
if (is_full_) {
|
||||
sum_ -= buffer_[index_];
|
||||
}
|
||||
sum_ += value;
|
||||
buffer_[index_] = value;
|
||||
index_++;
|
||||
if (index_ >= buffer_size_) {
|
||||
is_full_ = true;
|
||||
index_ = 0;
|
||||
}
|
||||
}
|
||||
int VadCircularBuffer::BufferLevel() {
|
||||
if (is_full_)
|
||||
return buffer_size_;
|
||||
return index_;
|
||||
}
|
||||
|
||||
int VadCircularBuffer::Get(int index, double* value) const {
|
||||
int err = ConvertToLinearIndex(&index);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
*value = buffer_[index];
|
||||
return 0;
|
||||
}
|
||||
|
||||
int VadCircularBuffer::Set(int index, double value) {
|
||||
int err = ConvertToLinearIndex(&index);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
|
||||
sum_ -= buffer_[index];
|
||||
buffer_[index] = value;
|
||||
sum_ += value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int VadCircularBuffer::ConvertToLinearIndex(int* index) const {
|
||||
if (*index < 0 || *index >= buffer_size_)
|
||||
return -1;
|
||||
|
||||
if (!is_full_ && *index >= index_)
|
||||
return -1;
|
||||
|
||||
*index = index_ - 1 - *index;
|
||||
if (*index < 0)
|
||||
*index += buffer_size_;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int VadCircularBuffer::RemoveTransient(int width_threshold,
|
||||
double val_threshold) {
|
||||
if (!is_full_ && index_ < width_threshold + 2)
|
||||
return 0;
|
||||
|
||||
int index_1 = 0;
|
||||
int index_2 = width_threshold + 1;
|
||||
double v = 0;
|
||||
if (Get(index_1, &v) < 0)
|
||||
return -1;
|
||||
if (v < val_threshold) {
|
||||
Set(index_1, 0);
|
||||
int index;
|
||||
for (index = index_2; index > index_1; index--) {
|
||||
if (Get(index, &v) < 0)
|
||||
return -1;
|
||||
if (v < val_threshold)
|
||||
break;
|
||||
}
|
||||
for (; index > index_1; index--) {
|
||||
if (Set(index, 0.0) < 0)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A circular buffer tailored to the needs of this project. It stores the last
// K inserted values and keeps track of their mean.
//
// It is used in class "PitchBasedActivity" to keep track of posterior
// probabilities in the past few seconds. The posterior probabilities are used
// to recursively update prior probabilities.
class VadCircularBuffer {
 public:
  // Creates a buffer for `buffer_size` values. Returns a null pointer if
  // `buffer_size` is not positive. The caller owns the returned object.
  static VadCircularBuffer* Create(int buffer_size);
  ~VadCircularBuffer();

  // True once the buffer has wrapped around.
  bool is_full() const { return is_full_; }
  // Returns the oldest entry in the buffer.
  double Oldest() const;
  // Inserts a new value, replacing the oldest one once the buffer is full.
  void Insert(double value);
  // Resets the buffer: forget the past, start fresh.
  void Reset();

  // The mean value of the elements in the buffer. The return value is zero if
  // the buffer is empty, i.e. no value has been inserted.
  double Mean();
  // Removes transients. If the values exceed `val_threshold` for a period
  // shorter than or equal to `width_threshold`, then that period is
  // considered transient and set to zero. Returns 0 on success and -1 if an
  // entry could not be accessed.
  int RemoveTransient(int width_threshold, double val_threshold);

 private:
  explicit VadCircularBuffer(int buffer_size);
  // Reads a previous value. |index = 0| corresponds to the most recent
  // insertion, |index = 1| to the one before it, and so on. Returns 0 on
  // success and -1 for an invalid index.
  int Get(int index, double* value) const;
  // Sets a given position to `value`. `index` is interpreted as in Get().
  // Returns 0 on success and -1 for an invalid index.
  int Set(int index, double value);
  // Returns the number of valid elements in the buffer.
  int BufferLevel();

  // Converts, in place, an index with the interpretation of Get() to the
  // corresponding linear index. Returns 0 on success and -1 for an invalid
  // index.
  int ConvertToLinearIndex(int* index) const;

  std::unique_ptr<double[]> buffer_;  // Storage for `buffer_size_` values.
  bool is_full_;                      // Set once the buffer has wrapped.
  int index_;                         // Position of the next write.
  int buffer_size_;                   // Capacity in number of values.
  double sum_;                        // Running sum of the valid values.
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/vad/voice_activity_detector.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {

// The detector operates on single-channel audio.
constexpr size_t kNumChannels = 1;

// Initial value reported as the last voice probability.
constexpr double kDefaultVoiceValue = 1.0;
// Starting probability handed to the detectors for non-silent frames.
constexpr double kNeutralProbability = 0.5;
// Arbitrary low probability reported for silent frames.
constexpr double kLowProbability = 0.01;

}  // namespace
|
||||
|
||||
// Starts with the default voice probability and a freshly created
// StandaloneVad; all other members are default-constructed.
VoiceActivityDetector::VoiceActivityDetector()
    : last_voice_probability_(kDefaultVoiceValue),
      standalone_vad_(StandaloneVad::Create()) {}
|
||||
|
||||
// Defined out of line; the compiler-generated member-wise destruction is all
// that is needed.
VoiceActivityDetector::~VoiceActivityDetector() = default;
|
||||
|
||||
// Because ISAC has a different chunk length, it updates
|
||||
// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data.
|
||||
// Otherwise it clears them.
|
||||
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
|
||||
size_t length,
|
||||
int sample_rate_hz) {
|
||||
RTC_DCHECK_EQ(length, sample_rate_hz / 100);
|
||||
// TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.
|
||||
// Resample to the required rate.
|
||||
const int16_t* resampled_ptr = audio;
|
||||
if (sample_rate_hz != kSampleRateHz) {
|
||||
RTC_CHECK_EQ(
|
||||
resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
|
||||
0);
|
||||
resampler_.Push(audio, length, resampled_, kLength10Ms, length);
|
||||
resampled_ptr = resampled_;
|
||||
}
|
||||
RTC_DCHECK_EQ(length, kLength10Ms);
|
||||
|
||||
// Each chunk needs to be passed into `standalone_vad_`, because internally it
|
||||
// buffers the audio and processes it all at once when GetActivity() is
|
||||
// called.
|
||||
RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
|
||||
|
||||
audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
|
||||
|
||||
chunkwise_voice_probabilities_.resize(features_.num_frames);
|
||||
chunkwise_rms_.resize(features_.num_frames);
|
||||
std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
|
||||
chunkwise_rms_.begin());
|
||||
if (features_.num_frames > 0) {
|
||||
if (features_.silence) {
|
||||
// The other features are invalid, so set the voice probabilities to an
|
||||
// arbitrary low value.
|
||||
std::fill(chunkwise_voice_probabilities_.begin(),
|
||||
chunkwise_voice_probabilities_.end(), kLowProbability);
|
||||
} else {
|
||||
std::fill(chunkwise_voice_probabilities_.begin(),
|
||||
chunkwise_voice_probabilities_.end(), kNeutralProbability);
|
||||
RTC_CHECK_GE(
|
||||
standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
|
||||
chunkwise_voice_probabilities_.size()),
|
||||
0);
|
||||
RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
|
||||
features_, &chunkwise_voice_probabilities_[0]),
|
||||
0);
|
||||
}
|
||||
last_voice_probability_ = chunkwise_voice_probabilities_.back();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common_audio/resampler/include/resampler.h"
|
||||
#include "modules/audio_processing/vad/common.h"
|
||||
#include "modules/audio_processing/vad/pitch_based_vad.h"
|
||||
#include "modules/audio_processing/vad/standalone_vad.h"
|
||||
#include "modules/audio_processing/vad/vad_audio_proc.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A Voice Activity Detector (VAD) that combines the voice probability from the
|
||||
// StandaloneVad and PitchBasedVad to get a more robust estimation.
|
||||
class VoiceActivityDetector {
|
||||
public:
|
||||
VoiceActivityDetector();
|
||||
~VoiceActivityDetector();
|
||||
|
||||
// Processes each audio chunk and estimates the voice probability.
|
||||
// TODO(bugs.webrtc.org/7494): Switch to rtc::ArrayView and remove
|
||||
// `sample_rate_hz`.
|
||||
void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
|
||||
|
||||
// Returns a vector of voice probabilities for each chunk. It can be empty for
|
||||
// some chunks, but it catches up afterwards returning multiple values at
|
||||
// once.
|
||||
const std::vector<double>& chunkwise_voice_probabilities() const {
|
||||
return chunkwise_voice_probabilities_;
|
||||
}
|
||||
|
||||
// Returns a vector of RMS values for each chunk. It has the same length as
|
||||
// chunkwise_voice_probabilities().
|
||||
const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; }
|
||||
|
||||
// Returns the last voice probability, regardless of the internal
|
||||
// implementation, although it has a few chunks of delay.
|
||||
float last_voice_probability() const { return last_voice_probability_; }
|
||||
|
||||
private:
|
||||
// TODO(aluebs): Change these to float.
|
||||
std::vector<double> chunkwise_voice_probabilities_;
|
||||
std::vector<double> chunkwise_rms_;
|
||||
|
||||
float last_voice_probability_;
|
||||
|
||||
Resampler resampler_;
|
||||
VadAudioProc audio_processing_;
|
||||
|
||||
std::unique_ptr<StandaloneVad> standalone_vad_;
|
||||
PitchBasedVad pitch_based_vad_;
|
||||
|
||||
int16_t resampled_[kLength10Ms];
|
||||
AudioFeatures features_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
77
VocieProcess/modules/audio_processing/vad/voice_gmm_tables.h
Normal file
77
VocieProcess/modules/audio_processing/vad/voice_gmm_tables.h
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// GMM tables for active segments. Generated by MakeGmmTables.m.
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
||||
|
||||
static const int kVoiceGmmNumMixtures = 12;
|
||||
static const int kVoiceGmmDim = 3;
|
||||
|
||||
static const double
|
||||
kVoiceGmmCovarInverse[kVoiceGmmNumMixtures][kVoiceGmmDim][kVoiceGmmDim] = {
|
||||
{{1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03},
|
||||
{-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04},
|
||||
{4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}},
|
||||
{{6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03},
|
||||
{-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05},
|
||||
{-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}},
|
||||
{{9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03},
|
||||
{-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05},
|
||||
{-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}},
|
||||
{{3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02},
|
||||
{-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05},
|
||||
{-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}},
|
||||
{{1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02},
|
||||
{-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05},
|
||||
{-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}},
|
||||
{{1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02},
|
||||
{-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06},
|
||||
{-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}},
|
||||
{{8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02},
|
||||
{-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06},
|
||||
{-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}},
|
||||
{{2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04},
|
||||
{-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06},
|
||||
{7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}},
|
||||
{{3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02},
|
||||
{1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05},
|
||||
{-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}},
|
||||
{{6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04},
|
||||
{-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06},
|
||||
{-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}},
|
||||
{{2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03},
|
||||
{-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05},
|
||||
{-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}},
|
||||
{{1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02},
|
||||
{-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05},
|
||||
{-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}};
|
||||
|
||||
static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
|
||||
{-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02},
|
||||
{-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02},
|
||||
{-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02},
|
||||
{-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02},
|
||||
{-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02},
|
||||
{-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02},
|
||||
{-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02},
|
||||
{-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02},
|
||||
{-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02},
|
||||
{-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02},
|
||||
{-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02},
|
||||
{-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
|
||||
|
||||
static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
|
||||
-1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01,
|
||||
-1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01,
|
||||
-1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01,
|
||||
-1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00};
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
942
VocieProcess/modules/third_party/fft/fft.c
vendored
Normal file
942
VocieProcess/modules/third_party/fft/fft.c
vendored
Normal file
@ -0,0 +1,942 @@
|
||||
/*
|
||||
* Copyright(c)1995,97 Mark Olesen <olesen@me.QueensU.CA>
|
||||
* Queen's Univ at Kingston (Canada)
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for
|
||||
* any purpose without fee is hereby granted, provided that this
|
||||
* entire notice is included in all copies of any software which is
|
||||
* or includes a copy or modification of this software and in all
|
||||
* copies of the supporting documentation for such software.
|
||||
*
|
||||
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
|
||||
* IMPLIED WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR QUEEN'S
|
||||
* UNIVERSITY AT KINGSTON MAKES ANY REPRESENTATION OR WARRANTY OF ANY
|
||||
* KIND CONCERNING THE MERCHANTABILITY OF THIS SOFTWARE OR ITS
|
||||
* FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||
*
|
||||
* All of which is to say that you can do what you like with this
|
||||
* source code provided you don't try to sell it as your own and you
|
||||
* include an unaltered copy of this message (including the
|
||||
* copyright).
|
||||
*
|
||||
* It is also implicitly understood that bug fixes and improvements
|
||||
* should make their way back to the general Internet community so
|
||||
* that everyone benefits.
|
||||
*
|
||||
* Changes:
|
||||
* Trivial type modifications by the WebRTC authors.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* File:
|
||||
* WebRtcIsac_Fftn.c
|
||||
*
|
||||
* Public:
|
||||
* WebRtcIsac_Fftn / fftnf ();
|
||||
*
|
||||
* Private:
|
||||
* WebRtcIsac_Fftradix / fftradixf ();
|
||||
*
|
||||
* Descript:
|
||||
* multivariate complex Fourier transform, computed in place
|
||||
* using mixed-radix Fast Fourier Transform algorithm.
|
||||
*
|
||||
* Fortran code by:
|
||||
* RC Singleton, Stanford Research Institute, Sept. 1968
|
||||
*
|
||||
* translated by f2c (version 19950721).
|
||||
*
|
||||
* int WebRtcIsac_Fftns (unsigned int ndim, const int dims[], double Re[], double Im[],
|
||||
* int iSign, double scaling, FFTstr *fftstate);
|
||||
*
|
||||
* NDIM = the total number dimensions
|
||||
* DIMS = a vector of array sizes
|
||||
* if NDIM is zero then DIMS must be zero-terminated
|
||||
*
|
||||
* RE and IM hold the real and imaginary components of the data, and return
|
||||
* the resulting real and imaginary Fourier coefficients. Multidimensional
|
||||
* data *must* be allocated contiguously. There is no limit on the number
|
||||
* of dimensions.
|
||||
*
|
||||
* ISIGN = the sign of the complex exponential (ie, forward or inverse FFT)
|
||||
* the magnitude of ISIGN (normally 1) is used to determine the
|
||||
* correct indexing increment (see below).
|
||||
*
|
||||
* SCALING = normalizing constant by which the final result is *divided*
|
||||
* if SCALING == -1, normalize by total dimension of the transform
|
||||
* if SCALING < -1, normalize by the square-root of the total dimension
|
||||
*
|
||||
* example:
|
||||
* tri-variate transform with Re[n1][n2][n3], Im[n1][n2][n3]
|
||||
*
|
||||
* int dims[3] = {n1,n2,n3}
|
||||
* WebRtcIsac_Fftn (3, dims, Re, Im, 1, scaling);
|
||||
*
|
||||
*-----------------------------------------------------------------------*
|
||||
* int WebRtcIsac_Fftradix (REAL Re[], REAL Im[], size_t nTotal, size_t nPass,
|
||||
* size_t nSpan, int iSign, int max_factors,
|
||||
* unsigned int max_perm, FFTstr *fftstate);
|
||||
*
|
||||
* RE, IM - see above documentation
|
||||
*
|
||||
* Although there is no limit on the number of dimensions, WebRtcIsac_Fftradix() must
|
||||
* be called once for each dimension, but the calls may be in any order.
|
||||
*
|
||||
* NTOTAL = the total number of complex data values
|
||||
* NPASS = the dimension of the current variable
|
||||
* NSPAN/NPASS = the spacing of consecutive data values while indexing the
|
||||
* current variable
|
||||
* ISIGN - see above documentation
|
||||
*
|
||||
* example:
|
||||
* tri-variate transform with Re[n1][n2][n3], Im[n1][n2][n3]
|
||||
*
|
||||
* WebRtcIsac_Fftradix (Re, Im, n1*n2*n3, n1, n1, 1, maxf, maxp);
|
||||
* WebRtcIsac_Fftradix (Re, Im, n1*n2*n3, n2, n1*n2, 1, maxf, maxp);
|
||||
* WebRtcIsac_Fftradix (Re, Im, n1*n2*n3, n3, n1*n2*n3, 1, maxf, maxp);
|
||||
*
|
||||
* single-variate transform,
|
||||
* NTOTAL = N = NSPAN = (number of complex data values),
|
||||
*
|
||||
* WebRtcIsac_Fftradix (Re, Im, n, n, n, 1, maxf, maxp);
|
||||
*
|
||||
* The data can also be stored in a single array with alternating real and
|
||||
* imaginary parts, the magnitude of ISIGN is changed to 2 to give correct
|
||||
* indexing increment, and data [0] and data [1] used to pass the initial
|
||||
* addresses for the sequences of real and imaginary values,
|
||||
*
|
||||
* example:
|
||||
* REAL data [2*NTOTAL];
|
||||
* WebRtcIsac_Fftradix ( &data[0], &data[1], NTOTAL, nPass, nSpan, 2, maxf, maxp);
|
||||
*
|
||||
* for temporary allocation:
|
||||
*
|
||||
* MAX_FACTORS >= the maximum prime factor of NPASS
|
||||
* MAX_PERM >= the number of prime factors of NPASS. In addition,
|
||||
* if the square-free portion K of NPASS has two or more prime
|
||||
* factors, then MAX_PERM >= (K-1)
|
||||
*
|
||||
* storage in FACTOR for a maximum of 15 prime factors of NPASS. if NPASS
|
||||
* has more than one square-free factor, the product of the square-free
|
||||
* factors must be <= 210. Array storage for maximum prime factor of 23. The
|
||||
* following two constants should agree with the array dimensions.
|
||||
*
|
||||
*----------------------------------------------------------------------*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "modules/third_party/fft/fft.h"
|
||||
|
||||
/* double precision routine */
|
||||
static int
|
||||
WebRtcIsac_Fftradix (double Re[], double Im[],
|
||||
size_t nTotal, size_t nPass, size_t nSpan, int isign,
|
||||
int max_factors, unsigned int max_perm,
|
||||
FFTstr *fftstate);
|
||||
|
||||
|
||||
|
||||
/* pi, for toolchains whose <math.h> does not provide M_PI */
#ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288
#endif

/* fixed trigonometric constants used by the factor-3 and factor-5 passes */
#ifndef SIN60
#define SIN60 0.86602540378443865 /* sin(60 deg) */
#define COS72 0.30901699437494742 /* cos(72 deg) */
#define SIN72 0.95105651629515357 /* sin(72 deg) */
#endif

/* element type and name aliases used by the routines below */
#define REAL double
#define FFTN WebRtcIsac_Fftn
#define FFTNS "fftn"
#define FFTRADIX WebRtcIsac_Fftradix
#define FFTRADIXS "fftradix"
|
||||
|
||||
|
||||
int WebRtcIsac_Fftns(unsigned int ndim, const int dims[],
|
||||
double Re[],
|
||||
double Im[],
|
||||
int iSign,
|
||||
double scaling,
|
||||
FFTstr *fftstate)
|
||||
{
|
||||
|
||||
size_t nSpan, nPass, nTotal;
|
||||
unsigned int i;
|
||||
int ret, max_factors, max_perm;
|
||||
|
||||
/*
|
||||
* tally the number of elements in the data array
|
||||
* and determine the number of dimensions
|
||||
*/
|
||||
nTotal = 1;
|
||||
if (ndim && dims [0])
|
||||
{
|
||||
for (i = 0; i < ndim; i++)
|
||||
{
|
||||
if (dims [i] <= 0)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
nTotal *= dims [i];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ndim = 0;
|
||||
for (i = 0; dims [i]; i++)
|
||||
{
|
||||
if (dims [i] <= 0)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
nTotal *= dims [i];
|
||||
ndim++;
|
||||
}
|
||||
}
|
||||
|
||||
/* determine maximum number of factors and permuations */
|
||||
#if 1
|
||||
/*
|
||||
* follow John Beale's example, just use the largest dimension and don't
|
||||
* worry about excess allocation. May be someone else will do it?
|
||||
*/
|
||||
max_factors = max_perm = 1;
|
||||
for (i = 0; i < ndim; i++)
|
||||
{
|
||||
nSpan = dims [i];
|
||||
if ((int)nSpan > max_factors)
|
||||
{
|
||||
max_factors = (int)nSpan;
|
||||
}
|
||||
if ((int)nSpan > max_perm)
|
||||
{
|
||||
max_perm = (int)nSpan;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* use the constants used in the original Fortran code */
|
||||
max_factors = 23;
|
||||
max_perm = 209;
|
||||
#endif
|
||||
/* loop over the dimensions: */
|
||||
nPass = 1;
|
||||
for (i = 0; i < ndim; i++)
|
||||
{
|
||||
nSpan = dims [i];
|
||||
nPass *= nSpan;
|
||||
ret = FFTRADIX (Re, Im, nTotal, nSpan, nPass, iSign,
|
||||
max_factors, max_perm, fftstate);
|
||||
/* exit, clean-up already done */
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Divide through by the normalizing constant: */
|
||||
if (scaling && scaling != 1.0)
|
||||
{
|
||||
if (iSign < 0) iSign = -iSign;
|
||||
if (scaling < 0.0)
|
||||
{
|
||||
scaling = (double)nTotal;
|
||||
if (scaling < -1.0)
|
||||
scaling = sqrt (scaling);
|
||||
}
|
||||
scaling = 1.0 / scaling; /* multiply is often faster */
|
||||
for (i = 0; i < nTotal; i += iSign)
|
||||
{
|
||||
Re [i] *= scaling;
|
||||
Im [i] *= scaling;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* singleton's mixed radix routine
|
||||
*
|
||||
* could move allocation out to WebRtcIsac_Fftn(), but leave it here so that it's
|
||||
* possible to make this a standalone function
|
||||
*/
|
||||
|
||||
static int FFTRADIX (REAL Re[],
|
||||
REAL Im[],
|
||||
size_t nTotal,
|
||||
size_t nPass,
|
||||
size_t nSpan,
|
||||
int iSign,
|
||||
int max_factors,
|
||||
unsigned int max_perm,
|
||||
FFTstr *fftstate)
|
||||
{
|
||||
int ii, mfactor, kspan, ispan, inc;
|
||||
int j, jc, jf, jj, k, k1, k2, k3, k4, kk, kt, nn, ns, nt;
|
||||
|
||||
|
||||
REAL radf;
|
||||
REAL c1, c2, c3, cd, aa, aj, ak, ajm, ajp, akm, akp;
|
||||
REAL s1, s2, s3, sd, bb, bj, bk, bjm, bjp, bkm, bkp;
|
||||
|
||||
REAL *Rtmp = NULL; /* temp space for real part*/
|
||||
REAL *Itmp = NULL; /* temp space for imaginary part */
|
||||
REAL *Cos = NULL; /* Cosine values */
|
||||
REAL *Sin = NULL; /* Sine values */
|
||||
|
||||
REAL s60 = SIN60; /* sin(60 deg) */
|
||||
REAL c72 = COS72; /* cos(72 deg) */
|
||||
REAL s72 = SIN72; /* sin(72 deg) */
|
||||
REAL pi2 = M_PI; /* use PI first, 2 PI later */
|
||||
|
||||
|
||||
fftstate->SpaceAlloced = 0;
|
||||
fftstate->MaxPermAlloced = 0;
|
||||
|
||||
|
||||
// initialize to avoid warnings
|
||||
k3 = c2 = c3 = s2 = s3 = 0.0;
|
||||
|
||||
if (nPass < 2)
|
||||
return 0;
|
||||
|
||||
/* allocate storage */
|
||||
if (fftstate->SpaceAlloced < max_factors * sizeof (REAL))
|
||||
{
|
||||
#ifdef SUN_BROKEN_REALLOC
|
||||
if (!fftstate->SpaceAlloced) /* first time */
|
||||
{
|
||||
fftstate->SpaceAlloced = max_factors * sizeof (REAL);
|
||||
}
|
||||
else
|
||||
{
|
||||
#endif
|
||||
fftstate->SpaceAlloced = max_factors * sizeof (REAL);
|
||||
#ifdef SUN_BROKEN_REALLOC
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
/* allow full use of alloc'd space */
|
||||
max_factors = fftstate->SpaceAlloced / sizeof (REAL);
|
||||
}
|
||||
if (fftstate->MaxPermAlloced < max_perm)
|
||||
{
|
||||
#ifdef SUN_BROKEN_REALLOC
|
||||
if (!fftstate->MaxPermAlloced) /* first time */
|
||||
else
|
||||
#endif
|
||||
fftstate->MaxPermAlloced = max_perm;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* allow full use of alloc'd space */
|
||||
max_perm = fftstate->MaxPermAlloced;
|
||||
}
|
||||
|
||||
/* assign pointers */
|
||||
Rtmp = (REAL *) fftstate->Tmp0;
|
||||
Itmp = (REAL *) fftstate->Tmp1;
|
||||
Cos = (REAL *) fftstate->Tmp2;
|
||||
Sin = (REAL *) fftstate->Tmp3;
|
||||
|
||||
/*
|
||||
* Function Body
|
||||
*/
|
||||
inc = iSign;
|
||||
if (iSign < 0) {
|
||||
s72 = -s72;
|
||||
s60 = -s60;
|
||||
pi2 = -pi2;
|
||||
inc = -inc; /* absolute value */
|
||||
}
|
||||
|
||||
/* adjust for strange increments */
|
||||
nt = inc * (int)nTotal;
|
||||
ns = inc * (int)nSpan;
|
||||
kspan = ns;
|
||||
|
||||
nn = nt - inc;
|
||||
jc = ns / (int)nPass;
|
||||
radf = pi2 * (double) jc;
|
||||
pi2 *= 2.0; /* use 2 PI from here on */
|
||||
|
||||
ii = 0;
|
||||
jf = 0;
|
||||
/* determine the factors of n */
|
||||
mfactor = 0;
|
||||
k = (int)nPass;
|
||||
while (k % 16 == 0) {
|
||||
mfactor++;
|
||||
fftstate->factor [mfactor - 1] = 4;
|
||||
k /= 16;
|
||||
}
|
||||
j = 3;
|
||||
jj = 9;
|
||||
do {
|
||||
while (k % jj == 0) {
|
||||
mfactor++;
|
||||
fftstate->factor [mfactor - 1] = j;
|
||||
k /= jj;
|
||||
}
|
||||
j += 2;
|
||||
jj = j * j;
|
||||
} while (jj <= k);
|
||||
if (k <= 4) {
|
||||
kt = mfactor;
|
||||
fftstate->factor [mfactor] = k;
|
||||
if (k != 1)
|
||||
mfactor++;
|
||||
} else {
|
||||
if (k - (k / 4 << 2) == 0) {
|
||||
mfactor++;
|
||||
fftstate->factor [mfactor - 1] = 2;
|
||||
k /= 4;
|
||||
}
|
||||
kt = mfactor;
|
||||
j = 2;
|
||||
do {
|
||||
if (k % j == 0) {
|
||||
mfactor++;
|
||||
fftstate->factor [mfactor - 1] = j;
|
||||
k /= j;
|
||||
}
|
||||
j = ((j + 1) / 2 << 1) + 1;
|
||||
} while (j <= k);
|
||||
}
|
||||
if (kt) {
|
||||
j = kt;
|
||||
do {
|
||||
mfactor++;
|
||||
fftstate->factor [mfactor - 1] = fftstate->factor [j - 1];
|
||||
j--;
|
||||
} while (j);
|
||||
}
|
||||
|
||||
/* test that mfactors is in range */
|
||||
if (mfactor > FFT_NFACTOR)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* compute fourier transform */
|
||||
for (;;) {
|
||||
sd = radf / (double) kspan;
|
||||
cd = sin(sd);
|
||||
cd = 2.0 * cd * cd;
|
||||
sd = sin(sd + sd);
|
||||
kk = 0;
|
||||
ii++;
|
||||
|
||||
switch (fftstate->factor [ii - 1]) {
|
||||
case 2:
|
||||
/* transform for factor of 2 (including rotation factor) */
|
||||
kspan /= 2;
|
||||
k1 = kspan + 2;
|
||||
do {
|
||||
do {
|
||||
k2 = kk + kspan;
|
||||
ak = Re [k2];
|
||||
bk = Im [k2];
|
||||
Re [k2] = Re [kk] - ak;
|
||||
Im [k2] = Im [kk] - bk;
|
||||
Re [kk] += ak;
|
||||
Im [kk] += bk;
|
||||
kk = k2 + kspan;
|
||||
} while (kk < nn);
|
||||
kk -= nn;
|
||||
} while (kk < jc);
|
||||
if (kk >= kspan)
|
||||
goto Permute_Results_Label; /* exit infinite loop */
|
||||
do {
|
||||
c1 = 1.0 - cd;
|
||||
s1 = sd;
|
||||
do {
|
||||
do {
|
||||
do {
|
||||
k2 = kk + kspan;
|
||||
ak = Re [kk] - Re [k2];
|
||||
bk = Im [kk] - Im [k2];
|
||||
Re [kk] += Re [k2];
|
||||
Im [kk] += Im [k2];
|
||||
Re [k2] = c1 * ak - s1 * bk;
|
||||
Im [k2] = s1 * ak + c1 * bk;
|
||||
kk = k2 + kspan;
|
||||
} while (kk < (nt-1));
|
||||
k2 = kk - nt;
|
||||
c1 = -c1;
|
||||
kk = k1 - k2;
|
||||
} while (kk > k2);
|
||||
ak = c1 - (cd * c1 + sd * s1);
|
||||
s1 = sd * c1 - cd * s1 + s1;
|
||||
c1 = 2.0 - (ak * ak + s1 * s1);
|
||||
s1 *= c1;
|
||||
c1 *= ak;
|
||||
kk += jc;
|
||||
} while (kk < k2);
|
||||
k1 += inc + inc;
|
||||
kk = (k1 - kspan + 1) / 2 + jc - 1;
|
||||
} while (kk < (jc + jc));
|
||||
break;
|
||||
|
||||
case 4: /* transform for factor of 4 */
|
||||
ispan = kspan;
|
||||
kspan /= 4;
|
||||
|
||||
do {
|
||||
c1 = 1.0;
|
||||
s1 = 0.0;
|
||||
do {
|
||||
do {
|
||||
k1 = kk + kspan;
|
||||
k2 = k1 + kspan;
|
||||
k3 = k2 + kspan;
|
||||
akp = Re [kk] + Re [k2];
|
||||
akm = Re [kk] - Re [k2];
|
||||
ajp = Re [k1] + Re [k3];
|
||||
ajm = Re [k1] - Re [k3];
|
||||
bkp = Im [kk] + Im [k2];
|
||||
bkm = Im [kk] - Im [k2];
|
||||
bjp = Im [k1] + Im [k3];
|
||||
bjm = Im [k1] - Im [k3];
|
||||
Re [kk] = akp + ajp;
|
||||
Im [kk] = bkp + bjp;
|
||||
ajp = akp - ajp;
|
||||
bjp = bkp - bjp;
|
||||
if (iSign < 0) {
|
||||
akp = akm + bjm;
|
||||
bkp = bkm - ajm;
|
||||
akm -= bjm;
|
||||
bkm += ajm;
|
||||
} else {
|
||||
akp = akm - bjm;
|
||||
bkp = bkm + ajm;
|
||||
akm += bjm;
|
||||
bkm -= ajm;
|
||||
}
|
||||
/* avoid useless multiplies */
|
||||
if (s1 == 0.0) {
|
||||
Re [k1] = akp;
|
||||
Re [k2] = ajp;
|
||||
Re [k3] = akm;
|
||||
Im [k1] = bkp;
|
||||
Im [k2] = bjp;
|
||||
Im [k3] = bkm;
|
||||
} else {
|
||||
Re [k1] = akp * c1 - bkp * s1;
|
||||
Re [k2] = ajp * c2 - bjp * s2;
|
||||
Re [k3] = akm * c3 - bkm * s3;
|
||||
Im [k1] = akp * s1 + bkp * c1;
|
||||
Im [k2] = ajp * s2 + bjp * c2;
|
||||
Im [k3] = akm * s3 + bkm * c3;
|
||||
}
|
||||
kk = k3 + kspan;
|
||||
} while (kk < nt);
|
||||
|
||||
c2 = c1 - (cd * c1 + sd * s1);
|
||||
s1 = sd * c1 - cd * s1 + s1;
|
||||
c1 = 2.0 - (c2 * c2 + s1 * s1);
|
||||
s1 *= c1;
|
||||
c1 *= c2;
|
||||
/* values of c2, c3, s2, s3 that will get used next time */
|
||||
c2 = c1 * c1 - s1 * s1;
|
||||
s2 = 2.0 * c1 * s1;
|
||||
c3 = c2 * c1 - s2 * s1;
|
||||
s3 = c2 * s1 + s2 * c1;
|
||||
kk = kk - nt + jc;
|
||||
} while (kk < kspan);
|
||||
kk = kk - kspan + inc;
|
||||
} while (kk < jc);
|
||||
if (kspan == jc)
|
||||
goto Permute_Results_Label; /* exit infinite loop */
|
||||
break;
|
||||
|
||||
default:
|
||||
/* transform for odd factors */
|
||||
#ifdef FFT_RADIX4
|
||||
return -1;
|
||||
break;
|
||||
#else /* FFT_RADIX4 */
|
||||
k = fftstate->factor [ii - 1];
|
||||
ispan = kspan;
|
||||
kspan /= k;
|
||||
|
||||
switch (k) {
|
||||
case 3: /* transform for factor of 3 (optional code) */
|
||||
do {
|
||||
do {
|
||||
k1 = kk + kspan;
|
||||
k2 = k1 + kspan;
|
||||
ak = Re [kk];
|
||||
bk = Im [kk];
|
||||
aj = Re [k1] + Re [k2];
|
||||
bj = Im [k1] + Im [k2];
|
||||
Re [kk] = ak + aj;
|
||||
Im [kk] = bk + bj;
|
||||
ak -= 0.5 * aj;
|
||||
bk -= 0.5 * bj;
|
||||
aj = (Re [k1] - Re [k2]) * s60;
|
||||
bj = (Im [k1] - Im [k2]) * s60;
|
||||
Re [k1] = ak - bj;
|
||||
Re [k2] = ak + bj;
|
||||
Im [k1] = bk + aj;
|
||||
Im [k2] = bk - aj;
|
||||
kk = k2 + kspan;
|
||||
} while (kk < (nn - 1));
|
||||
kk -= nn;
|
||||
} while (kk < kspan);
|
||||
break;
|
||||
|
||||
case 5: /* transform for factor of 5 (optional code) */
|
||||
c2 = c72 * c72 - s72 * s72;
|
||||
s2 = 2.0 * c72 * s72;
|
||||
do {
|
||||
do {
|
||||
k1 = kk + kspan;
|
||||
k2 = k1 + kspan;
|
||||
k3 = k2 + kspan;
|
||||
k4 = k3 + kspan;
|
||||
akp = Re [k1] + Re [k4];
|
||||
akm = Re [k1] - Re [k4];
|
||||
bkp = Im [k1] + Im [k4];
|
||||
bkm = Im [k1] - Im [k4];
|
||||
ajp = Re [k2] + Re [k3];
|
||||
ajm = Re [k2] - Re [k3];
|
||||
bjp = Im [k2] + Im [k3];
|
||||
bjm = Im [k2] - Im [k3];
|
||||
aa = Re [kk];
|
||||
bb = Im [kk];
|
||||
Re [kk] = aa + akp + ajp;
|
||||
Im [kk] = bb + bkp + bjp;
|
||||
ak = akp * c72 + ajp * c2 + aa;
|
||||
bk = bkp * c72 + bjp * c2 + bb;
|
||||
aj = akm * s72 + ajm * s2;
|
||||
bj = bkm * s72 + bjm * s2;
|
||||
Re [k1] = ak - bj;
|
||||
Re [k4] = ak + bj;
|
||||
Im [k1] = bk + aj;
|
||||
Im [k4] = bk - aj;
|
||||
ak = akp * c2 + ajp * c72 + aa;
|
||||
bk = bkp * c2 + bjp * c72 + bb;
|
||||
aj = akm * s2 - ajm * s72;
|
||||
bj = bkm * s2 - bjm * s72;
|
||||
Re [k2] = ak - bj;
|
||||
Re [k3] = ak + bj;
|
||||
Im [k2] = bk + aj;
|
||||
Im [k3] = bk - aj;
|
||||
kk = k4 + kspan;
|
||||
} while (kk < (nn-1));
|
||||
kk -= nn;
|
||||
} while (kk < kspan);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (k != jf) {
|
||||
jf = k;
|
||||
s1 = pi2 / (double) k;
|
||||
c1 = cos(s1);
|
||||
s1 = sin(s1);
|
||||
if (jf > max_factors){
|
||||
return -1;
|
||||
}
|
||||
Cos [jf - 1] = 1.0;
|
||||
Sin [jf - 1] = 0.0;
|
||||
j = 1;
|
||||
do {
|
||||
Cos [j - 1] = Cos [k - 1] * c1 + Sin [k - 1] * s1;
|
||||
Sin [j - 1] = Cos [k - 1] * s1 - Sin [k - 1] * c1;
|
||||
k--;
|
||||
Cos [k - 1] = Cos [j - 1];
|
||||
Sin [k - 1] = -Sin [j - 1];
|
||||
j++;
|
||||
} while (j < k);
|
||||
}
|
||||
do {
|
||||
do {
|
||||
k1 = kk;
|
||||
k2 = kk + ispan;
|
||||
ak = aa = Re [kk];
|
||||
bk = bb = Im [kk];
|
||||
j = 1;
|
||||
k1 += kspan;
|
||||
do {
|
||||
k2 -= kspan;
|
||||
j++;
|
||||
Rtmp [j - 1] = Re [k1] + Re [k2];
|
||||
ak += Rtmp [j - 1];
|
||||
Itmp [j - 1] = Im [k1] + Im [k2];
|
||||
bk += Itmp [j - 1];
|
||||
j++;
|
||||
Rtmp [j - 1] = Re [k1] - Re [k2];
|
||||
Itmp [j - 1] = Im [k1] - Im [k2];
|
||||
k1 += kspan;
|
||||
} while (k1 < k2);
|
||||
Re [kk] = ak;
|
||||
Im [kk] = bk;
|
||||
k1 = kk;
|
||||
k2 = kk + ispan;
|
||||
j = 1;
|
||||
do {
|
||||
k1 += kspan;
|
||||
k2 -= kspan;
|
||||
jj = j;
|
||||
ak = aa;
|
||||
bk = bb;
|
||||
aj = 0.0;
|
||||
bj = 0.0;
|
||||
k = 1;
|
||||
do {
|
||||
k++;
|
||||
ak += Rtmp [k - 1] * Cos [jj - 1];
|
||||
bk += Itmp [k - 1] * Cos [jj - 1];
|
||||
k++;
|
||||
aj += Rtmp [k - 1] * Sin [jj - 1];
|
||||
bj += Itmp [k - 1] * Sin [jj - 1];
|
||||
jj += j;
|
||||
if (jj > jf) {
|
||||
jj -= jf;
|
||||
}
|
||||
} while (k < jf);
|
||||
k = jf - j;
|
||||
Re [k1] = ak - bj;
|
||||
Im [k1] = bk + aj;
|
||||
Re [k2] = ak + bj;
|
||||
Im [k2] = bk - aj;
|
||||
j++;
|
||||
} while (j < k);
|
||||
kk += ispan;
|
||||
} while (kk < nn);
|
||||
kk -= nn;
|
||||
} while (kk < kspan);
|
||||
break;
|
||||
}
|
||||
|
||||
/* multiply by rotation factor (except for factors of 2 and 4) */
|
||||
if (ii == mfactor)
|
||||
goto Permute_Results_Label; /* exit infinite loop */
|
||||
kk = jc;
|
||||
do {
|
||||
c2 = 1.0 - cd;
|
||||
s1 = sd;
|
||||
do {
|
||||
c1 = c2;
|
||||
s2 = s1;
|
||||
kk += kspan;
|
||||
do {
|
||||
do {
|
||||
ak = Re [kk];
|
||||
Re [kk] = c2 * ak - s2 * Im [kk];
|
||||
Im [kk] = s2 * ak + c2 * Im [kk];
|
||||
kk += ispan;
|
||||
} while (kk < nt);
|
||||
ak = s1 * s2;
|
||||
s2 = s1 * c2 + c1 * s2;
|
||||
c2 = c1 * c2 - ak;
|
||||
kk = kk - nt + kspan;
|
||||
} while (kk < ispan);
|
||||
c2 = c1 - (cd * c1 + sd * s1);
|
||||
s1 += sd * c1 - cd * s1;
|
||||
c1 = 2.0 - (c2 * c2 + s1 * s1);
|
||||
s1 *= c1;
|
||||
c2 *= c1;
|
||||
kk = kk - ispan + jc;
|
||||
} while (kk < kspan);
|
||||
kk = kk - kspan + jc + inc;
|
||||
} while (kk < (jc + jc));
|
||||
break;
|
||||
#endif /* FFT_RADIX4 */
|
||||
}
|
||||
}
|
||||
|
||||
/* permute the results to normal order---done in two stages */
|
||||
/* permutation for square factors of n */
|
||||
Permute_Results_Label:
|
||||
fftstate->Perm [0] = ns;
|
||||
if (kt) {
|
||||
k = kt + kt + 1;
|
||||
if (mfactor < k)
|
||||
k--;
|
||||
j = 1;
|
||||
fftstate->Perm [k] = jc;
|
||||
do {
|
||||
fftstate->Perm [j] = fftstate->Perm [j - 1] / fftstate->factor [j - 1];
|
||||
fftstate->Perm [k - 1] = fftstate->Perm [k] * fftstate->factor [j - 1];
|
||||
j++;
|
||||
k--;
|
||||
} while (j < k);
|
||||
k3 = fftstate->Perm [k];
|
||||
kspan = fftstate->Perm [1];
|
||||
kk = jc;
|
||||
k2 = kspan;
|
||||
j = 1;
|
||||
if (nPass != nTotal) {
|
||||
/* permutation for multivariate transform */
|
||||
Permute_Multi_Label:
|
||||
do {
|
||||
do {
|
||||
k = kk + jc;
|
||||
do {
|
||||
/* swap Re [kk] <> Re [k2], Im [kk] <> Im [k2] */
|
||||
ak = Re [kk]; Re [kk] = Re [k2]; Re [k2] = ak;
|
||||
bk = Im [kk]; Im [kk] = Im [k2]; Im [k2] = bk;
|
||||
kk += inc;
|
||||
k2 += inc;
|
||||
} while (kk < (k-1));
|
||||
kk += ns - jc;
|
||||
k2 += ns - jc;
|
||||
} while (kk < (nt-1));
|
||||
k2 = k2 - nt + kspan;
|
||||
kk = kk - nt + jc;
|
||||
} while (k2 < (ns-1));
|
||||
do {
|
||||
do {
|
||||
k2 -= fftstate->Perm [j - 1];
|
||||
j++;
|
||||
k2 = fftstate->Perm [j] + k2;
|
||||
} while (k2 > fftstate->Perm [j - 1]);
|
||||
j = 1;
|
||||
do {
|
||||
if (kk < (k2-1))
|
||||
goto Permute_Multi_Label;
|
||||
kk += jc;
|
||||
k2 += kspan;
|
||||
} while (k2 < (ns-1));
|
||||
} while (kk < (ns-1));
|
||||
} else {
|
||||
/* permutation for single-variate transform (optional code) */
|
||||
Permute_Single_Label:
|
||||
do {
|
||||
/* swap Re [kk] <> Re [k2], Im [kk] <> Im [k2] */
|
||||
ak = Re [kk]; Re [kk] = Re [k2]; Re [k2] = ak;
|
||||
bk = Im [kk]; Im [kk] = Im [k2]; Im [k2] = bk;
|
||||
kk += inc;
|
||||
k2 += kspan;
|
||||
} while (k2 < (ns-1));
|
||||
do {
|
||||
do {
|
||||
k2 -= fftstate->Perm [j - 1];
|
||||
j++;
|
||||
k2 = fftstate->Perm [j] + k2;
|
||||
} while (k2 >= fftstate->Perm [j - 1]);
|
||||
j = 1;
|
||||
do {
|
||||
if (kk < k2)
|
||||
goto Permute_Single_Label;
|
||||
kk += inc;
|
||||
k2 += kspan;
|
||||
} while (k2 < (ns-1));
|
||||
} while (kk < (ns-1));
|
||||
}
|
||||
jc = k3;
|
||||
}
|
||||
|
||||
if ((kt << 1) + 1 >= mfactor)
|
||||
return 0;
|
||||
ispan = fftstate->Perm [kt];
|
||||
/* permutation for square-free factors of n */
|
||||
j = mfactor - kt;
|
||||
fftstate->factor [j] = 1;
|
||||
do {
|
||||
fftstate->factor [j - 1] *= fftstate->factor [j];
|
||||
j--;
|
||||
} while (j != kt);
|
||||
kt++;
|
||||
nn = fftstate->factor [kt - 1] - 1;
|
||||
if (nn > (int) max_perm) {
|
||||
return -1;
|
||||
}
|
||||
j = jj = 0;
|
||||
for (;;) {
|
||||
k = kt + 1;
|
||||
k2 = fftstate->factor [kt - 1];
|
||||
kk = fftstate->factor [k - 1];
|
||||
j++;
|
||||
if (j > nn)
|
||||
break; /* exit infinite loop */
|
||||
jj += kk;
|
||||
while (jj >= k2) {
|
||||
jj -= k2;
|
||||
k2 = kk;
|
||||
k++;
|
||||
kk = fftstate->factor [k - 1];
|
||||
jj += kk;
|
||||
}
|
||||
fftstate->Perm [j - 1] = jj;
|
||||
}
|
||||
/* determine the permutation cycles of length greater than 1 */
|
||||
j = 0;
|
||||
for (;;) {
|
||||
do {
|
||||
j++;
|
||||
kk = fftstate->Perm [j - 1];
|
||||
} while (kk < 0);
|
||||
if (kk != j) {
|
||||
do {
|
||||
k = kk;
|
||||
kk = fftstate->Perm [k - 1];
|
||||
fftstate->Perm [k - 1] = -kk;
|
||||
} while (kk != j);
|
||||
k3 = kk;
|
||||
} else {
|
||||
fftstate->Perm [j - 1] = -j;
|
||||
if (j == nn)
|
||||
break; /* exit infinite loop */
|
||||
}
|
||||
}
|
||||
max_factors *= inc;
|
||||
/* reorder a and b, following the permutation cycles */
|
||||
for (;;) {
|
||||
j = k3 + 1;
|
||||
nt -= ispan;
|
||||
ii = nt - inc + 1;
|
||||
if (nt < 0)
|
||||
break; /* exit infinite loop */
|
||||
do {
|
||||
do {
|
||||
j--;
|
||||
} while (fftstate->Perm [j - 1] < 0);
|
||||
jj = jc;
|
||||
do {
|
||||
kspan = jj;
|
||||
if (jj > max_factors) {
|
||||
kspan = max_factors;
|
||||
}
|
||||
jj -= kspan;
|
||||
k = fftstate->Perm [j - 1];
|
||||
kk = jc * k + ii + jj;
|
||||
k1 = kk + kspan - 1;
|
||||
k2 = 0;
|
||||
do {
|
||||
k2++;
|
||||
Rtmp [k2 - 1] = Re [k1];
|
||||
Itmp [k2 - 1] = Im [k1];
|
||||
k1 -= inc;
|
||||
} while (k1 != (kk-1));
|
||||
do {
|
||||
k1 = kk + kspan - 1;
|
||||
k2 = k1 - jc * (k + fftstate->Perm [k - 1]);
|
||||
k = -fftstate->Perm [k - 1];
|
||||
do {
|
||||
Re [k1] = Re [k2];
|
||||
Im [k1] = Im [k2];
|
||||
k1 -= inc;
|
||||
k2 -= inc;
|
||||
} while (k1 != (kk-1));
|
||||
kk = k2 + 1;
|
||||
} while (k != j);
|
||||
k1 = kk + kspan - 1;
|
||||
k2 = 0;
|
||||
do {
|
||||
k2++;
|
||||
Re [k1] = Rtmp [k2 - 1];
|
||||
Im [k1] = Itmp [k2 - 1];
|
||||
k1 -= inc;
|
||||
} while (k1 != (kk-1));
|
||||
} while (jj);
|
||||
} while (j != 1);
|
||||
}
|
||||
return 0; /* exit point here */
|
||||
}
|
||||
/* ---------------------- end-of-file (c source) ---------------------- */
|
||||
|
58
VocieProcess/modules/third_party/fft/fft.h
vendored
Normal file
58
VocieProcess/modules/third_party/fft/fft.h
vendored
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the ../../../LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*--------------------------------*-C-*---------------------------------*
|
||||
* File:
|
||||
* fft.h
|
||||
* ---------------------------------------------------------------------*
|
||||
* Re[]: real value array
|
||||
* Im[]: imaginary value array
|
||||
* nTotal: total number of complex values
|
||||
* nPass: number of elements involved in this pass of transform
|
||||
* nSpan: nSpan/nPass = number of array elements by which to step the index
|
||||
* in Re[] and Im[]
|
||||
* isign: exponent: +1 = forward -1 = reverse
|
||||
* scaling: normalizing constant by which the final result is *divided*
|
||||
* scaling == -1, normalize by total dimension of the transform
|
||||
* scaling < -1, normalize by the square-root of the total dimension
|
||||
*
|
||||
* ----------------------------------------------------------------------
|
||||
* See the comments in the code for correct usage!
|
||||
*/
|
||||
|
||||
#ifndef MODULES_THIRD_PARTY_FFT_FFT_H_
|
||||
#define MODULES_THIRD_PARTY_FFT_FFT_H_
|
||||
|
||||
/* number of elements in each scratch array of FFTstr */
#define FFT_MAXFFTSIZE 2048
/* capacity of the factor list in FFTstr */
#define FFT_NFACTOR 11

/*
 * Caller-provided working state for WebRtcIsac_Fftns(). All storage is
 * embedded in the structure; its fields are overwritten by the transform.
 */
typedef struct {
  unsigned int SpaceAlloced;    /* scratch limit in bytes recorded by the transform */
  unsigned int MaxPermAlloced;  /* permutation-table limit recorded by the transform */
  double Tmp0[FFT_MAXFFTSIZE];  /* temporary real parts */
  double Tmp1[FFT_MAXFFTSIZE];  /* temporary imaginary parts */
  double Tmp2[FFT_MAXFFTSIZE];  /* cosine table for general odd factors */
  double Tmp3[FFT_MAXFFTSIZE];  /* sine table for general odd factors */
  int Perm[FFT_MAXFFTSIZE];     /* permutation table */
  int factor[FFT_NFACTOR];      /* factors of the current transform length */
} FFTstr;
|
||||
|
||||
/* double precision routine */
|
||||
|
||||
int WebRtcIsac_Fftns(unsigned int ndim,
|
||||
const int dims[],
|
||||
double Re[],
|
||||
double Im[],
|
||||
int isign,
|
||||
double scaling,
|
||||
FFTstr* fftstate);
|
||||
|
||||
#endif /* MODULES_THIRD_PARTY_FFT_FFT_H_ */
|
Reference in New Issue
Block a user