add vad code.

This commit is contained in:
luocai
2024-09-06 18:26:45 +08:00
parent 35bf68338f
commit 2bed1dacf2
93 changed files with 12362 additions and 2 deletions

View File

@ -0,0 +1,24 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
#include <stdint.h>
typedef struct {
int in_use;
int32_t send_bw_avg;
int32_t send_max_delay_avg;
int16_t bottleneck_idx;
int16_t jitter_info;
} IsacBandwidthInfo;
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_

View File

@ -0,0 +1,195 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <memory.h>
#include <string.h>
#ifdef WEBRTC_ANDROID
#include <stdlib.h>
#endif
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
static void WebRtcIsac_AllPoleFilter(double* InOut,
double* Coef,
size_t lengthInOut,
int orderCoef) {
/* the state of filter is assumed to be in InOut[-1] to InOut[-orderCoef] */
double scal;
double sum;
size_t n;
int k;
//if (fabs(Coef[0]-1.0)<0.001) {
if ( (Coef[0] > 0.9999) && (Coef[0] < 1.0001) )
{
for(n = 0; n < lengthInOut; n++)
{
sum = Coef[1] * InOut[-1];
for(k = 2; k <= orderCoef; k++){
sum += Coef[k] * InOut[-k];
}
*InOut++ -= sum;
}
}
else
{
scal = 1.0 / Coef[0];
for(n=0;n<lengthInOut;n++)
{
*InOut *= scal;
for(k=1;k<=orderCoef;k++){
*InOut -= scal*Coef[k]*InOut[-k];
}
InOut++;
}
}
}
static void WebRtcIsac_AllZeroFilter(double* In,
double* Coef,
size_t lengthInOut,
int orderCoef,
double* Out) {
/* the state of filter is assumed to be in In[-1] to In[-orderCoef] */
size_t n;
int k;
double tmp;
for(n = 0; n < lengthInOut; n++)
{
tmp = In[0] * Coef[0];
for(k = 1; k <= orderCoef; k++){
tmp += Coef[k] * In[-k];
}
*Out++ = tmp;
In++;
}
}
static void WebRtcIsac_ZeroPoleFilter(double* In,
double* ZeroCoef,
double* PoleCoef,
size_t lengthInOut,
int orderCoef,
double* Out) {
/* the state of the zero section is assumed to be in In[-1] to In[-orderCoef] */
/* the state of the pole section is assumed to be in Out[-1] to Out[-orderCoef] */
WebRtcIsac_AllZeroFilter(In,ZeroCoef,lengthInOut,orderCoef,Out);
WebRtcIsac_AllPoleFilter(Out,PoleCoef,lengthInOut,orderCoef);
}
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order) {
size_t lag, n;
double sum, prod;
const double *x_lag;
for (lag = 0; lag <= order; lag++)
{
sum = 0.0f;
x_lag = &x[lag];
prod = x[0] * x_lag[0];
for (n = 1; n < N - lag; n++) {
sum += prod;
prod = x[n] * x_lag[n];
}
sum += prod;
r[lag] = sum;
}
}
static void WebRtcIsac_BwExpand(double* out,
double* in,
double coef,
size_t length) {
size_t i;
double chirp;
chirp = coef;
out[0] = in[0];
for (i = 1; i < length; i++) {
out[i] = chirp * in[i];
chirp *= coef;
}
}
void WebRtcIsac_WeightingFilter(const double* in,
double* weiout,
double* whiout,
WeightFiltstr* wfdata) {
double tmpbuffer[PITCH_FRAME_LEN + PITCH_WLPCBUFLEN];
double corr[PITCH_WLPCORDER+1], rc[PITCH_WLPCORDER+1];
double apol[PITCH_WLPCORDER+1], apolr[PITCH_WLPCORDER+1];
double rho=0.9, *inp, *dp, *dp2;
double whoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER];
double weoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER];
double *weo, *who, opol[PITCH_WLPCORDER+1], ext[PITCH_WLPCWINLEN];
int k, n, endpos, start;
/* Set up buffer and states */
memcpy(tmpbuffer, wfdata->buffer, sizeof(double) * PITCH_WLPCBUFLEN);
memcpy(tmpbuffer+PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN);
memcpy(wfdata->buffer, tmpbuffer+PITCH_FRAME_LEN, sizeof(double) * PITCH_WLPCBUFLEN);
dp=weoutbuf;
dp2=whoutbuf;
for (k=0;k<PITCH_WLPCORDER;k++) {
*dp++ = wfdata->weostate[k];
*dp2++ = wfdata->whostate[k];
opol[k]=0.0;
}
opol[0]=1.0;
opol[PITCH_WLPCORDER]=0.0;
weo=dp;
who=dp2;
endpos=PITCH_WLPCBUFLEN + PITCH_SUBFRAME_LEN;
inp=tmpbuffer + PITCH_WLPCBUFLEN;
for (n=0; n<PITCH_SUBFRAMES; n++) {
/* Windowing */
start=endpos-PITCH_WLPCWINLEN;
for (k=0; k<PITCH_WLPCWINLEN; k++) {
ext[k]=wfdata->window[k]*tmpbuffer[start+k];
}
/* Get LPC polynomial */
WebRtcIsac_AutoCorr(corr, ext, PITCH_WLPCWINLEN, PITCH_WLPCORDER);
corr[0]=1.01*corr[0]+1.0; /* White noise correction */
WebRtcIsac_LevDurb(apol, rc, corr, PITCH_WLPCORDER);
WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER+1);
/* Filtering */
WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, weo);
WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, who);
inp+=PITCH_SUBFRAME_LEN;
endpos+=PITCH_SUBFRAME_LEN;
weo+=PITCH_SUBFRAME_LEN;
who+=PITCH_SUBFRAME_LEN;
}
/* Export filter states */
for (k=0;k<PITCH_WLPCORDER;k++) {
wfdata->weostate[k]=weoutbuf[PITCH_FRAME_LEN+k];
wfdata->whostate[k]=whoutbuf[PITCH_FRAME_LEN+k];
}
/* Export output data */
memcpy(weiout, weoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN);
memcpy(whiout, whoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN);
}

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
#include <stddef.h>
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order);
void WebRtcIsac_WeightingFilter(const double* in,
double* weiout,
double* whiout,
WeightFiltstr* wfdata);
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_

View File

@ -0,0 +1,409 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
#include <math.h>
void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata) {
int k;
for (k = 0; k < PITCH_BUFFSIZE; k++) {
pitchfiltdata->ubuf[k] = 0.0;
}
pitchfiltdata->ystate[0] = 0.0;
for (k = 1; k < (PITCH_DAMPORDER); k++) {
pitchfiltdata->ystate[k] = 0.0;
}
pitchfiltdata->oldlagp[0] = 50.0;
pitchfiltdata->oldgainp[0] = 0.0;
}
static void WebRtcIsac_InitWeightingFilter(WeightFiltstr* wfdata) {
int k;
double t, dtmp, dtmp2, denum, denum2;
for (k = 0; k < PITCH_WLPCBUFLEN; k++)
wfdata->buffer[k] = 0.0;
for (k = 0; k < PITCH_WLPCORDER; k++) {
wfdata->istate[k] = 0.0;
wfdata->weostate[k] = 0.0;
wfdata->whostate[k] = 0.0;
}
/* next part should be in Matlab, writing to a global table */
t = 0.5;
denum = 1.0 / ((double)PITCH_WLPCWINLEN);
denum2 = denum * denum;
for (k = 0; k < PITCH_WLPCWINLEN; k++) {
dtmp = PITCH_WLPCASYM * t * denum + (1 - PITCH_WLPCASYM) * t * t * denum2;
dtmp *= 3.14159265;
dtmp2 = sin(dtmp);
wfdata->window[k] = dtmp2 * dtmp2;
t++;
}
}
void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* State) {
int k;
for (k = 0; k < PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
PITCH_FRAME_LEN / 2 + 2;
k++)
State->dec_buffer[k] = 0.0;
for (k = 0; k < 2 * ALLPASSSECTIONS + 1; k++)
State->decimator_state[k] = 0.0;
for (k = 0; k < 2; k++)
State->hp_state[k] = 0.0;
for (k = 0; k < QLOOKAHEAD; k++)
State->whitened_buf[k] = 0.0;
for (k = 0; k < QLOOKAHEAD; k++)
State->inbuf[k] = 0.0;
WebRtcIsac_InitPitchFilter(&(State->PFstr_wght));
WebRtcIsac_InitPitchFilter(&(State->PFstr));
WebRtcIsac_InitWeightingFilter(&(State->Wghtstr));
}
void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata) {
int k;
for (k = 0; k < QLOOKAHEAD; k++) {
prefiltdata->INLABUF1[k] = 0;
prefiltdata->INLABUF2[k] = 0;
prefiltdata->INLABUF1_float[k] = 0;
prefiltdata->INLABUF2_float[k] = 0;
}
for (k = 0; k < 2 * (QORDER - 1); k++) {
prefiltdata->INSTAT1[k] = 0;
prefiltdata->INSTAT2[k] = 0;
prefiltdata->INSTATLA1[k] = 0;
prefiltdata->INSTATLA2[k] = 0;
prefiltdata->INSTAT1_float[k] = 0;
prefiltdata->INSTAT2_float[k] = 0;
prefiltdata->INSTATLA1_float[k] = 0;
prefiltdata->INSTATLA2_float[k] = 0;
}
/* High pass filter states */
prefiltdata->HPstates[0] = 0.0;
prefiltdata->HPstates[1] = 0.0;
prefiltdata->HPstates_float[0] = 0.0f;
prefiltdata->HPstates_float[1] = 0.0f;
return;
}
double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order) {
const double LEVINSON_EPS = 1.0e-10;
double sum, alpha;
size_t m, m_h, i;
alpha = 0; // warning -DH
a[0] = 1.0;
if (r[0] < LEVINSON_EPS) { /* if r[0] <= 0, set LPC coeff. to zero */
for (i = 0; i < order; i++) {
k[i] = 0;
a[i + 1] = 0;
}
} else {
a[1] = k[0] = -r[1] / r[0];
alpha = r[0] + r[1] * k[0];
for (m = 1; m < order; m++) {
sum = r[m + 1];
for (i = 0; i < m; i++) {
sum += a[i + 1] * r[m - i];
}
k[m] = -sum / alpha;
alpha += k[m] * sum;
m_h = (m + 1) >> 1;
for (i = 0; i < m_h; i++) {
sum = a[i + 1] + k[m] * a[m - i];
a[m - i] += k[m] * a[i + 1];
a[i + 1] = sum;
}
a[m + 1] = k[m];
}
}
return alpha;
}
/* The upper channel all-pass filter factors */
const float WebRtcIsac_kUpperApFactorsFloat[2] = {0.03470000000000f,
0.38260000000000f};
/* The lower channel all-pass filter factors */
const float WebRtcIsac_kLowerApFactorsFloat[2] = {0.15440000000000f,
0.74400000000000f};
/* This function performs all-pass filtering--a series of first order all-pass
* sections are used to filter the input in a cascade manner.
* The input is overwritten!!
*/
void WebRtcIsac_AllPassFilter2Float(float* InOut,
const float* APSectionFactors,
int lengthInOut,
int NumberOfSections,
float* FilterState) {
int n, j;
float temp;
for (j = 0; j < NumberOfSections; j++) {
for (n = 0; n < lengthInOut; n++) {
temp = FilterState[j] + APSectionFactors[j] * InOut[n];
FilterState[j] = -APSectionFactors[j] * temp + InOut[n];
InOut[n] = temp;
}
}
}
/* The number of composite all-pass filter factors */
#define NUMBEROFCOMPOSITEAPSECTIONS 4
/* Function WebRtcIsac_SplitAndFilter
* This function creates low-pass and high-pass decimated versions of part of
the input signal, and part of the signal in the input 'lookahead buffer'.
INPUTS:
in: a length FRAMESAMPLES array of input samples
prefiltdata: input data structure containing the filterbank states
and lookahead samples from the previous encoding
iteration.
OUTPUTS:
LP: a FRAMESAMPLES_HALF array of low-pass filtered samples that
have been phase equalized. The first QLOOKAHEAD samples are
based on the samples in the two prefiltdata->INLABUFx arrays
each of length QLOOKAHEAD.
The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based
on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input
array in[].
HP: a FRAMESAMPLES_HALF array of high-pass filtered samples that
have been phase equalized. The first QLOOKAHEAD samples are
based on the samples in the two prefiltdata->INLABUFx arrays
each of length QLOOKAHEAD.
The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based
on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input
array in[].
LP_la: a FRAMESAMPLES_HALF array of low-pass filtered samples.
These samples are not phase equalized. They are computed
from the samples in the in[] array.
HP_la: a FRAMESAMPLES_HALF array of high-pass filtered samples
that are not phase equalized. They are computed from
the in[] vector.
prefiltdata: this input data structure's filterbank state and
lookahead sample buffers are updated for the next
encoding iteration.
*/
void WebRtcIsac_SplitAndFilterFloat(float* pin,
float* LP,
float* HP,
double* LP_la,
double* HP_la,
PreFiltBankstr* prefiltdata) {
int k, n;
float CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS];
float ForTransform_CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS];
float ForTransform_CompositeAPFilterState2[NUMBEROFCOMPOSITEAPSECTIONS];
float tempinoutvec[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
float tempin_ch1[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
float tempin_ch2[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
float in[FRAMESAMPLES];
float ftmp;
/* HPstcoeff_in = {a1, a2, b1 - b0 * a1, b2 - b0 * a2}; */
static const float kHpStCoefInFloat[4] = {
-1.94895953203325f, 0.94984516000000f, -0.05101826139794f,
0.05015484000000f};
/* The composite all-pass filter factors */
static const float WebRtcIsac_kCompositeApFactorsFloat[4] = {
0.03470000000000f, 0.15440000000000f, 0.38260000000000f,
0.74400000000000f};
// The matrix for transforming the backward composite state to upper channel
// state.
static const float WebRtcIsac_kTransform1Float[8] = {
-0.00158678506084f, 0.00127157815343f, -0.00104805672709f,
0.00084837248079f, 0.00134467983258f, -0.00107756549387f,
0.00088814793277f, -0.00071893072525f};
// The matrix for transforming the backward composite state to lower channel
// state.
static const float WebRtcIsac_kTransform2Float[8] = {
-0.00170686041697f, 0.00136780109829f, -0.00112736532350f,
0.00091257055385f, 0.00103094281812f, -0.00082615076557f,
0.00068092756088f, -0.00055119165484f};
/* High pass filter */
for (k = 0; k < FRAMESAMPLES; k++) {
in[k] = pin[k] + kHpStCoefInFloat[2] * prefiltdata->HPstates_float[0] +
kHpStCoefInFloat[3] * prefiltdata->HPstates_float[1];
ftmp = pin[k] - kHpStCoefInFloat[0] * prefiltdata->HPstates_float[0] -
kHpStCoefInFloat[1] * prefiltdata->HPstates_float[1];
prefiltdata->HPstates_float[1] = prefiltdata->HPstates_float[0];
prefiltdata->HPstates_float[0] = ftmp;
}
/* First Channel */
/*initial state of composite filter is zero */
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
CompositeAPFilterState[k] = 0.0;
}
/* put every other sample of input into a temporary vector in reverse
* (backward) order*/
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempinoutvec[k] = in[FRAMESAMPLES - 1 - 2 * k];
}
/* now all-pass filter the backwards vector. Output values overwrite the
* input vector. */
WebRtcIsac_AllPassFilter2Float(
tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF,
NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
/* save the backwards filtered output for later forward filtering,
but write it in forward order*/
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempin_ch1[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k];
}
/* save the backwards filter state becaue it will be transformed
later into a forward state */
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
ForTransform_CompositeAPFilterState[k] = CompositeAPFilterState[k];
}
/* now backwards filter the samples in the lookahead buffer. The samples were
placed there in the encoding of the previous frame. The output samples
overwrite the input samples */
WebRtcIsac_AllPassFilter2Float(
prefiltdata->INLABUF1_float, WebRtcIsac_kCompositeApFactorsFloat,
QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
/* save the output, but write it in forward order */
/* write the lookahead samples for the next encoding iteration. Every other
sample at the end of the input frame is written in reverse order for the
lookahead length. Exported in the prefiltdata structure. */
for (k = 0; k < QLOOKAHEAD; k++) {
tempin_ch1[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF1_float[k];
prefiltdata->INLABUF1_float[k] = in[FRAMESAMPLES - 1 - 2 * k];
}
/* Second Channel. This is exactly like the first channel, except that the
even samples are now filtered instead (lower channel). */
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
CompositeAPFilterState[k] = 0.0;
}
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempinoutvec[k] = in[FRAMESAMPLES - 2 - 2 * k];
}
WebRtcIsac_AllPassFilter2Float(
tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF,
NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempin_ch2[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k];
}
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
ForTransform_CompositeAPFilterState2[k] = CompositeAPFilterState[k];
}
WebRtcIsac_AllPassFilter2Float(
prefiltdata->INLABUF2_float, WebRtcIsac_kCompositeApFactorsFloat,
QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
for (k = 0; k < QLOOKAHEAD; k++) {
tempin_ch2[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF2_float[k];
prefiltdata->INLABUF2_float[k] = in[FRAMESAMPLES - 2 - 2 * k];
}
/* Transform filter states from backward to forward */
/*At this point, each of the states of the backwards composite filters for the
two channels are transformed into forward filtering states for the
corresponding forward channel filters. Each channel's forward filtering
state from the previous
encoding iteration is added to the transformed state to get a proper forward
state */
/* So the existing NUMBEROFCOMPOSITEAPSECTIONS x 1 (4x1) state vector is
multiplied by a NUMBEROFCHANNELAPSECTIONSxNUMBEROFCOMPOSITEAPSECTIONS (2x4)
transform matrix to get the new state that is added to the previous 2x1
input state */
for (k = 0; k < NUMBEROFCHANNELAPSECTIONS; k++) { /* k is row variable */
for (n = 0; n < NUMBEROFCOMPOSITEAPSECTIONS;
n++) { /* n is column variable */
prefiltdata->INSTAT1_float[k] +=
ForTransform_CompositeAPFilterState[n] *
WebRtcIsac_kTransform1Float[k * NUMBEROFCHANNELAPSECTIONS + n];
prefiltdata->INSTAT2_float[k] +=
ForTransform_CompositeAPFilterState2[n] *
WebRtcIsac_kTransform2Float[k * NUMBEROFCHANNELAPSECTIONS + n];
}
}
/*obtain polyphase components by forward all-pass filtering through each
* channel */
/* the backward filtered samples are now forward filtered with the
* corresponding channel filters */
/* The all pass filtering automatically updates the filter states which are
exported in the prefiltdata structure */
WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat,
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
prefiltdata->INSTAT1_float);
WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat,
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
prefiltdata->INSTAT2_float);
/* Now Construct low-pass and high-pass signals as combinations of polyphase
* components */
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
LP[k] = 0.5f * (tempin_ch1[k] + tempin_ch2[k]); /* low pass signal*/
HP[k] = 0.5f * (tempin_ch1[k] - tempin_ch2[k]); /* high pass signal*/
}
/* Lookahead LP and HP signals */
/* now create low pass and high pass signals of the input vector. However, no
backwards filtering is performed, and hence no phase equalization is
involved. Also, the input contains some samples that are lookahead samples.
The high pass and low pass signals that are created are used outside this
function for analysis (not encoding) purposes */
/* set up input */
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
tempin_ch1[k] = in[2 * k + 1];
tempin_ch2[k] = in[2 * k];
}
/* the input filter states are passed in and updated by the all-pass filtering
routine and exported in the prefiltdata structure*/
WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat,
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
prefiltdata->INSTATLA1_float);
WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat,
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
prefiltdata->INSTATLA2_float);
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
LP_la[k] = (float)(0.5f * (tempin_ch1[k] + tempin_ch2[k])); /*low pass */
HP_la[k] = (double)(0.5f * (tempin_ch1[k] - tempin_ch2[k])); /* high pass */
}
}

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
#include <stddef.h>
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata);
void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* state);
void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata);
double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order);
/* The number of all-pass filter factors in an upper or lower channel*/
#define NUMBEROFCHANNELAPSECTIONS 2
/* The upper channel all-pass filter factors */
extern const float WebRtcIsac_kUpperApFactorsFloat[2];
/* The lower channel all-pass filter factors */
extern const float WebRtcIsac_kLowerApFactorsFloat[2];
void WebRtcIsac_AllPassFilter2Float(float* InOut,
const float* APSectionFactors,
int lengthInOut,
int NumberOfSections,
float* FilterState);
void WebRtcIsac_SplitAndFilterFloat(float* in,
float* LP,
float* HP,
double* LP_la,
double* HP_la,
PreFiltBankstr* prefiltdata);
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
#include <math.h>
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_POSIX)
#define WebRtcIsac_lrint lrint
#elif (defined(WEBRTC_ARCH_X86) && defined(WIN32))
static __inline long int WebRtcIsac_lrint(double x_dbl) {
long int x_int;
__asm {
fld x_dbl
fistp x_int
}
;
return x_int;
}
#else // Do a slow but correct implementation of lrint
static __inline long int WebRtcIsac_lrint(double x_dbl) {
long int x_int;
x_int = (long int)floor(x_dbl + 0.499999999999);
return x_int;
}
#endif
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_

View File

@ -0,0 +1,695 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include <math.h>
#include <memory.h>
#include <string.h>
#ifdef WEBRTC_ANDROID
#include <stdlib.h>
#endif
#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
#include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h"
#include "rtc_base/system/ignore_warnings.h"
static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640};
/* interpolation filter */
__inline static void IntrepolFilter(double *data_ptr, double *intrp)
{
*intrp = kInterpolWin[0] * data_ptr[-3];
*intrp += kInterpolWin[1] * data_ptr[-2];
*intrp += kInterpolWin[2] * data_ptr[-1];
*intrp += kInterpolWin[3] * data_ptr[0];
*intrp += kInterpolWin[4] * data_ptr[1];
*intrp += kInterpolWin[5] * data_ptr[2];
*intrp += kInterpolWin[6] * data_ptr[3];
*intrp += kInterpolWin[7] * data_ptr[4];
}
/* 2D parabolic interpolation */
/* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */
__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
{
double c, b[2], A[2][2];
double t1, t2, d;
double delta1, delta2;
// double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
// should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
c = T[1][1];
b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
t1 = 0.5 * (T[0][0] + T[2][2]) - c;
t2 = 0.5 * (T[2][0] + T[0][2]) - c;
d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
A[0][0] = -t1 - 0.5 * d;
A[1][1] = -t2 - 0.5 * d;
/* deal with singularities or ill-conditioned cases */
if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
*peak_val = T[1][1];
return;
}
/* Cholesky decomposition: replace A by upper-triangular factor */
A[0][0] = sqrt(A[0][0]);
A[0][1] = A[0][1] / A[0][0];
A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
/* compute [x; y] = -0.5 * inv(A) * b */
t1 = b[0] / A[0][0];
t2 = (b[1] - t1 * A[0][1]) / A[1][1];
delta2 = t2 / A[1][1];
delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
delta2 *= 0.5;
/* limit norm */
t1 = delta1 * delta1 + delta2 * delta2;
if (t1 > 1.0) {
delta1 /= t1;
delta2 /= t1;
}
*peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
*x += delta1;
*y += delta2;
}
static void PCorr(const double *in, double *outcorr)
{
double sum, ysum, prod;
const double *x, *inptr;
int k, n;
//ysum = 1e-6; /* use this with float (i.s.o. double)! */
ysum = 1e-13;
sum = 0.0;
x = in + PITCH_MAX_LAG/2 + 2;
for (n = 0; n < PITCH_CORR_LEN2; n++) {
ysum += in[n] * in[n];
sum += x[n] * in[n];
}
outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */
*outcorr = sum / sqrt(ysum);
for (k = 1; k < PITCH_LAG_SPAN2; k++) {
ysum -= in[k-1] * in[k-1];
ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
sum = 0.0;
inptr = &in[k];
prod = x[0] * inptr[0];
for (n = 1; n < PITCH_CORR_LEN2; n++) {
sum += prod;
prod = x[n] * inptr[n];
}
sum += prod;
outcorr--;
*outcorr = sum / sqrt(ysum);
}
}
static void WebRtcIsac_AllpassFilterForDec(double* InOut,
const double* APSectionFactors,
size_t lengthInOut,
double* FilterState) {
// This performs all-pass filtering--a series of first order all-pass
// sections are used to filter the input in a cascade manner.
size_t n, j;
double temp;
for (j = 0; j < ALLPASSSECTIONS; j++) {
for (n = 0; n < lengthInOut; n += 2) {
temp = InOut[n]; // store input
InOut[n] = FilterState[j] + APSectionFactors[j] * temp;
FilterState[j] = -APSectionFactors[j] * InOut[n] + temp;
}
}
}
static void WebRtcIsac_DecimateAllpass(
const double* in,
double* state_in, // array of size: 2*ALLPASSSECTIONS+1
size_t N, // number of input samples
double* out) { // array of size N/2
static const double APupper[ALLPASSSECTIONS] = {0.0347, 0.3826};
static const double APlower[ALLPASSSECTIONS] = {0.1544, 0.744};
size_t n;
double data_vec[PITCH_FRAME_LEN];
/* copy input */
memcpy(data_vec + 1, in, sizeof(double) * (N - 1));
data_vec[0] = state_in[2 * ALLPASSSECTIONS]; // the z^(-1) state
state_in[2 * ALLPASSSECTIONS] = in[N - 1];
WebRtcIsac_AllpassFilterForDec(data_vec + 1, APupper, N, state_in);
WebRtcIsac_AllpassFilterForDec(data_vec, APlower, N,
state_in + ALLPASSSECTIONS);
for (n = 0; n < N / 2; n++)
out[n] = data_vec[2 * n] + data_vec[2 * n + 1];
}
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
static void WebRtcIsac_InitializePitch(const double* in,
const double old_lag,
const double old_gain,
PitchAnalysisStruct* State,
double* lags) {
double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
double ratio, log_lag, gain_bias;
double bias;
double corrvec1[PITCH_LAG_SPAN2];
double corrvec2[PITCH_LAG_SPAN2];
int m, k;
// Allocating 10 extra entries at the begining of the CorrSurf
double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
double* CorrSurf[2*PITCH_BW+3];
double *CorrSurfPtr1, *CorrSurfPtr2;
double LagWin[3] = {0.2, 0.5, 0.98};
int ind1, ind2, peaks_ind, peak, max_ind;
int peaks[PITCH_MAX_NUM_PEAKS];
double adj, gain_tmp;
double corr, corr_max;
double intrp_a, intrp_b, intrp_c, intrp_d;
double peak_vals[PITCH_MAX_NUM_PEAKS];
double lags1[PITCH_MAX_NUM_PEAKS];
double lags2[PITCH_MAX_NUM_PEAKS];
double T[3][3];
int row;
for(k = 0; k < 2*PITCH_BW+3; k++)
{
CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
}
/* reset CorrSurf matrix */
memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
//warnings -DH
max_ind = 0;
peak = 0;
/* copy old values from state buffer */
memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
/* decimation; put result after the old values */
WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
&buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]);
/* low-pass filtering */
for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++)
buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2];
/* copy end part back into state buffer */
memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
/* compute correlation for first and second half of the frame */
PCorr(buf_dec, corrvec1);
PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2);
/* bias towards pitch lag of previous frame */
log_lag = log(0.5 * old_lag);
gain_bias = 4.0 * old_gain * old_gain;
if (gain_bias > 0.8) gain_bias = 0.8;
for (k = 0; k < PITCH_LAG_SPAN2; k++)
{
ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag;
bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
corrvec1[k] *= bias;
}
/* taper correlation functions */
for (k = 0; k < 3; k++) {
gain_tmp = LagWin[k];
corrvec1[k] *= gain_tmp;
corrvec2[k] *= gain_tmp;
corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
}
corr_max = 0.0;
/* fill middle row of correlation surface */
ind1 = 0;
ind2 = 0;
CorrSurfPtr1 = &CorrSurf[PITCH_BW][2];
for (k = 0; k < PITCH_LAG_SPAN2; k++) {
corr = corrvec1[ind1++] + corrvec2[ind2++];
CorrSurfPtr1[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
}
}
/* fill first and last rows of correlation surface */
ind1 = 0;
ind2 = PITCH_BW;
CorrSurfPtr1 = &CorrSurf[0][2];
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2];
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) {
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
}
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
CorrSurfPtr2[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
}
}
/* fill second and next to last rows of correlation surface */
ind1 = 0;
ind2 = PITCH_BW-1;
CorrSurfPtr1 = &CorrSurf[1][2];
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1];
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) {
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
}
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
CorrSurfPtr2[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
}
}
/* fill remainder of correlation surface */
for (m = 2; m < PITCH_BW; m++) {
ind1 = 0;
ind2 = PITCH_BW - m; /* always larger than ind1 */
CorrSurfPtr1 = &CorrSurf[m][2];
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m];
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) {
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
}
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
CorrSurfPtr2[k] = corr;
if (corr > corr_max) {
corr_max = corr; /* update maximum */
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
}
}
}
/* threshold value to qualify as a peak */
corr_max *= 0.6;
peaks_ind = 0;
/* find peaks */
for (m = 1; m < PITCH_BW+1; m++) {
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
CorrSurfPtr1 = &CorrSurf[m][2];
for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) {
corr = CorrSurfPtr1[k];
if (corr > corr_max) {
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
/* found a peak; store index into matrix */
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
}
}
}
}
}
for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) {
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
CorrSurfPtr1 = &CorrSurf[m][2];
for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) {
corr = CorrSurfPtr1[k];
if (corr > corr_max) {
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
/* found a peak; store index into matrix */
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
}
}
}
}
}
if (peaks_ind > 0) {
/* examine each peak */
CorrSurfPtr1 = &CorrSurf[0][0];
for (k = 0; k < peaks_ind; k++) {
peak = peaks[k];
/* compute four interpolated values around current peak */
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a);
IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b);
IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c);
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d);
/* determine maximum of the interpolated values */
corr = CorrSurfPtr1[peak];
corr_max = intrp_a;
if (intrp_b > corr_max) corr_max = intrp_b;
if (intrp_c > corr_max) corr_max = intrp_c;
if (intrp_d > corr_max) corr_max = intrp_d;
/* determine where the peak sits and fill a 3x3 matrix around it */
row = peak / (PITCH_LAG_SPAN2+4);
lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
lags2[k] = (double) (lags1[k] + PITCH_BW - row);
if ( corr > corr_max ) {
T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
T[1][1] = corr;
T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
T[1][0] = intrp_a;
T[0][1] = intrp_b;
T[2][1] = intrp_c;
T[1][2] = intrp_d;
} else {
if (intrp_a == corr_max) {
lags1[k] -= 0.5;
lags2[k] += 0.5;
IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]);
IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]);
T[1][1] = intrp_a;
T[0][2] = intrp_b;
T[2][2] = intrp_c;
T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)];
T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
T[1][2] = corr;
} else if (intrp_b == corr_max) {
lags1[k] -= 0.5;
lags2[k] -= 0.5;
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]);
T[2][0] = intrp_a;
T[1][1] = intrp_b;
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]);
T[2][2] = intrp_d;
T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
T[0][1] = CorrSurfPtr1[peak - 1];
T[2][1] = corr;
T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
} else if (intrp_c == corr_max) {
lags1[k] += 0.5;
lags2[k] += 0.5;
T[0][0] = intrp_a;
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]);
T[1][1] = intrp_c;
T[0][2] = intrp_d;
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]);
T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
T[0][1] = corr;
T[2][1] = CorrSurfPtr1[peak + 1];
T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
} else {
lags1[k] += 0.5;
lags2[k] -= 0.5;
T[0][0] = intrp_b;
T[2][0] = intrp_c;
T[1][1] = intrp_d;
IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]);
IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]);
T[1][0] = corr;
T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)];
}
}
/* 2D parabolic interpolation gives more accurate lags and peak value */
Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]);
}
/* determine the highest peak, after applying a bias towards short lags */
corr_max = 0.0;
for (k = 0; k < peaks_ind; k++) {
corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k]));
if (corr > corr_max) {
corr_max = corr;
peak = k;
}
}
lags1[peak] *= 2.0;
lags2[peak] *= 2.0;
if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG;
if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG;
if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG;
if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG;
/* store lags of highest peak in output array */
lags[0] = lags1[peak];
lags[1] = lags1[peak];
lags[2] = lags2[peak];
lags[3] = lags2[peak];
}
else
{
row = max_ind / (PITCH_LAG_SPAN2+4);
lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
lags2[0] = (double) (lags1[0] + PITCH_BW - row);
if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG;
if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG;
if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG;
if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG;
/* store lags of highest peak in output array */
lags[0] = lags1[0];
lags[1] = lags1[0];
lags[2] = lags2[0];
lags[3] = lags2[0];
}
}
RTC_POP_IGNORING_WFRAME_LARGER_THAN()
/* create weighting matrix by orthogonalizing a basis of polynomials of increasing order
* t = (0:4)';
* A = [t.^0, t.^1, t.^2, t.^3, t.^4];
* [Q, dummy] = qr(A);
* P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
static const double kWeight[5][5] = {
{ 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286},
{-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857},
{-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286},
{ 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143},
{ 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286}
};
/* second order high-pass filter */
static void WebRtcIsac_Highpass(const double* in,
double* out,
double* state,
size_t N) {
/* create high-pass filter ocefficients
* z = 0.998 * exp(j*2*pi*35/8000);
* p = 0.94 * exp(j*2*pi*140/8000);
* HP_b = [1, -2*real(z), abs(z)^2];
* HP_a = [1, -2*real(p), abs(p)^2]; */
static const double a_coef[2] = { 1.86864659625574, -0.88360000000000};
static const double b_coef[2] = {-1.99524591718270, 0.99600400000000};
size_t k;
for (k=0; k<N; k++) {
*out = *in + state[1];
state[1] = state[0] + b_coef[0] * *in + a_coef[0] * *out;
state[0] = b_coef[1] * *in++ + a_coef[1] * *out++;
}
}
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */
double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
PitchAnalysisStruct *State,
double *lags,
double *gains)
{
double HPin[PITCH_FRAME_LEN];
double Weighted[PITCH_FRAME_LEN];
double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead
double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
double old_lag, old_gain;
double nrg_wht, tmp;
double Wnrg, Wfluct, Wgain;
double H[4][4];
double grad[4];
double dG[4];
int k, m, n, iter;
/* high pass filtering using second order pole-zero filter */
WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
/* copy from state into buffer */
memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
/* compute weighted and whitened signals */
WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
/* copy from buffer into state */
memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
old_lag = State->PFstr_wght.oldlagp[0];
old_gain = State->PFstr_wght.oldgainp[0];
/* inital pitch estimate */
WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
/* Iterative optimization of lags - to be done */
/* compute energy of whitened signal */
nrg_wht = 0.0;
for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
nrg_wht += Whitened[k] * Whitened[k];
/* Iterative optimization of gains */
/* set weights for energy, gain fluctiation, and spectral gain penalty functions */
Wnrg = 1.0 / nrg_wht;
Wgain = 0.005;
Wfluct = 3.0;
/* set initial gains */
for (k = 0; k < 4; k++)
gains[k] = PITCH_MAX_GAIN_06;
/* two iterations should be enough */
for (iter = 0; iter < 2; iter++) {
/* compute Jacobian of pre-filter output towards gains */
WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
/* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
for (k = 0; k < 4; k++) {
tmp = 0.0;
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
tmp += out_G[n] * out_dG[k][n];
grad[k] = tmp * Wnrg;
}
for (k = 0; k < 4; k++) {
for (m = 0; m <= k; m++) {
tmp = 0.0;
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
tmp += out_dG[m][n] * out_dG[k][n];
H[k][m] = tmp * Wnrg;
}
}
/* add gradient and Hessian (lower triangle) for dampening fast gain changes */
for (k = 0; k < 4; k++) {
tmp = kWeight[k+1][0] * old_gain;
for (m = 0; m < 4; m++)
tmp += kWeight[k+1][m+1] * gains[m];
grad[k] += tmp * Wfluct;
}
for (k = 0; k < 4; k++) {
for (m = 0; m <= k; m++) {
H[k][m] += kWeight[k+1][m+1] * Wfluct;
}
}
/* add gradient and Hessian for dampening gain */
for (k = 0; k < 3; k++) {
tmp = 1.0 / (1 - gains[k]);
grad[k] += tmp * tmp * Wgain;
H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
}
tmp = 1.0 / (1 - gains[3]);
grad[3] += 1.33 * (tmp * tmp * Wgain);
H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
/* compute Cholesky factorization of Hessian
* by overwritting the upper triangle; scale factors on diagonal
* (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */
H[0][1] = H[1][0] / H[0][0];
H[0][2] = H[2][0] / H[0][0];
H[0][3] = H[3][0] / H[0][0];
H[1][1] -= H[0][0] * H[0][1] * H[0][1];
H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
/* Compute update as delta_gains = -inv(H) * grad */
/* copy and negate */
for (k = 0; k < 4; k++)
dG[k] = -grad[k];
/* back substitution */
dG[1] -= dG[0] * H[0][1];
dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
/* scale */
for (k = 0; k < 4; k++)
dG[k] /= H[k][k];
/* back substitution */
dG[2] -= dG[3] * H[2][3];
dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
/* update gains and check range */
for (k = 0; k < 4; k++) {
gains[k] += dG[k];
if (gains[k] > PITCH_MAX_GAIN)
gains[k] = PITCH_MAX_GAIN;
else if (gains[k] < 0.0)
gains[k] = 0.0;
}
}
/* update state for next frame */
WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
/* concatenate previous input's end and current input */
memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
/* lookahead pitch filtering for masking analysis */
WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
/* store last part of input */
for (k = 0; k < QLOOKAHEAD; k++)
State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
}
RTC_POP_IGNORING_WFRAME_LARGER_THAN()

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* pitch_estimator.h
*
* Pitch functions
*
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
#include <stddef.h>
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
void WebRtcIsac_PitchAnalysis(
const double* in, /* PITCH_FRAME_LEN samples */
double* out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
PitchAnalysisStruct* State,
double* lags,
double* gains);
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ */

View File

@ -0,0 +1,388 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <memory.h>
#include <stdlib.h>
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"
#include "rtc_base/compile_assert_c.h"
/*
* We are implementing the following filters;
*
* Pre-filtering:
* y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
*
* Post-filtering:
* y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
*
* Note that `lag` is a floating number so we perform an interpolation to
* obtain the correct `lag`.
*
*/
static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
-0.07};
/* interpolation coefficients; generated by design_pitch_filter.m */
static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
{-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125,
0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109,
0.01754159521746},
{-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643,
0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084,
0.01654127246314},
{-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787,
0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311,
0.01295781500709},
{-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393,
0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166,
0.00719783432422},
{-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001,
0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000,
-0.00000000000000},
{0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562,
0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377,
-0.00764851320885},
{0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650,
0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059,
-0.01463300534216},
{0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190,
0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865,
-0.01985640750433}
};
/*
* Enumerating the operation of the filter.
* iSAC has 4 different pitch-filter which are very similar in their structure.
*
* kPitchFilterPre : In this mode the filter is operating as pitch
* pre-filter. This is used at the encoder.
* kPitchFilterPost : In this mode the filter is operating as pitch
* post-filter. This is the inverse of pre-filter and used
* in the decoder.
* kPitchFilterPreLa : This is, in structure, similar to pre-filtering but
* utilizing 3 millisecond lookahead. It is used to
* obtain the signal for LPC analysis.
* kPitchFilterPreGain : This is, in structure, similar to pre-filtering but
* differential changes in gain is considered. This is
* used to find the optimal gain.
*/
typedef enum {
kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain
} PitchFilterOperation;
/*
* Structure with parameters used for pitch-filtering.
* buffer : a buffer where the sum of previous inputs and outputs
* are stored.
* damper_state : the state of the damping filter. The filter is defined by
* `kDampFilter`.
* interpol_coeff : pointer to a set of coefficient which are used to utilize
* fractional pitch by interpolation.
* gain : pitch-gain to be applied to the current segment of input.
* lag : pitch-lag for the current segment of input.
* lag_offset : the offset of lag w.r.t. current sample.
* sub_frame : sub-frame index, there are 4 pitch sub-frames in an iSAC
* frame.
* This specifies the usage of the filter. See
* 'PitchFilterOperation' for operational modes.
* num_samples : number of samples to be processed in each segment.
* index : index of the input and output sample.
* damper_state_dg : state of damping filter for different trial gains.
* gain_mult : differential changes to gain.
*/
typedef struct {
double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
double damper_state[PITCH_DAMPORDER];
const double *interpol_coeff;
double gain;
double lag;
int lag_offset;
int sub_frame;
PitchFilterOperation mode;
int num_samples;
int index;
double damper_state_dg[4][PITCH_DAMPORDER];
double gain_mult[4];
} PitchFilterParam;
/**********************************************************************
* FilterSegment()
* Filter one segment, a quarter of a frame.
*
* Inputs
* in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
* filter_param : pitch filter parameters.
*
* Outputs
* out_data : pointer to a buffer where the filtered signal is written to.
* out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
* where the output of different gain values (differential
* change to gain) is written.
*/
static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
double* out_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
int n;
int m;
int j;
double sum;
double sum2;
/* Index of `parameters->buffer` where the output is written to. */
int pos = parameters->index + PITCH_BUFFSIZE;
/* Index of `parameters->buffer` where samples are read for fractional-lag
* computation. */
int pos_lag = pos - parameters->lag_offset;
for (n = 0; n < parameters->num_samples; ++n) {
/* Shift low pass filter states. */
for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
parameters->damper_state[m] = parameters->damper_state[m - 1];
}
/* Filter to get fractional pitch. */
sum = 0.0;
for (m = 0; m < PITCH_FRACORDER; ++m) {
sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
}
/* Multiply with gain. */
parameters->damper_state[0] = parameters->gain * sum;
if (parameters->mode == kPitchFilterPreGain) {
int lag_index = parameters->index - parameters->lag_offset;
int m_tmp = (lag_index < 0) ? -lag_index : 0;
/* Update the damper state for the new sample. */
for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
for (j = 0; j < 4; ++j) {
parameters->damper_state_dg[j][m] =
parameters->damper_state_dg[j][m - 1];
}
}
for (j = 0; j < parameters->sub_frame + 1; ++j) {
/* Filter for fractional pitch. */
sum2 = 0.0;
for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
/* `lag_index + m` is always larger than or equal to zero, see how
* m_tmp is computed. This is equivalent to assume samples outside
* `out_dg[j]` are zero. */
sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
}
/* Add the contribution of differential gain change. */
parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
parameters->gain * sum2;
}
/* Filter with damping filter, and store the results. */
for (j = 0; j < parameters->sub_frame + 1; ++j) {
sum = 0.0;
for (m = 0; m < PITCH_DAMPORDER; ++m) {
sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
}
out_dg[j][parameters->index] = sum;
}
}
/* Filter with damping filter. */
sum = 0.0;
for (m = 0; m < PITCH_DAMPORDER; ++m) {
sum += parameters->damper_state[m] * kDampFilter[m];
}
/* Subtract from input and update buffer. */
out_data[parameters->index] = in_data[parameters->index] - sum;
parameters->buffer[pos] = in_data[parameters->index] +
out_data[parameters->index];
++parameters->index;
++pos;
++pos_lag;
}
return;
}
/* Update filter parameters based on the pitch-gains and pitch-lags. */
static void Update(PitchFilterParam* parameters) {
double fraction;
int fraction_index;
/* Compute integer lag-offset. */
parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
0.5);
/* Find correct set of coefficients for computing fractional pitch. */
fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
parameters->interpol_coeff = kIntrpCoef[fraction_index];
if (parameters->mode == kPitchFilterPreGain) {
/* If in this mode make a differential change to pitch gain. */
parameters->gain_mult[parameters->sub_frame] += 0.2;
if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
parameters->gain_mult[parameters->sub_frame] = 1.0;
}
if (parameters->sub_frame > 0) {
parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
}
}
}
/******************************************************************************
* FilterFrame()
* Filter a frame of 30 millisecond, given pitch-lags and pitch-gains.
*
* Inputs
* in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
* lags : pointer to pitch-lags, 4 lags per frame.
* gains : pointer to pitch-gians, 4 gains per frame.
* mode : defining the functionality of the filter. It takes the
* following values.
* kPitchFilterPre: Pitch pre-filter, used at encoder.
* kPitchFilterPost: Pitch post-filter, used at decoder.
* kPitchFilterPreLa: Pitch pre-filter with lookahead.
* kPitchFilterPreGain: Pitch pre-filter used to otain optimal
* pitch-gains.
*
* Outputs
* out_data : pointer to a buffer where the filtered signal is written to.
* out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
* where the output of different gain values (differential
* change to gain) is written.
*/
static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
double* lags, double* gains, PitchFilterOperation mode,
double* out_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
PitchFilterParam filter_parameters;
double gain_delta, lag_delta;
double old_lag, old_gain;
int n;
int m;
const double kEnhancer = 1.3;
/* Set up buffer and states. */
filter_parameters.index = 0;
filter_parameters.lag_offset = 0;
filter_parameters.mode = mode;
/* Copy states to local variables. */
memcpy(filter_parameters.buffer, filter_state->ubuf,
sizeof(filter_state->ubuf));
RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >=
sizeof(filter_state->ubuf));
memset(filter_parameters.buffer +
sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]),
0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf));
memcpy(filter_parameters.damper_state, filter_state->ystate,
sizeof(filter_state->ystate));
if (mode == kPitchFilterPreGain) {
/* Clear buffers. */
memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult));
memset(filter_parameters.damper_state_dg, 0,
sizeof(filter_parameters.damper_state_dg));
for (n = 0; n < PITCH_SUBFRAMES; ++n) {
//memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
memset(out_dg[n], 0, sizeof(out_dg[n]));
}
} else if (mode == kPitchFilterPost) {
/* Make output more periodic. Negative sign is to change the structure
* of the filter. */
for (n = 0; n < PITCH_SUBFRAMES; ++n) {
gains[n] *= -kEnhancer;
}
}
old_lag = *filter_state->oldlagp;
old_gain = *filter_state->oldgainp;
/* No interpolation if pitch lag step is big. */
if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
(lags[0] < (PITCH_DOWNSTEP * old_lag))) {
old_lag = lags[0];
old_gain = gains[0];
if (mode == kPitchFilterPreGain) {
filter_parameters.gain_mult[0] = 1.0;
}
}
filter_parameters.num_samples = PITCH_UPDATE;
for (m = 0; m < PITCH_SUBFRAMES; ++m) {
/* Set the sub-frame value. */
filter_parameters.sub_frame = m;
/* Calculate interpolation steps for pitch-lag and pitch-gain. */
lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
filter_parameters.lag = old_lag;
gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
filter_parameters.gain = old_gain;
/* Store for the next sub-frame. */
old_lag = lags[m];
old_gain = gains[m];
for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
/* Step-wise interpolation of pitch gains and lags. As pitch-lag changes,
* some parameters of filter need to be update. */
filter_parameters.gain += gain_delta;
filter_parameters.lag += lag_delta;
/* Update parameters according to new lag value. */
Update(&filter_parameters);
/* Filter a segment of input. */
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
}
}
if (mode != kPitchFilterPreGain) {
/* Export buffer and states. */
memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
sizeof(filter_state->ubuf));
memcpy(filter_state->ystate, filter_parameters.damper_state,
sizeof(filter_state->ystate));
/* Store for the next frame. */
*filter_state->oldlagp = old_lag;
*filter_state->oldgainp = old_gain;
}
if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
/* Filter the lookahead segment, this is treated as the last sub-frame. So
* set `pf_param` to last sub-frame. */
filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
filter_parameters.num_samples = QLOOKAHEAD;
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
}
}
void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data,
PitchFiltstr* pf_state, double* lags,
double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
}
void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data,
PitchFiltstr* pf_state, double* lags,
double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
NULL);
}
void WebRtcIsac_PitchfilterPre_gains(
double* in_data, double* out_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state,
double* lags, double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
out_dg);
}
void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data,
PitchFiltstr* pf_state, double* lags,
double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
}

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
void WebRtcIsac_PitchfilterPre(double* indat,
double* outdat,
PitchFiltstr* pfp,
double* lags,
double* gains);
void WebRtcIsac_PitchfilterPost(double* indat,
double* outdat,
PitchFiltstr* pfp,
double* lags,
double* gains);
void WebRtcIsac_PitchfilterPre_la(double* indat,
double* outdat,
PitchFiltstr* pfp,
double* lags,
double* gains);
void WebRtcIsac_PitchfilterPre_gains(
double* indat,
double* outdat,
double out_dG[][PITCH_FRAME_LEN + QLOOKAHEAD],
PitchFiltstr* pfp,
double* lags,
double* gains);
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_

View File

@ -0,0 +1,196 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* settings.h
*
* Declaration of #defines used in the iSAC codec
*
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
/* sampling frequency (Hz) */
#define FS 16000
/* number of samples per frame (either 320 (20ms), 480 (30ms) or 960 (60ms)) */
#define INITIAL_FRAMESAMPLES 960
/* do not modify the following; this will have to be modified if we
* have a 20ms framesize option */
/**********************************************************************/
/* miliseconds */
#define FRAMESIZE 30
/* number of samples per frame processed in the encoder, 480 */
#define FRAMESAMPLES 480 /* ((FRAMESIZE*FS)/1000) */
#define FRAMESAMPLES_HALF 240
#define FRAMESAMPLES_QUARTER 120
/**********************************************************************/
/* max number of samples per frame (= 60 ms frame) */
#define MAX_FRAMESAMPLES 960
#define MAX_SWBFRAMESAMPLES (MAX_FRAMESAMPLES * 2)
/* number of samples per 10ms frame */
#define FRAMESAMPLES_10ms ((10 * FS) / 1000)
#define SWBFRAMESAMPLES_10ms (FRAMESAMPLES_10ms * 2)
/* number of samples in 30 ms frame */
#define FRAMESAMPLES_30ms 480
/* number of subframes */
#define SUBFRAMES 6
/* length of a subframe */
#define UPDATE 80
/* length of half a subframe (low/high band) */
#define HALF_SUBFRAMELEN (UPDATE / 2)
/* samples of look ahead (in a half-band, so actually
* half the samples of look ahead @ FS) */
#define QLOOKAHEAD 24 /* 3 ms */
/* order of AR model in spectral entropy coder */
#define AR_ORDER 6
/* order of LP model in spectral entropy coder */
#define LP_ORDER 0
/* window length (masking analysis) */
#define WINLEN 256
/* order of low-band pole filter used to approximate masking curve */
#define ORDERLO 12
/* order of hi-band pole filter used to approximate masking curve */
#define ORDERHI 6
#define UB_LPC_ORDER 4
#define UB_LPC_VEC_PER_FRAME 2
#define UB16_LPC_VEC_PER_FRAME 4
#define UB_ACTIVE_SUBFRAMES 2
#define UB_MAX_LPC_ORDER 6
#define UB_INTERPOL_SEGMENTS 1
#define UB16_INTERPOL_SEGMENTS 3
#define LB_TOTAL_DELAY_SAMPLES 48
enum ISACBandwidth { isac8kHz = 8, isac12kHz = 12, isac16kHz = 16 };
enum ISACBand {
kIsacLowerBand = 0,
kIsacUpperBand12 = 1,
kIsacUpperBand16 = 2
};
enum IsacSamplingRate { kIsacWideband = 16, kIsacSuperWideband = 32 };
#define UB_LPC_GAIN_DIM SUBFRAMES
#define FB_STATE_SIZE_WORD32 6
/* order for post_filter_bank */
#define POSTQORDER 3
/* order for pre-filterbank */
#define QORDER 3
/* another order */
#define QORDER_ALL (POSTQORDER + QORDER - 1)
/* for decimator */
#define ALLPASSSECTIONS 2
/* array size for byte stream in number of bytes. */
/* The old maximum size still needed for the decoding */
#define STREAM_SIZE_MAX 600
#define STREAM_SIZE_MAX_30 200 /* 200 bytes=53.4 kbps @ 30 ms.framelength */
#define STREAM_SIZE_MAX_60 400 /* 400 bytes=53.4 kbps @ 60 ms.framelength */
/* storage size for bit counts */
#define BIT_COUNTER_SIZE 30
/* maximum order of any AR model or filter */
#define MAX_AR_MODEL_ORDER 12 // 50
/* For pitch analysis */
#define PITCH_FRAME_LEN (FRAMESAMPLES_HALF) /* 30 ms */
#define PITCH_MAX_LAG 140 /* 57 Hz */
#define PITCH_MIN_LAG 20 /* 400 Hz */
#define PITCH_MAX_GAIN 0.45
#define PITCH_MAX_GAIN_06 0.27 /* PITCH_MAX_GAIN*0.6 */
#define PITCH_MAX_GAIN_Q12 1843
#define PITCH_LAG_SPAN2 (PITCH_MAX_LAG / 2 - PITCH_MIN_LAG / 2 + 5)
#define PITCH_CORR_LEN2 60 /* 15 ms */
#define PITCH_CORR_STEP2 (PITCH_FRAME_LEN / 4)
#define PITCH_BW 11 /* half the band width of correlation surface */
#define PITCH_SUBFRAMES 4
#define PITCH_GRAN_PER_SUBFRAME 5
#define PITCH_SUBFRAME_LEN (PITCH_FRAME_LEN / PITCH_SUBFRAMES)
#define PITCH_UPDATE (PITCH_SUBFRAME_LEN / PITCH_GRAN_PER_SUBFRAME)
/* maximum number of peaks to be examined in correlation surface */
#define PITCH_MAX_NUM_PEAKS 10
#define PITCH_PEAK_DECAY 0.85
/* For weighting filter */
#define PITCH_WLPCORDER 6
#define PITCH_WLPCWINLEN PITCH_FRAME_LEN
#define PITCH_WLPCASYM 0.3 /* asymmetry parameter */
#define PITCH_WLPCBUFLEN PITCH_WLPCWINLEN
/* For pitch filter */
/* Extra 50 for fraction and LP filters */
#define PITCH_BUFFSIZE (PITCH_MAX_LAG + 50)
#define PITCH_INTBUFFSIZE (PITCH_FRAME_LEN + PITCH_BUFFSIZE)
/* Max rel. step for interpolation */
#define PITCH_UPSTEP 1.5
/* Max rel. step for interpolation */
#define PITCH_DOWNSTEP 0.67
#define PITCH_FRACS 8
#define PITCH_FRACORDER 9
#define PITCH_DAMPORDER 5
#define PITCH_FILTDELAY 1.5f
/* stepsize for quantization of the pitch Gain */
#define PITCH_GAIN_STEPSIZE 0.125
/* Order of high pass filter */
#define HPORDER 2
/* some mathematical constants */
/* log2(exp) */
#define LOG2EXP 1.44269504088896
#define PI 3.14159265358979
/* Maximum number of iterations allowed to limit payload size */
#define MAX_PAYLOAD_LIMIT_ITERATION 5
/* Redundant Coding */
#define RCU_BOTTLENECK_BPS 16000
#define RCU_TRANSCODING_SCALE 0.40f
#define RCU_TRANSCODING_SCALE_INVERSE 2.5f
#define RCU_TRANSCODING_SCALE_UB 0.50f
#define RCU_TRANSCODING_SCALE_UB_INVERSE 2.0f
/* Define Error codes */
/* 6000 General */
#define ISAC_MEMORY_ALLOCATION_FAILED 6010
#define ISAC_MODE_MISMATCH 6020
#define ISAC_DISALLOWED_BOTTLENECK 6030
#define ISAC_DISALLOWED_FRAME_LENGTH 6040
#define ISAC_UNSUPPORTED_SAMPLING_FREQUENCY 6050
/* 6200 Bandwidth estimator */
#define ISAC_RANGE_ERROR_BW_ESTIMATOR 6240
/* 6400 Encoder */
#define ISAC_ENCODER_NOT_INITIATED 6410
#define ISAC_DISALLOWED_CODING_MODE 6420
#define ISAC_DISALLOWED_FRAME_MODE_ENCODER 6430
#define ISAC_DISALLOWED_BITSTREAM_LENGTH 6440
#define ISAC_PAYLOAD_LARGER_THAN_LIMIT 6450
#define ISAC_DISALLOWED_ENCODER_BANDWIDTH 6460
/* 6600 Decoder */
#define ISAC_DECODER_NOT_INITIATED 6610
#define ISAC_EMPTY_PACKET 6620
#define ISAC_DISALLOWED_FRAME_MODE_DECODER 6630
#define ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH 6640
#define ISAC_RANGE_ERROR_DECODE_BANDWIDTH 6650
#define ISAC_RANGE_ERROR_DECODE_PITCH_GAIN 6660
#define ISAC_RANGE_ERROR_DECODE_PITCH_LAG 6670
#define ISAC_RANGE_ERROR_DECODE_LPC 6680
#define ISAC_RANGE_ERROR_DECODE_SPECTRUM 6690
#define ISAC_LENGTH_MISMATCH 6730
#define ISAC_RANGE_ERROR_DECODE_BANDWITH 6740
#define ISAC_DISALLOWED_BANDWIDTH_MODE_DECODER 6750
#define ISAC_DISALLOWED_LPC_MODEL 6760
/* 6800 Call setup formats */
#define ISAC_INCOMPATIBLE_FORMATS 6810
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ */

View File

@ -0,0 +1,448 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* structs.h
*
* This header file contains all the structs used in the ISAC codec
*
*/
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
#include "modules/audio_coding/codecs/isac/bandwidth_info.h"
#include "modules/audio_coding/codecs/isac/main/source/settings.h"
#include "modules/third_party/fft/fft.h"
typedef struct Bitstreamstruct {
uint8_t stream[STREAM_SIZE_MAX];
uint32_t W_upper;
uint32_t streamval;
uint32_t stream_index;
} Bitstr;
typedef struct {
double DataBufferLo[WINLEN];
double DataBufferHi[WINLEN];
double CorrBufLo[ORDERLO + 1];
double CorrBufHi[ORDERHI + 1];
float PreStateLoF[ORDERLO + 1];
float PreStateLoG[ORDERLO + 1];
float PreStateHiF[ORDERHI + 1];
float PreStateHiG[ORDERHI + 1];
float PostStateLoF[ORDERLO + 1];
float PostStateLoG[ORDERLO + 1];
float PostStateHiF[ORDERHI + 1];
float PostStateHiG[ORDERHI + 1];
double OldEnergy;
} MaskFiltstr;
typedef struct {
// state vectors for each of the two analysis filters
double INSTAT1[2 * (QORDER - 1)];
double INSTAT2[2 * (QORDER - 1)];
double INSTATLA1[2 * (QORDER - 1)];
double INSTATLA2[2 * (QORDER - 1)];
double INLABUF1[QLOOKAHEAD];
double INLABUF2[QLOOKAHEAD];
float INSTAT1_float[2 * (QORDER - 1)];
float INSTAT2_float[2 * (QORDER - 1)];
float INSTATLA1_float[2 * (QORDER - 1)];
float INSTATLA2_float[2 * (QORDER - 1)];
float INLABUF1_float[QLOOKAHEAD];
float INLABUF2_float[QLOOKAHEAD];
/* High pass filter */
double HPstates[HPORDER];
float HPstates_float[HPORDER];
} PreFiltBankstr;
typedef struct {
// state vectors for each of the two analysis filters
double STATE_0_LOWER[2 * POSTQORDER];
double STATE_0_UPPER[2 * POSTQORDER];
/* High pass filter */
double HPstates1[HPORDER];
double HPstates2[HPORDER];
float STATE_0_LOWER_float[2 * POSTQORDER];
float STATE_0_UPPER_float[2 * POSTQORDER];
float HPstates1_float[HPORDER];
float HPstates2_float[HPORDER];
} PostFiltBankstr;
typedef struct {
// data buffer for pitch filter
double ubuf[PITCH_BUFFSIZE];
// low pass state vector
double ystate[PITCH_DAMPORDER];
// old lag and gain
double oldlagp[1];
double oldgainp[1];
} PitchFiltstr;
typedef struct {
// data buffer
double buffer[PITCH_WLPCBUFLEN];
// state vectors
double istate[PITCH_WLPCORDER];
double weostate[PITCH_WLPCORDER];
double whostate[PITCH_WLPCORDER];
// LPC window -> should be a global array because constant
double window[PITCH_WLPCWINLEN];
} WeightFiltstr;
typedef struct {
// for inital estimator
double dec_buffer[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
PITCH_FRAME_LEN / 2 + 2];
double decimator_state[2 * ALLPASSSECTIONS + 1];
double hp_state[2];
double whitened_buf[QLOOKAHEAD];
double inbuf[QLOOKAHEAD];
PitchFiltstr PFstr_wght;
PitchFiltstr PFstr;
WeightFiltstr Wghtstr;
} PitchAnalysisStruct;
/* Have instance of struct together with other iSAC structs */
typedef struct {
/* Previous frame length (in ms) */
int32_t prev_frame_length;
/* Previous RTP timestamp from received
packet (in samples relative beginning) */
int32_t prev_rec_rtp_number;
/* Send timestamp for previous packet (in ms using timeGetTime()) */
uint32_t prev_rec_send_ts;
/* Arrival time for previous packet (in ms using timeGetTime()) */
uint32_t prev_rec_arr_ts;
/* rate of previous packet, derived from RTP timestamps (in bits/s) */
float prev_rec_rtp_rate;
/* Time sinse the last update of the BN estimate (in ms) */
uint32_t last_update_ts;
/* Time sinse the last reduction (in ms) */
uint32_t last_reduction_ts;
/* How many times the estimate was update in the beginning */
int32_t count_tot_updates_rec;
/* The estimated bottle neck rate from there to here (in bits/s) */
int32_t rec_bw;
float rec_bw_inv;
float rec_bw_avg;
float rec_bw_avg_Q;
/* The estimated mean absolute jitter value,
as seen on this side (in ms) */
float rec_jitter;
float rec_jitter_short_term;
float rec_jitter_short_term_abs;
float rec_max_delay;
float rec_max_delay_avg_Q;
/* (assumed) bitrate for headers (bps) */
float rec_header_rate;
/* The estimated bottle neck rate from here to there (in bits/s) */
float send_bw_avg;
/* The estimated mean absolute jitter value, as seen on
the other siee (in ms) */
float send_max_delay_avg;
// number of packets received since last update
int num_pkts_rec;
int num_consec_rec_pkts_over_30k;
// flag for marking that a high speed network has been
// detected downstream
int hsn_detect_rec;
int num_consec_snt_pkts_over_30k;
// flag for marking that a high speed network has
// been detected upstream
int hsn_detect_snd;
uint32_t start_wait_period;
int in_wait_period;
int change_to_WB;
uint32_t senderTimestamp;
uint32_t receiverTimestamp;
// enum IsacSamplingRate incomingStreamSampFreq;
uint16_t numConsecLatePkts;
float consecLatency;
int16_t inWaitLatePkts;
IsacBandwidthInfo external_bw_info;
} BwEstimatorstr;
typedef struct {
/* boolean, flags if previous packet exceeded B.N. */
int PrevExceed;
/* ms */
int ExceedAgo;
/* packets left to send in current burst */
int BurstCounter;
/* packets */
int InitCounter;
/* ms remaining in buffer when next packet will be sent */
double StillBuffered;
} RateModel;
/* The following strutc is used to store data from encoding, to make it
fast and easy to construct a new bitstream with a different Bandwidth
estimate. All values (except framelength and minBytes) is double size to
handle 60 ms of data.
*/
typedef struct {
/* Used to keep track of if it is first or second part of 60 msec packet */
int startIdx;
/* Frame length in samples */
int16_t framelength;
/* Pitch Gain */
int pitchGain_index[2];
/* Pitch Lag */
double meanGain[2];
int pitchIndex[PITCH_SUBFRAMES * 2];
/* LPC */
int LPCindex_s[108 * 2]; /* KLT_ORDER_SHAPE = 108 */
int LPCindex_g[12 * 2]; /* KLT_ORDER_GAIN = 12 */
double LPCcoeffs_lo[(ORDERLO + 1) * SUBFRAMES * 2];
double LPCcoeffs_hi[(ORDERHI + 1) * SUBFRAMES * 2];
/* Encode Spec */
int16_t fre[FRAMESAMPLES];
int16_t fim[FRAMESAMPLES];
int16_t AvgPitchGain[2];
/* Used in adaptive mode only */
int minBytes;
} IsacSaveEncoderData;
typedef struct {
int indexLPCShape[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
double lpcGain[SUBFRAMES << 1];
int lpcGainIndex[SUBFRAMES << 1];
Bitstr bitStreamObj;
int16_t realFFT[FRAMESAMPLES_HALF];
int16_t imagFFT[FRAMESAMPLES_HALF];
} ISACUBSaveEncDataStruct;
typedef struct {
Bitstr bitstr_obj;
MaskFiltstr maskfiltstr_obj;
PreFiltBankstr prefiltbankstr_obj;
PitchFiltstr pitchfiltstr_obj;
PitchAnalysisStruct pitchanalysisstr_obj;
FFTstr fftstr_obj;
IsacSaveEncoderData SaveEnc_obj;
int buffer_index;
int16_t current_framesamples;
float data_buffer_float[FRAMESAMPLES_30ms];
int frame_nb;
double bottleneck;
int16_t new_framelength;
double s2nr;
/* Maximum allowed number of bits for a 30 msec packet */
int16_t payloadLimitBytes30;
/* Maximum allowed number of bits for a 30 msec packet */
int16_t payloadLimitBytes60;
/* Maximum allowed number of bits for both 30 and 60 msec packet */
int16_t maxPayloadBytes;
/* Maximum allowed rate in bytes per 30 msec packet */
int16_t maxRateInBytes;
/*---
If set to 1 iSAC will not adapt the frame-size, if used in
channel-adaptive mode. The initial value will be used for all rates.
---*/
int16_t enforceFrameSize;
/*-----
This records the BWE index the encoder injected into the bit-stream.
It will be used in RCU. The same BWE index of main payload will be in
the redundant payload. We can not retrieve it from BWE because it is
a recursive procedure (WebRtcIsac_GetDownlinkBwJitIndexImpl) and has to be
called only once per each encode.
-----*/
int16_t lastBWIdx;
} ISACLBEncStruct;
typedef struct {
Bitstr bitstr_obj;
MaskFiltstr maskfiltstr_obj;
PreFiltBankstr prefiltbankstr_obj;
FFTstr fftstr_obj;
ISACUBSaveEncDataStruct SaveEnc_obj;
int buffer_index;
float data_buffer_float[MAX_FRAMESAMPLES + LB_TOTAL_DELAY_SAMPLES];
double bottleneck;
/* Maximum allowed number of bits for a 30 msec packet */
// int16_t payloadLimitBytes30;
/* Maximum allowed number of bits for both 30 and 60 msec packet */
// int16_t maxPayloadBytes;
int16_t maxPayloadSizeBytes;
double lastLPCVec[UB_LPC_ORDER];
int16_t numBytesUsed;
int16_t lastJitterInfo;
} ISACUBEncStruct;
typedef struct {
Bitstr bitstr_obj;
MaskFiltstr maskfiltstr_obj;
PostFiltBankstr postfiltbankstr_obj;
PitchFiltstr pitchfiltstr_obj;
FFTstr fftstr_obj;
} ISACLBDecStruct;
typedef struct {
Bitstr bitstr_obj;
MaskFiltstr maskfiltstr_obj;
PostFiltBankstr postfiltbankstr_obj;
FFTstr fftstr_obj;
} ISACUBDecStruct;
typedef struct {
ISACLBEncStruct ISACencLB_obj;
ISACLBDecStruct ISACdecLB_obj;
} ISACLBStruct;
typedef struct {
ISACUBEncStruct ISACencUB_obj;
ISACUBDecStruct ISACdecUB_obj;
} ISACUBStruct;
/*
This struct is used to take a snapshot of the entropy coder and LPC gains
right before encoding LPC gains. This allows us to go back to that state
if we like to limit the payload size.
*/
typedef struct {
/* 6 lower-band & 6 upper-band */
double loFiltGain[SUBFRAMES];
double hiFiltGain[SUBFRAMES];
/* Upper boundary of interval W */
uint32_t W_upper;
uint32_t streamval;
/* Index to the current position in bytestream */
uint32_t stream_index;
uint8_t stream[3];
} transcode_obj;
typedef struct {
// TODO(kwiberg): The size of these tables could be reduced by storing floats
// instead of doubles, and by making use of the identity cos(x) =
// sin(x+pi/2). They could also be made global constants that we fill in at
// compile time.
double costab1[FRAMESAMPLES_HALF];
double sintab1[FRAMESAMPLES_HALF];
double costab2[FRAMESAMPLES_QUARTER];
double sintab2[FRAMESAMPLES_QUARTER];
} TransformTables;
typedef struct {
// lower-band codec instance
ISACLBStruct instLB;
// upper-band codec instance
ISACUBStruct instUB;
// Bandwidth Estimator and model for the rate.
BwEstimatorstr bwestimator_obj;
RateModel rate_data_obj;
double MaxDelay;
/* 0 = adaptive; 1 = instantaneous */
int16_t codingMode;
// overall bottleneck of the codec
int32_t bottleneck;
// QMF Filter state
int32_t analysisFBState1[FB_STATE_SIZE_WORD32];
int32_t analysisFBState2[FB_STATE_SIZE_WORD32];
int32_t synthesisFBState1[FB_STATE_SIZE_WORD32];
int32_t synthesisFBState2[FB_STATE_SIZE_WORD32];
// Error Code
int16_t errorCode;
// bandwidth of the encoded audio 8, 12 or 16 kHz
enum ISACBandwidth bandwidthKHz;
// Sampling rate of audio, encoder and decode, 8 or 16 kHz
enum IsacSamplingRate encoderSamplingRateKHz;
enum IsacSamplingRate decoderSamplingRateKHz;
// Flag to keep track of initializations, lower & upper-band
// encoder and decoder.
int16_t initFlag;
// Flag to to indicate signal bandwidth switch
int16_t resetFlag_8kHz;
// Maximum allowed rate, measured in Bytes per 30 ms.
int16_t maxRateBytesPer30Ms;
// Maximum allowed payload-size, measured in Bytes.
int16_t maxPayloadSizeBytes;
/* The expected sampling rate of the input signal. Valid values are 16000
* and 32000. This is not the operation sampling rate of the codec. */
uint16_t in_sample_rate_hz;
// Trig tables for WebRtcIsac_Time2Spec and WebRtcIsac_Spec2time.
TransformTables transform_tables;
} ISACMainStruct;
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ */