add vad code.
This commit is contained in:
@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Plain-data record of iSAC bandwidth-estimation values.
 * NOTE(review): the code that fills in and consumes this struct is not part
 * of this header; the per-field notes are inferred from the names only and
 * should be confirmed against the bandwidth estimator. */
typedef struct {
  int in_use;                 /* presumably non-zero when the record is valid */
  int32_t send_bw_avg;        /* presumably the averaged send bandwidth */
  int32_t send_max_delay_avg; /* presumably the averaged maximum send delay */
  int16_t bottleneck_idx;     /* presumably an index into a bottleneck table */
  int16_t jitter_info;        /* presumably a jitter indicator */
} IsacBandwidthInfo;
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
|
@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
#ifdef WEBRTC_ANDROID
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
|
||||
|
||||
/* In-place all-pole (purely recursive) filter.
 *
 *   y[n] = (x[n] - sum_{k=1..orderCoef} Coef[k] * y[n-k]) / Coef[0]
 *
 * InOut:       lengthInOut samples, overwritten with the output. The filter
 *              state must be stored directly in front of the data, i.e. in
 *              InOut[-1] .. InOut[-orderCoef].
 * Coef:        orderCoef + 1 denominator coefficients.
 * lengthInOut: number of samples to process.
 * orderCoef:   filter order; 0 means no feedback taps are read.
 */
static void WebRtcIsac_AllPoleFilter(double* InOut,
                                     const double* Coef,
                                     size_t lengthInOut,
                                     int orderCoef) {
  size_t n;
  int k;

  if ((Coef[0] > 0.9999) && (Coef[0] < 1.0001)) {
    /* Leading coefficient is numerically one: skip the normalisation. */
    for (n = 0; n < lengthInOut; n++) {
      double sum = 0.0;
      /* Guard the first tap so that an order-0 filter never touches
       * Coef[1] or InOut[-1]. */
      if (orderCoef >= 1) {
        sum = Coef[1] * InOut[-1];
        for (k = 2; k <= orderCoef; k++) {
          sum += Coef[k] * InOut[-k];
        }
      }
      *InOut++ -= sum;
    }
  } else {
    /* General case: scale every term by 1 / Coef[0]. */
    const double scal = 1.0 / Coef[0];
    for (n = 0; n < lengthInOut; n++) {
      *InOut *= scal;
      for (k = 1; k <= orderCoef; k++) {
        *InOut -= scal * Coef[k] * InOut[-k];
      }
      InOut++;
    }
  }
}
|
||||
|
||||
/* All-zero (FIR) filter.
 *
 *   Out[n] = sum_{k=0..orderCoef} Coef[k] * In[n-k]
 *
 * In:          lengthInOut input samples. The filter state must be stored
 *              directly in front of the data, in In[-1] .. In[-orderCoef].
 * Coef:        orderCoef + 1 filter taps.
 * lengthInOut: number of samples to process.
 * orderCoef:   filter order.
 * Out:         receives lengthInOut output samples.
 */
static void WebRtcIsac_AllZeroFilter(const double* In,
                                     const double* Coef,
                                     size_t lengthInOut,
                                     int orderCoef,
                                     double* Out) {
  size_t n;
  int k;

  for (n = 0; n < lengthInOut; n++) {
    double acc = In[0] * Coef[0];

    for (k = 1; k <= orderCoef; k++) {
      acc += Coef[k] * In[-k];
    }

    *Out++ = acc;
    In++;
  }
}
|
||||
|
||||
/* Zero-pole filter: runs the all-zero section from In into Out, then applies
 * the all-pole section to Out in place.
 *
 * The state of the zero section must be in In[-1] .. In[-orderCoef] and the
 * state of the pole section in Out[-1] .. Out[-orderCoef].
 *
 * In:          lengthInOut input samples.
 * ZeroCoef:    orderCoef + 1 numerator coefficients.
 * PoleCoef:    orderCoef + 1 denominator coefficients.
 * lengthInOut: number of samples to process.
 * orderCoef:   order shared by both sections.
 * Out:         receives lengthInOut output samples.
 */
static void WebRtcIsac_ZeroPoleFilter(double* In,
                                      double* ZeroCoef,
                                      double* PoleCoef,
                                      size_t lengthInOut,
                                      int orderCoef,
                                      double* Out) {
  WebRtcIsac_AllZeroFilter(In, ZeroCoef, lengthInOut, orderCoef, Out);
  WebRtcIsac_AllPoleFilter(Out, PoleCoef, lengthInOut, orderCoef);
}
|
||||
|
||||
|
||||
/* Computes the (unnormalised) autocorrelation of x for lags 0..order:
 *
 *   r[lag] = sum_{n=0..N-lag-1} x[n] * x[n + lag]
 *
 * r:     receives order + 1 values.
 * x:     N input samples.
 * N:     number of samples in x.
 * order: highest lag to compute.
 *
 * Lags with no overlapping samples (lag >= N) yield 0.0; the size_t
 * difference N - lag is never formed for them, so it cannot wrap around and
 * x is never read past its end.
 */
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order) {
  size_t lag;

  for (lag = 0; lag <= order; lag++) {
    double sum = 0.0;

    if (lag < N) {
      const double* x_lag = &x[lag];
      const size_t overlap = N - lag;
      size_t n;

      for (n = 0; n < overlap; n++) {
        sum += x[n] * x_lag[n];
      }
    }
    r[lag] = sum;
  }
}
|
||||
|
||||
/* Bandwidth expansion of a polynomial: out[i] = coef^i * in[i].
 *
 * out:    receives length coefficients.
 * in:     length input coefficients.
 * coef:   expansion factor applied cumulatively to successive coefficients.
 * length: number of coefficients; nothing is written when it is 0.
 */
static void WebRtcIsac_BwExpand(double* out,
                                const double* in,
                                double coef,
                                size_t length) {
  size_t i;
  double chirp = coef;

  if (length == 0) {
    return;
  }

  out[0] = in[0];
  for (i = 1; i < length; i++) {
    out[i] = chirp * in[i];
    chirp *= coef;
  }
}
|
||||
|
||||
void WebRtcIsac_WeightingFilter(const double* in,
|
||||
double* weiout,
|
||||
double* whiout,
|
||||
WeightFiltstr* wfdata) {
|
||||
double tmpbuffer[PITCH_FRAME_LEN + PITCH_WLPCBUFLEN];
|
||||
double corr[PITCH_WLPCORDER+1], rc[PITCH_WLPCORDER+1];
|
||||
double apol[PITCH_WLPCORDER+1], apolr[PITCH_WLPCORDER+1];
|
||||
double rho=0.9, *inp, *dp, *dp2;
|
||||
double whoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER];
|
||||
double weoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER];
|
||||
double *weo, *who, opol[PITCH_WLPCORDER+1], ext[PITCH_WLPCWINLEN];
|
||||
int k, n, endpos, start;
|
||||
|
||||
/* Set up buffer and states */
|
||||
memcpy(tmpbuffer, wfdata->buffer, sizeof(double) * PITCH_WLPCBUFLEN);
|
||||
memcpy(tmpbuffer+PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN);
|
||||
memcpy(wfdata->buffer, tmpbuffer+PITCH_FRAME_LEN, sizeof(double) * PITCH_WLPCBUFLEN);
|
||||
|
||||
dp=weoutbuf;
|
||||
dp2=whoutbuf;
|
||||
for (k=0;k<PITCH_WLPCORDER;k++) {
|
||||
*dp++ = wfdata->weostate[k];
|
||||
*dp2++ = wfdata->whostate[k];
|
||||
opol[k]=0.0;
|
||||
}
|
||||
opol[0]=1.0;
|
||||
opol[PITCH_WLPCORDER]=0.0;
|
||||
weo=dp;
|
||||
who=dp2;
|
||||
|
||||
endpos=PITCH_WLPCBUFLEN + PITCH_SUBFRAME_LEN;
|
||||
inp=tmpbuffer + PITCH_WLPCBUFLEN;
|
||||
|
||||
for (n=0; n<PITCH_SUBFRAMES; n++) {
|
||||
/* Windowing */
|
||||
start=endpos-PITCH_WLPCWINLEN;
|
||||
for (k=0; k<PITCH_WLPCWINLEN; k++) {
|
||||
ext[k]=wfdata->window[k]*tmpbuffer[start+k];
|
||||
}
|
||||
|
||||
/* Get LPC polynomial */
|
||||
WebRtcIsac_AutoCorr(corr, ext, PITCH_WLPCWINLEN, PITCH_WLPCORDER);
|
||||
corr[0]=1.01*corr[0]+1.0; /* White noise correction */
|
||||
WebRtcIsac_LevDurb(apol, rc, corr, PITCH_WLPCORDER);
|
||||
WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER+1);
|
||||
|
||||
/* Filtering */
|
||||
WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, weo);
|
||||
WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, who);
|
||||
|
||||
inp+=PITCH_SUBFRAME_LEN;
|
||||
endpos+=PITCH_SUBFRAME_LEN;
|
||||
weo+=PITCH_SUBFRAME_LEN;
|
||||
who+=PITCH_SUBFRAME_LEN;
|
||||
}
|
||||
|
||||
/* Export filter states */
|
||||
for (k=0;k<PITCH_WLPCORDER;k++) {
|
||||
wfdata->weostate[k]=weoutbuf[PITCH_FRAME_LEN+k];
|
||||
wfdata->whostate[k]=whoutbuf[PITCH_FRAME_LEN+k];
|
||||
}
|
||||
|
||||
/* Export output data */
|
||||
memcpy(weiout, weoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN);
|
||||
memcpy(whiout, whoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN);
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order);
|
||||
|
||||
void WebRtcIsac_WeightingFilter(const double* in,
|
||||
double* weiout,
|
||||
double* whiout,
|
||||
WeightFiltstr* wfdata);
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
|
@ -0,0 +1,409 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
/* Resets a pitch-filter state: clears the PITCH_BUFFSIZE-long ubuf and the
 * PITCH_DAMPORDER-long ystate, and sets the previous lag and gain to their
 * start values (50.0 and 0.0). */
void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata) {
  int k;

  for (k = 0; k < PITCH_BUFFSIZE; k++) {
    pitchfiltdata->ubuf[k] = 0.0;
  }
  for (k = 0; k < PITCH_DAMPORDER; k++) {
    pitchfiltdata->ystate[k] = 0.0;
  }
  pitchfiltdata->oldlagp[0] = 50.0;
  pitchfiltdata->oldgainp[0] = 0.0;
}
|
||||
|
||||
/* Resets a weighting-filter state: clears the input history and the three
 * filter-state arrays, and fills in the analysis window
 *
 *   window[k] = sin^2(pi * (PITCH_WLPCASYM * u + (1 - PITCH_WLPCASYM) * u^2))
 *
 * with u = (k + 0.5) / PITCH_WLPCWINLEN for k = 0..PITCH_WLPCWINLEN-1.
 */
static void WebRtcIsac_InitWeightingFilter(WeightFiltstr* wfdata) {
  int k;
  double t, dtmp, dtmp2, denum, denum2;

  for (k = 0; k < PITCH_WLPCBUFLEN; k++) {
    wfdata->buffer[k] = 0.0;
  }

  for (k = 0; k < PITCH_WLPCORDER; k++) {
    wfdata->istate[k] = 0.0;
    wfdata->weostate[k] = 0.0;
    wfdata->whostate[k] = 0.0;
  }

  /* next part should be in Matlab, writing to a global table */
  t = 0.5;
  denum = 1.0 / ((double)PITCH_WLPCWINLEN);
  denum2 = denum * denum;
  for (k = 0; k < PITCH_WLPCWINLEN; k++) {
    dtmp = PITCH_WLPCASYM * t * denum + (1 - PITCH_WLPCASYM) * t * t * denum2;
    dtmp *= 3.14159265; /* truncated pi, kept as-is so the window values do
                           not change */
    dtmp2 = sin(dtmp);
    wfdata->window[k] = dtmp2 * dtmp2;
    t++;
  }
}
|
||||
|
||||
/* Resets a pitch-analysis state: clears the decimated-signal buffer, the
 * decimator and high-pass states and both look-ahead buffers, then
 * initialises the two embedded pitch filters and the weighting filter. */
void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* State) {
  int k;

  for (k = 0; k < PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
                      PITCH_FRAME_LEN / 2 + 2;
       k++) {
    State->dec_buffer[k] = 0.0;
  }
  for (k = 0; k < 2 * ALLPASSSECTIONS + 1; k++) {
    State->decimator_state[k] = 0.0;
  }
  for (k = 0; k < 2; k++) {
    State->hp_state[k] = 0.0;
  }
  for (k = 0; k < QLOOKAHEAD; k++) {
    State->whitened_buf[k] = 0.0;
  }
  for (k = 0; k < QLOOKAHEAD; k++) {
    State->inbuf[k] = 0.0;
  }

  WebRtcIsac_InitPitchFilter(&(State->PFstr_wght));

  WebRtcIsac_InitPitchFilter(&(State->PFstr));

  WebRtcIsac_InitWeightingFilter(&(State->Wghtstr));
}
|
||||
|
||||
/* Resets the analysis filter-bank state: zeroes the look-ahead buffers, the
 * all-pass filter states and the high-pass filter states, for both the
 * plain and the `_float` copies of each member. */
void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata) {
  int k;

  /* Look-ahead sample buffers. */
  for (k = 0; k < QLOOKAHEAD; k++) {
    prefiltdata->INLABUF1[k] = 0;
    prefiltdata->INLABUF2[k] = 0;

    prefiltdata->INLABUF1_float[k] = 0;
    prefiltdata->INLABUF2_float[k] = 0;
  }

  /* All-pass filter states. */
  for (k = 0; k < 2 * (QORDER - 1); k++) {
    prefiltdata->INSTAT1[k] = 0;
    prefiltdata->INSTAT2[k] = 0;
    prefiltdata->INSTATLA1[k] = 0;
    prefiltdata->INSTATLA2[k] = 0;

    prefiltdata->INSTAT1_float[k] = 0;
    prefiltdata->INSTAT2_float[k] = 0;
    prefiltdata->INSTATLA1_float[k] = 0;
    prefiltdata->INSTATLA2_float[k] = 0;
  }

  /* High pass filter states */
  prefiltdata->HPstates[0] = 0.0;
  prefiltdata->HPstates[1] = 0.0;

  prefiltdata->HPstates_float[0] = 0.0f;
  prefiltdata->HPstates_float[1] = 0.0f;
}
|
||||
|
||||
/* Levinson-Durbin recursion: derives prediction-polynomial and reflection
 * coefficients from an autocorrelation sequence.
 *
 * a:     receives order + 1 polynomial coefficients; a[0] is always 1.0.
 * k:     receives `order` reflection coefficients.
 * r:     order + 1 autocorrelation values r[0]..r[order].
 * order: prediction order.
 *
 * Returns the final prediction-error energy. When r[0] is below 1e-10 all
 * coefficients are set to zero and 0.0 is returned. When order is 0 only
 * a[0] is written and r[0] is returned, so r[1], a[1] and k[0] are never
 * touched.
 */
double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order) {
  const double LEVINSON_EPS = 1.0e-10;

  double alpha;
  size_t m, i;

  a[0] = 1.0;

  if (r[0] < LEVINSON_EPS) { /* if r[0] <= 0, set LPC coeff. to zero */
    for (i = 0; i < order; i++) {
      k[i] = 0;
      a[i + 1] = 0;
    }
    return 0.0;
  }

  if (order == 0) {
    /* Zeroth-order predictor: the error energy is the signal energy. */
    return r[0];
  }

  a[1] = k[0] = -r[1] / r[0];
  alpha = r[0] + r[1] * k[0];

  for (m = 1; m < order; m++) {
    const size_t m_h = (m + 1) >> 1;
    double sum = r[m + 1];

    for (i = 0; i < m; i++) {
      sum += a[i + 1] * r[m - i];
    }
    k[m] = -sum / alpha;
    alpha += k[m] * sum;

    /* Update a[1..m] symmetrically from both ends; `sum` is reused as the
     * temporary so that each pair is computed from the old values. */
    for (i = 0; i < m_h; i++) {
      sum = a[i + 1] + k[m] * a[m - i];
      a[m - i] += k[m] * a[i + 1];
      a[i + 1] = sum;
    }
    a[m + 1] = k[m];
  }

  return alpha;
}
|
||||
|
||||
/* The upper channel all-pass filter factors */
const float WebRtcIsac_kUpperApFactorsFloat[2] = {0.03470000000000f,
                                                  0.38260000000000f};

/* The lower channel all-pass filter factors */
const float WebRtcIsac_kLowerApFactorsFloat[2] = {0.15440000000000f,
                                                  0.74400000000000f};
|
||||
|
||||
/* Cascade of first-order all-pass sections, applied in place.
 *
 * Each section j runs over the whole buffer before the next section starts:
 *   y[n]     = state_j + factor_j * x[n]
 *   state_j  = x[n] - factor_j * y[n]
 *
 * InOut:            lengthInOut samples; the input is overwritten.
 * APSectionFactors: NumberOfSections section coefficients.
 * lengthInOut:      number of samples to process.
 * NumberOfSections: number of cascaded sections.
 * FilterState:      NumberOfSections state values, updated on return.
 */
void WebRtcIsac_AllPassFilter2Float(float* InOut,
                                    const float* APSectionFactors,
                                    int lengthInOut,
                                    int NumberOfSections,
                                    float* FilterState) {
  int n, j;
  float temp;

  for (j = 0; j < NumberOfSections; j++) {
    for (n = 0; n < lengthInOut; n++) {
      temp = FilterState[j] + APSectionFactors[j] * InOut[n];
      FilterState[j] = -APSectionFactors[j] * temp + InOut[n];
      InOut[n] = temp;
    }
  }
}
|
||||
|
||||
/* The number of composite all-pass filter factors */
|
||||
#define NUMBEROFCOMPOSITEAPSECTIONS 4
|
||||
|
||||
/* Function WebRtcIsac_SplitAndFilter
|
||||
* This function creates low-pass and high-pass decimated versions of part of
|
||||
the input signal, and part of the signal in the input 'lookahead buffer'.
|
||||
|
||||
INPUTS:
|
||||
in: a length FRAMESAMPLES array of input samples
|
||||
prefiltdata: input data structure containing the filterbank states
|
||||
and lookahead samples from the previous encoding
|
||||
iteration.
|
||||
OUTPUTS:
|
||||
LP: a FRAMESAMPLES_HALF array of low-pass filtered samples that
|
||||
have been phase equalized. The first QLOOKAHEAD samples are
|
||||
based on the samples in the two prefiltdata->INLABUFx arrays
|
||||
each of length QLOOKAHEAD.
|
||||
The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based
|
||||
on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input
|
||||
array in[].
|
||||
HP: a FRAMESAMPLES_HALF array of high-pass filtered samples that
|
||||
have been phase equalized. The first QLOOKAHEAD samples are
|
||||
based on the samples in the two prefiltdata->INLABUFx arrays
|
||||
each of length QLOOKAHEAD.
|
||||
The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based
|
||||
on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input
|
||||
array in[].
|
||||
|
||||
LP_la: a FRAMESAMPLES_HALF array of low-pass filtered samples.
|
||||
These samples are not phase equalized. They are computed
|
||||
from the samples in the in[] array.
|
||||
HP_la: a FRAMESAMPLES_HALF array of high-pass filtered samples
|
||||
that are not phase equalized. They are computed from
|
||||
the in[] vector.
|
||||
prefiltdata: this input data structure's filterbank state and
|
||||
lookahead sample buffers are updated for the next
|
||||
encoding iteration.
|
||||
*/
|
||||
void WebRtcIsac_SplitAndFilterFloat(float* pin,
|
||||
float* LP,
|
||||
float* HP,
|
||||
double* LP_la,
|
||||
double* HP_la,
|
||||
PreFiltBankstr* prefiltdata) {
|
||||
int k, n;
|
||||
float CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS];
|
||||
float ForTransform_CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS];
|
||||
float ForTransform_CompositeAPFilterState2[NUMBEROFCOMPOSITEAPSECTIONS];
|
||||
float tempinoutvec[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
|
||||
float tempin_ch1[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
|
||||
float tempin_ch2[FRAMESAMPLES + MAX_AR_MODEL_ORDER];
|
||||
float in[FRAMESAMPLES];
|
||||
float ftmp;
|
||||
|
||||
/* HPstcoeff_in = {a1, a2, b1 - b0 * a1, b2 - b0 * a2}; */
|
||||
static const float kHpStCoefInFloat[4] = {
|
||||
-1.94895953203325f, 0.94984516000000f, -0.05101826139794f,
|
||||
0.05015484000000f};
|
||||
|
||||
/* The composite all-pass filter factors */
|
||||
static const float WebRtcIsac_kCompositeApFactorsFloat[4] = {
|
||||
0.03470000000000f, 0.15440000000000f, 0.38260000000000f,
|
||||
0.74400000000000f};
|
||||
|
||||
// The matrix for transforming the backward composite state to upper channel
|
||||
// state.
|
||||
static const float WebRtcIsac_kTransform1Float[8] = {
|
||||
-0.00158678506084f, 0.00127157815343f, -0.00104805672709f,
|
||||
0.00084837248079f, 0.00134467983258f, -0.00107756549387f,
|
||||
0.00088814793277f, -0.00071893072525f};
|
||||
|
||||
// The matrix for transforming the backward composite state to lower channel
|
||||
// state.
|
||||
static const float WebRtcIsac_kTransform2Float[8] = {
|
||||
-0.00170686041697f, 0.00136780109829f, -0.00112736532350f,
|
||||
0.00091257055385f, 0.00103094281812f, -0.00082615076557f,
|
||||
0.00068092756088f, -0.00055119165484f};
|
||||
|
||||
/* High pass filter */
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES; k++) {
|
||||
in[k] = pin[k] + kHpStCoefInFloat[2] * prefiltdata->HPstates_float[0] +
|
||||
kHpStCoefInFloat[3] * prefiltdata->HPstates_float[1];
|
||||
ftmp = pin[k] - kHpStCoefInFloat[0] * prefiltdata->HPstates_float[0] -
|
||||
kHpStCoefInFloat[1] * prefiltdata->HPstates_float[1];
|
||||
prefiltdata->HPstates_float[1] = prefiltdata->HPstates_float[0];
|
||||
prefiltdata->HPstates_float[0] = ftmp;
|
||||
}
|
||||
|
||||
/* First Channel */
|
||||
|
||||
/*initial state of composite filter is zero */
|
||||
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
|
||||
CompositeAPFilterState[k] = 0.0;
|
||||
}
|
||||
/* put every other sample of input into a temporary vector in reverse
|
||||
* (backward) order*/
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempinoutvec[k] = in[FRAMESAMPLES - 1 - 2 * k];
|
||||
}
|
||||
|
||||
/* now all-pass filter the backwards vector. Output values overwrite the
|
||||
* input vector. */
|
||||
WebRtcIsac_AllPassFilter2Float(
|
||||
tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF,
|
||||
NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
|
||||
|
||||
/* save the backwards filtered output for later forward filtering,
|
||||
but write it in forward order*/
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempin_ch1[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k];
|
||||
}
|
||||
|
||||
/* save the backwards filter state becaue it will be transformed
|
||||
later into a forward state */
|
||||
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
|
||||
ForTransform_CompositeAPFilterState[k] = CompositeAPFilterState[k];
|
||||
}
|
||||
|
||||
/* now backwards filter the samples in the lookahead buffer. The samples were
|
||||
placed there in the encoding of the previous frame. The output samples
|
||||
overwrite the input samples */
|
||||
WebRtcIsac_AllPassFilter2Float(
|
||||
prefiltdata->INLABUF1_float, WebRtcIsac_kCompositeApFactorsFloat,
|
||||
QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
|
||||
|
||||
/* save the output, but write it in forward order */
|
||||
/* write the lookahead samples for the next encoding iteration. Every other
|
||||
sample at the end of the input frame is written in reverse order for the
|
||||
lookahead length. Exported in the prefiltdata structure. */
|
||||
for (k = 0; k < QLOOKAHEAD; k++) {
|
||||
tempin_ch1[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF1_float[k];
|
||||
prefiltdata->INLABUF1_float[k] = in[FRAMESAMPLES - 1 - 2 * k];
|
||||
}
|
||||
|
||||
/* Second Channel. This is exactly like the first channel, except that the
|
||||
even samples are now filtered instead (lower channel). */
|
||||
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
|
||||
CompositeAPFilterState[k] = 0.0;
|
||||
}
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempinoutvec[k] = in[FRAMESAMPLES - 2 - 2 * k];
|
||||
}
|
||||
|
||||
WebRtcIsac_AllPassFilter2Float(
|
||||
tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF,
|
||||
NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempin_ch2[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k];
|
||||
}
|
||||
|
||||
for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) {
|
||||
ForTransform_CompositeAPFilterState2[k] = CompositeAPFilterState[k];
|
||||
}
|
||||
|
||||
WebRtcIsac_AllPassFilter2Float(
|
||||
prefiltdata->INLABUF2_float, WebRtcIsac_kCompositeApFactorsFloat,
|
||||
QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState);
|
||||
|
||||
for (k = 0; k < QLOOKAHEAD; k++) {
|
||||
tempin_ch2[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF2_float[k];
|
||||
prefiltdata->INLABUF2_float[k] = in[FRAMESAMPLES - 2 - 2 * k];
|
||||
}
|
||||
|
||||
/* Transform filter states from backward to forward */
|
||||
/*At this point, each of the states of the backwards composite filters for the
|
||||
two channels are transformed into forward filtering states for the
|
||||
corresponding forward channel filters. Each channel's forward filtering
|
||||
state from the previous
|
||||
encoding iteration is added to the transformed state to get a proper forward
|
||||
state */
|
||||
|
||||
/* So the existing NUMBEROFCOMPOSITEAPSECTIONS x 1 (4x1) state vector is
|
||||
multiplied by a NUMBEROFCHANNELAPSECTIONSxNUMBEROFCOMPOSITEAPSECTIONS (2x4)
|
||||
transform matrix to get the new state that is added to the previous 2x1
|
||||
input state */
|
||||
|
||||
for (k = 0; k < NUMBEROFCHANNELAPSECTIONS; k++) { /* k is row variable */
|
||||
for (n = 0; n < NUMBEROFCOMPOSITEAPSECTIONS;
|
||||
n++) { /* n is column variable */
|
||||
prefiltdata->INSTAT1_float[k] +=
|
||||
ForTransform_CompositeAPFilterState[n] *
|
||||
WebRtcIsac_kTransform1Float[k * NUMBEROFCHANNELAPSECTIONS + n];
|
||||
prefiltdata->INSTAT2_float[k] +=
|
||||
ForTransform_CompositeAPFilterState2[n] *
|
||||
WebRtcIsac_kTransform2Float[k * NUMBEROFCHANNELAPSECTIONS + n];
|
||||
}
|
||||
}
|
||||
|
||||
/*obtain polyphase components by forward all-pass filtering through each
|
||||
* channel */
|
||||
/* the backward filtered samples are now forward filtered with the
|
||||
* corresponding channel filters */
|
||||
/* The all pass filtering automatically updates the filter states which are
|
||||
exported in the prefiltdata structure */
|
||||
WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat,
|
||||
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
|
||||
prefiltdata->INSTAT1_float);
|
||||
WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat,
|
||||
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
|
||||
prefiltdata->INSTAT2_float);
|
||||
|
||||
/* Now Construct low-pass and high-pass signals as combinations of polyphase
|
||||
* components */
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
LP[k] = 0.5f * (tempin_ch1[k] + tempin_ch2[k]); /* low pass signal*/
|
||||
HP[k] = 0.5f * (tempin_ch1[k] - tempin_ch2[k]); /* high pass signal*/
|
||||
}
|
||||
|
||||
/* Lookahead LP and HP signals */
|
||||
/* now create low pass and high pass signals of the input vector. However, no
|
||||
backwards filtering is performed, and hence no phase equalization is
|
||||
involved. Also, the input contains some samples that are lookahead samples.
|
||||
The high pass and low pass signals that are created are used outside this
|
||||
function for analysis (not encoding) purposes */
|
||||
|
||||
/* set up input */
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
tempin_ch1[k] = in[2 * k + 1];
|
||||
tempin_ch2[k] = in[2 * k];
|
||||
}
|
||||
|
||||
/* the input filter states are passed in and updated by the all-pass filtering
|
||||
routine and exported in the prefiltdata structure*/
|
||||
WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat,
|
||||
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
|
||||
prefiltdata->INSTATLA1_float);
|
||||
WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat,
|
||||
FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS,
|
||||
prefiltdata->INSTATLA2_float);
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES_HALF; k++) {
|
||||
LP_la[k] = (float)(0.5f * (tempin_ch1[k] + tempin_ch2[k])); /*low pass */
|
||||
HP_la[k] = (double)(0.5f * (tempin_ch1[k] - tempin_ch2[k])); /* high pass */
|
||||
}
|
||||
}
|
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata);
|
||||
void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* state);
|
||||
void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata);
|
||||
|
||||
double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order);
|
||||
|
||||
/* The number of all-pass filter factors in an upper or lower channel*/
|
||||
#define NUMBEROFCHANNELAPSECTIONS 2
|
||||
|
||||
/* The upper channel all-pass filter factors */
|
||||
extern const float WebRtcIsac_kUpperApFactorsFloat[2];
|
||||
|
||||
/* The lower channel all-pass filter factors */
|
||||
extern const float WebRtcIsac_kLowerApFactorsFloat[2];
|
||||
|
||||
void WebRtcIsac_AllPassFilter2Float(float* InOut,
|
||||
const float* APSectionFactors,
|
||||
int lengthInOut,
|
||||
int NumberOfSections,
|
||||
float* FilterState);
|
||||
void WebRtcIsac_SplitAndFilterFloat(float* in,
|
||||
float* LP,
|
||||
float* HP,
|
||||
double* LP_la,
|
||||
double* HP_la,
|
||||
PreFiltBankstr* prefiltdata);
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_
|
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
#if defined(WEBRTC_POSIX)
|
||||
#define WebRtcIsac_lrint lrint
|
||||
#elif (defined(WEBRTC_ARCH_X86) && defined(WIN32))
|
||||
static __inline long int WebRtcIsac_lrint(double x_dbl) {
|
||||
long int x_int;
|
||||
|
||||
__asm {
|
||||
fld x_dbl
|
||||
fistp x_int
|
||||
}
|
||||
;
|
||||
|
||||
return x_int;
|
||||
}
|
||||
#else // Do a slow but correct implementation of lrint
|
||||
|
||||
static __inline long int WebRtcIsac_lrint(double x_dbl) {
|
||||
long int x_int;
|
||||
x_int = (long int)floor(x_dbl + 0.499999999999);
|
||||
return x_int;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_
|
@ -0,0 +1,695 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
#ifdef WEBRTC_ANDROID
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h"
|
||||
#include "rtc_base/system/ignore_warnings.h"
|
||||
|
||||
/* Symmetric 8-tap interpolation window used by IntrepolFilter(). */
static const double kInterpolWin[8] = {
    -0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
    0.60086484101160,  -0.12203175715679, 0.02184247643159, -0.00067556028640};

/* interpolation filter
 *
 * Writes to *intrp the weighted sum of the eight samples data_ptr[-3] ..
 * data_ptr[4], using kInterpolWin[0..7] as weights in that order. */
static __inline void IntrepolFilter(double* data_ptr, double* intrp) {
  *intrp = kInterpolWin[0] * data_ptr[-3];
  *intrp += kInterpolWin[1] * data_ptr[-2];
  *intrp += kInterpolWin[2] * data_ptr[-1];
  *intrp += kInterpolWin[3] * data_ptr[0];
  *intrp += kInterpolWin[4] * data_ptr[1];
  *intrp += kInterpolWin[5] * data_ptr[2];
  *intrp += kInterpolWin[6] * data_ptr[3];
  *intrp += kInterpolWin[7] * data_ptr[4];
}
|
||||
|
||||
|
||||
/* 2D parabolic interpolation */
|
||||
/* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */
|
||||
__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
|
||||
{
|
||||
double c, b[2], A[2][2];
|
||||
double t1, t2, d;
|
||||
double delta1, delta2;
|
||||
|
||||
|
||||
// double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
|
||||
// should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
|
||||
|
||||
c = T[1][1];
|
||||
b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
|
||||
b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
|
||||
A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
|
||||
t1 = 0.5 * (T[0][0] + T[2][2]) - c;
|
||||
t2 = 0.5 * (T[2][0] + T[0][2]) - c;
|
||||
d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
|
||||
A[0][0] = -t1 - 0.5 * d;
|
||||
A[1][1] = -t2 - 0.5 * d;
|
||||
|
||||
/* deal with singularities or ill-conditioned cases */
|
||||
if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
|
||||
*peak_val = T[1][1];
|
||||
return;
|
||||
}
|
||||
|
||||
/* Cholesky decomposition: replace A by upper-triangular factor */
|
||||
A[0][0] = sqrt(A[0][0]);
|
||||
A[0][1] = A[0][1] / A[0][0];
|
||||
A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
|
||||
|
||||
/* compute [x; y] = -0.5 * inv(A) * b */
|
||||
t1 = b[0] / A[0][0];
|
||||
t2 = (b[1] - t1 * A[0][1]) / A[1][1];
|
||||
delta2 = t2 / A[1][1];
|
||||
delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
|
||||
delta2 *= 0.5;
|
||||
|
||||
/* limit norm */
|
||||
t1 = delta1 * delta1 + delta2 * delta2;
|
||||
if (t1 > 1.0) {
|
||||
delta1 /= t1;
|
||||
delta2 /= t1;
|
||||
}
|
||||
|
||||
*peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
|
||||
|
||||
*x += delta1;
|
||||
*y += delta2;
|
||||
}
|
||||
|
||||
|
||||
static void PCorr(const double *in, double *outcorr)
|
||||
{
|
||||
double sum, ysum, prod;
|
||||
const double *x, *inptr;
|
||||
int k, n;
|
||||
|
||||
//ysum = 1e-6; /* use this with float (i.s.o. double)! */
|
||||
ysum = 1e-13;
|
||||
sum = 0.0;
|
||||
x = in + PITCH_MAX_LAG/2 + 2;
|
||||
for (n = 0; n < PITCH_CORR_LEN2; n++) {
|
||||
ysum += in[n] * in[n];
|
||||
sum += x[n] * in[n];
|
||||
}
|
||||
|
||||
outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */
|
||||
*outcorr = sum / sqrt(ysum);
|
||||
|
||||
for (k = 1; k < PITCH_LAG_SPAN2; k++) {
|
||||
ysum -= in[k-1] * in[k-1];
|
||||
ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
|
||||
sum = 0.0;
|
||||
inptr = &in[k];
|
||||
prod = x[0] * inptr[0];
|
||||
for (n = 1; n < PITCH_CORR_LEN2; n++) {
|
||||
sum += prod;
|
||||
prod = x[n] * inptr[n];
|
||||
}
|
||||
sum += prod;
|
||||
outcorr--;
|
||||
*outcorr = sum / sqrt(ysum);
|
||||
}
|
||||
}
|
||||
|
||||
static void WebRtcIsac_AllpassFilterForDec(double* InOut,
|
||||
const double* APSectionFactors,
|
||||
size_t lengthInOut,
|
||||
double* FilterState) {
|
||||
// This performs all-pass filtering--a series of first order all-pass
|
||||
// sections are used to filter the input in a cascade manner.
|
||||
size_t n, j;
|
||||
double temp;
|
||||
for (j = 0; j < ALLPASSSECTIONS; j++) {
|
||||
for (n = 0; n < lengthInOut; n += 2) {
|
||||
temp = InOut[n]; // store input
|
||||
InOut[n] = FilterState[j] + APSectionFactors[j] * temp;
|
||||
FilterState[j] = -APSectionFactors[j] * InOut[n] + temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void WebRtcIsac_DecimateAllpass(
|
||||
const double* in,
|
||||
double* state_in, // array of size: 2*ALLPASSSECTIONS+1
|
||||
size_t N, // number of input samples
|
||||
double* out) { // array of size N/2
|
||||
|
||||
static const double APupper[ALLPASSSECTIONS] = {0.0347, 0.3826};
|
||||
static const double APlower[ALLPASSSECTIONS] = {0.1544, 0.744};
|
||||
|
||||
size_t n;
|
||||
double data_vec[PITCH_FRAME_LEN];
|
||||
|
||||
/* copy input */
|
||||
memcpy(data_vec + 1, in, sizeof(double) * (N - 1));
|
||||
|
||||
data_vec[0] = state_in[2 * ALLPASSSECTIONS]; // the z^(-1) state
|
||||
state_in[2 * ALLPASSSECTIONS] = in[N - 1];
|
||||
|
||||
WebRtcIsac_AllpassFilterForDec(data_vec + 1, APupper, N, state_in);
|
||||
WebRtcIsac_AllpassFilterForDec(data_vec, APlower, N,
|
||||
state_in + ALLPASSSECTIONS);
|
||||
|
||||
for (n = 0; n < N / 2; n++)
|
||||
out[n] = data_vec[2 * n] + data_vec[2 * n + 1];
|
||||
}
|
||||
|
||||
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
|
||||
|
||||
/* Initial (coarse) pitch-lag estimate for one frame.
 *
 * Steps, as visible in the body:
 *  1. Decimate `in` by two (WebRtcIsac_DecimateAllpass), append the result to
 *     the history kept in State->dec_buffer, run a recursive low-pass over the
 *     new samples and save the tail back into State->dec_buffer.
 *  2. Compute two correlation vectors with PCorr(), one starting at buf_dec
 *     and one PITCH_CORR_STEP2 samples later.
 *  3. Bias corrvec1 towards old_lag (strength 4*old_gain^2, capped at 0.8)
 *     and taper both ends of both vectors with LagWin.
 *  4. Build the correlation surface CorrSurf: the middle row (PITCH_BW) adds
 *     the two vectors at equal lag; rows further from the middle pair
 *     different lags and are scaled by `adj`. The overall maximum is tracked
 *     in corr_max / max_ind.
 *  5. Collect up to PITCH_MAX_NUM_PEAKS local maxima above 0.6 * corr_max,
 *     refine each with IntrepolFilter() and Intrpol2D(), and keep the best one
 *     after weighting with PITCH_PEAK_DECAY. If no peak qualifies, fall back
 *     to the position of the overall maximum.
 *
 * Output: lags[0] = lags[1] = first lag, lags[2] = lags[3] = second lag, each
 * clamped to [PITCH_MIN_LAG, PITCH_MAX_LAG].
 */
static void WebRtcIsac_InitializePitch(const double* in,
|
||||
const double old_lag,
|
||||
const double old_gain,
|
||||
PitchAnalysisStruct* State,
|
||||
double* lags) {
|
||||
double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
|
||||
double ratio, log_lag, gain_bias;
|
||||
double bias;
|
||||
double corrvec1[PITCH_LAG_SPAN2];
|
||||
double corrvec2[PITCH_LAG_SPAN2];
|
||||
int m, k;
|
||||
// Allocating 10 extra entries at the beginning of the CorrSurf
// (presumably head-room for the negative offsets used by the peak
// refinement below - TODO confirm against IntrepolFilter).
|
||||
double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
|
||||
double* CorrSurf[2*PITCH_BW+3];
|
||||
double *CorrSurfPtr1, *CorrSurfPtr2;
|
||||
double LagWin[3] = {0.2, 0.5, 0.98};
|
||||
int ind1, ind2, peaks_ind, peak, max_ind;
|
||||
int peaks[PITCH_MAX_NUM_PEAKS];
|
||||
double adj, gain_tmp;
|
||||
double corr, corr_max;
|
||||
double intrp_a, intrp_b, intrp_c, intrp_d;
|
||||
double peak_vals[PITCH_MAX_NUM_PEAKS];
|
||||
double lags1[PITCH_MAX_NUM_PEAKS];
|
||||
double lags2[PITCH_MAX_NUM_PEAKS];
|
||||
double T[3][3];
|
||||
int row;
|
||||
|
||||
/* Rows are laid out back to back in corrSurfBuff, PITCH_LAG_SPAN2+4 doubles
 * apart, so a neighbour in the previous/next row is reached by subtracting or
 * adding PITCH_LAG_SPAN2+4 to a flat offset. */
for(k = 0; k < 2*PITCH_BW+3; k++)
|
||||
{
|
||||
CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
|
||||
}
|
||||
/* reset CorrSurf matrix */
|
||||
memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
|
||||
|
||||
//warnings -DH
|
||||
max_ind = 0;
|
||||
peak = 0;
|
||||
|
||||
/* copy old values from state buffer */
|
||||
memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
|
||||
|
||||
/* decimation; put result after the old values */
|
||||
WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
|
||||
&buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]);
|
||||
|
||||
/* low-pass filtering */
/* Recursive: each new sample uses the two already-filtered samples before it. */
|
||||
for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++)
|
||||
buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2];
|
||||
|
||||
/* copy end part back into state buffer */
|
||||
memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
|
||||
|
||||
/* compute correlation for first and second half of the frame */
|
||||
PCorr(buf_dec, corrvec1);
|
||||
PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2);
|
||||
|
||||
/* bias towards pitch lag of previous frame */
/* NOTE(review): log() needs old_lag > 0; nothing here checks it - confirm the
 * caller guarantees it. Only corrvec1 receives this bias. */
|
||||
log_lag = log(0.5 * old_lag);
|
||||
gain_bias = 4.0 * old_gain * old_gain;
|
||||
if (gain_bias > 0.8) gain_bias = 0.8;
|
||||
for (k = 0; k < PITCH_LAG_SPAN2; k++)
|
||||
{
|
||||
ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag;
|
||||
bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
|
||||
corrvec1[k] *= bias;
|
||||
}
|
||||
|
||||
/* taper correlation functions */
|
||||
for (k = 0; k < 3; k++) {
|
||||
gain_tmp = LagWin[k];
|
||||
corrvec1[k] *= gain_tmp;
|
||||
corrvec2[k] *= gain_tmp;
|
||||
corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
|
||||
corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
|
||||
}
|
||||
|
||||
corr_max = 0.0;
|
||||
/* fill middle row of correlation surface */
/* max_ind is a flat offset relative to &CorrSurf[0][0]. */
|
||||
ind1 = 0;
|
||||
ind2 = 0;
|
||||
CorrSurfPtr1 = &CorrSurf[PITCH_BW][2];
|
||||
for (k = 0; k < PITCH_LAG_SPAN2; k++) {
|
||||
corr = corrvec1[ind1++] + corrvec2[ind2++];
|
||||
CorrSurfPtr1[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
}
|
||||
/* fill first and last rows of correlation surface */
|
||||
ind1 = 0;
|
||||
ind2 = PITCH_BW;
|
||||
CorrSurfPtr1 = &CorrSurf[0][2];
|
||||
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2];
|
||||
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) {
|
||||
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
|
||||
adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
|
||||
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
|
||||
CorrSurfPtr1[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
|
||||
CorrSurfPtr2[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
}
|
||||
/* fill second and next to last rows of correlation surface */
|
||||
ind1 = 0;
|
||||
ind2 = PITCH_BW-1;
|
||||
CorrSurfPtr1 = &CorrSurf[1][2];
|
||||
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1];
|
||||
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) {
|
||||
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
|
||||
adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
|
||||
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
|
||||
CorrSurfPtr1[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
|
||||
CorrSurfPtr2[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
}
|
||||
/* fill remainder of correlation surface */
|
||||
for (m = 2; m < PITCH_BW; m++) {
|
||||
ind1 = 0;
|
||||
ind2 = PITCH_BW - m; /* always larger than ind1 */
|
||||
CorrSurfPtr1 = &CorrSurf[m][2];
|
||||
CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m];
|
||||
for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) {
|
||||
ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
|
||||
adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
|
||||
corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
|
||||
CorrSurfPtr1[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
|
||||
CorrSurfPtr2[k] = corr;
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr; /* update maximum */
|
||||
max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* threshold value to qualify as a peak */
|
||||
corr_max *= 0.6;
|
||||
|
||||
peaks_ind = 0;
|
||||
/* find peaks */
/* A peak must exceed the threshold and four neighbours: two in the previous
 * row (offsets -(PITCH_LAG_SPAN2+5), -(PITCH_LAG_SPAN2+4)) and two in the next
 * row (offsets +(PITCH_LAG_SPAN2+4), +(PITCH_LAG_SPAN2+5)). Same-row
 * neighbours are not compared. */
|
||||
for (m = 1; m < PITCH_BW+1; m++) {
|
||||
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
|
||||
CorrSurfPtr1 = &CorrSurf[m][2];
|
||||
for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) {
|
||||
corr = CorrSurfPtr1[k];
|
||||
if (corr > corr_max) {
|
||||
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
|
||||
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
|
||||
/* found a peak; store index into matrix */
|
||||
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) {
|
||||
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
|
||||
CorrSurfPtr1 = &CorrSurf[m][2];
|
||||
for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) {
|
||||
corr = CorrSurfPtr1[k];
|
||||
if (corr > corr_max) {
|
||||
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
|
||||
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
|
||||
/* found a peak; store index into matrix */
|
||||
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
|
||||
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (peaks_ind > 0) {
|
||||
/* examine each peak */
|
||||
CorrSurfPtr1 = &CorrSurf[0][0];
|
||||
for (k = 0; k < peaks_ind; k++) {
|
||||
/* Inside this loop `peak` is a flat offset into the surface. */
peak = peaks[k];
|
||||
|
||||
/* compute four interpolated values around current peak */
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d);
|
||||
|
||||
/* determine maximum of the interpolated values */
|
||||
corr = CorrSurfPtr1[peak];
|
||||
corr_max = intrp_a;
|
||||
if (intrp_b > corr_max) corr_max = intrp_b;
|
||||
if (intrp_c > corr_max) corr_max = intrp_c;
|
||||
if (intrp_d > corr_max) corr_max = intrp_d;
|
||||
|
||||
/* determine where the peak sits and fill a 3x3 matrix around it */
/* If the grid sample beats all four interpolated values, T is centred on it;
 * otherwise T is centred on the largest interpolated value and the lags are
 * shifted by half a step accordingly. */
|
||||
row = peak / (PITCH_LAG_SPAN2+4);
|
||||
lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
|
||||
lags2[k] = (double) (lags1[k] + PITCH_BW - row);
|
||||
if ( corr > corr_max ) {
|
||||
T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
|
||||
T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
|
||||
T[1][1] = corr;
|
||||
T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
|
||||
T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
|
||||
T[1][0] = intrp_a;
|
||||
T[0][1] = intrp_b;
|
||||
T[2][1] = intrp_c;
|
||||
T[1][2] = intrp_d;
|
||||
} else {
|
||||
if (intrp_a == corr_max) {
|
||||
lags1[k] -= 0.5;
|
||||
lags2[k] += 0.5;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]);
|
||||
T[1][1] = intrp_a;
|
||||
T[0][2] = intrp_b;
|
||||
T[2][2] = intrp_c;
|
||||
T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)];
|
||||
T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
|
||||
T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
|
||||
T[1][2] = corr;
|
||||
} else if (intrp_b == corr_max) {
|
||||
lags1[k] -= 0.5;
|
||||
lags2[k] -= 0.5;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]);
|
||||
T[2][0] = intrp_a;
|
||||
T[1][1] = intrp_b;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]);
|
||||
T[2][2] = intrp_d;
|
||||
T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
|
||||
T[0][1] = CorrSurfPtr1[peak - 1];
|
||||
T[2][1] = corr;
|
||||
T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
|
||||
} else if (intrp_c == corr_max) {
|
||||
lags1[k] += 0.5;
|
||||
lags2[k] += 0.5;
|
||||
T[0][0] = intrp_a;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]);
|
||||
T[1][1] = intrp_c;
|
||||
T[0][2] = intrp_d;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]);
|
||||
T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
|
||||
T[0][1] = corr;
|
||||
T[2][1] = CorrSurfPtr1[peak + 1];
|
||||
T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
|
||||
} else {
|
||||
lags1[k] += 0.5;
|
||||
lags2[k] -= 0.5;
|
||||
T[0][0] = intrp_b;
|
||||
T[2][0] = intrp_c;
|
||||
T[1][1] = intrp_d;
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]);
|
||||
IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]);
|
||||
T[1][0] = corr;
|
||||
T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
|
||||
T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
|
||||
T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)];
|
||||
}
|
||||
}
|
||||
|
||||
/* 2D parabolic interpolation gives more accurate lags and peak value */
|
||||
Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]);
|
||||
}
|
||||
|
||||
/* determine the highest peak, after applying a bias towards short lags */
/* NOTE(review): from here on `peak` is meant to be an index into lags1/lags2,
 * but it still holds the last surface offset if no candidate scores above
 * 0.0 (or the score is NaN). In that case lags1[peak] below would index
 * outside the arrays - confirm this cannot happen. */
|
||||
corr_max = 0.0;
|
||||
for (k = 0; k < peaks_ind; k++) {
|
||||
corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k]));
|
||||
if (corr > corr_max) {
|
||||
corr_max = corr;
|
||||
peak = k;
|
||||
}
|
||||
}
|
||||
|
||||
/* Lags were measured on the decimated signal; double them for the output. */
lags1[peak] *= 2.0;
|
||||
lags2[peak] *= 2.0;
|
||||
|
||||
if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG;
|
||||
if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG;
|
||||
if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG;
|
||||
if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG;
|
||||
|
||||
/* store lags of highest peak in output array */
|
||||
lags[0] = lags1[peak];
|
||||
lags[1] = lags1[peak];
|
||||
lags[2] = lags2[peak];
|
||||
lags[3] = lags2[peak];
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No peak qualified: fall back to the position of the overall maximum.
 * NOTE(review): unlike the branch above, these lags are not multiplied by
 * 2.0 before clamping - looks inconsistent; confirm whether it is intended. */
row = max_ind / (PITCH_LAG_SPAN2+4);
|
||||
lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
|
||||
lags2[0] = (double) (lags1[0] + PITCH_BW - row);
|
||||
|
||||
if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG;
|
||||
if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG;
|
||||
if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG;
|
||||
if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG;
|
||||
|
||||
/* store lags of highest peak in output array */
|
||||
lags[0] = lags1[0];
|
||||
lags[1] = lags1[0];
|
||||
lags[2] = lags2[0];
|
||||
lags[3] = lags2[0];
|
||||
}
|
||||
}
|
||||
|
||||
RTC_POP_IGNORING_WFRAME_LARGER_THAN()
|
||||
|
||||
/* Symmetric 5x5 weighting matrix for the gain-fluctuation penalty.
 * In WebRtcIsac_PitchAnalysis() index 0 is paired with the previous frame's
 * gain and indices 1..4 with the four gains being optimized. */
/* create weighting matrix by orthogonalizing a basis of polynomials of increasing order
|
||||
* t = (0:4)';
|
||||
* A = [t.^0, t.^1, t.^2, t.^3, t.^4];
|
||||
* [Q, dummy] = qr(A);
|
||||
* P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
|
||||
static const double kWeight[5][5] = {
|
||||
{ 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286},
|
||||
{-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857},
|
||||
{-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286},
|
||||
{ 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143},
|
||||
{ 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286}
|
||||
};
|
||||
|
||||
/* Second-order pole-zero high-pass filter.
 *
 * Filters N samples from `in` into `out`. `state` holds two values that are
 * carried from call to call and updated in place. With N == 0 nothing is
 * read or written.
 *
 * Coefficient design (MATLAB notation):
 *   z = 0.998 * exp(j*2*pi*35/8000);
 *   p = 0.94 * exp(j*2*pi*140/8000);
 *   HP_b = [1, -2*real(z), abs(z)^2];
 *   HP_a = [1, -2*real(p), abs(p)^2];
 * kZeroCoef holds HP_b(2:3) as is; kPoleCoef holds HP_a(2:3) with the sign
 * flipped, so both contributions are added in the state update.
 */
static void WebRtcIsac_Highpass(const double* in,
                                double* out,
                                double* state,
                                size_t N) {
  static const double kPoleCoef[2] = { 1.86864659625574, -0.88360000000000};
  static const double kZeroCoef[2] = {-1.99524591718270, 0.99600400000000};

  size_t i = 0;

  while (i < N) {
    out[i] = in[i] + state[1];
    state[1] = state[0] + kZeroCoef[0] * in[i] + kPoleCoef[0] * out[i];
    state[0] = kZeroCoef[1] * in[i] + kPoleCoef[1] * out[i];
    ++i;
  }
}
|
||||
|
||||
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
|
||||
|
||||
/* Pitch analysis for one frame: estimates four pitch lags and four pitch
 * gains and produces the pitch-pre-filtered signal.
 *
 * Flow, as visible in the body:
 *  1. High-pass `in`, then run WebRtcIsac_WeightingFilter() to obtain the
 *     weighted and whitened signals; QLOOKAHEAD whitened samples are carried
 *     between frames in State->whitened_buf.
 *  2. WebRtcIsac_InitializePitch() fills lags[0..3] from the weighted signal,
 *     using the previous lag/gain stored in State->PFstr_wght.
 *  3. gains[0..3] start at PITCH_MAX_GAIN_06 and are refined by two iterations
 *     of a step delta = -inv(H) * grad, where grad/H combine three terms:
 *     pre-filter output energy, a gain-fluctuation penalty (kWeight) and a
 *     penalty that grows as a gain approaches 1. Gains are clamped to
 *     [0, PITCH_MAX_GAIN] after every iteration.
 *  4. WebRtcIsac_PitchfilterPre() is run on the whitened signal, then
 *     WebRtcIsac_PitchfilterPre_la() on the (lookahead-extended) raw input;
 *     both are given `out` as their destination, so the second call
 *     presumably determines its final content - TODO confirm.
 *  5. The last QLOOKAHEAD input samples are saved in State->inbuf.
 */
void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */
|
||||
double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
|
||||
PitchAnalysisStruct *State,
|
||||
double *lags,
|
||||
double *gains)
|
||||
{
|
||||
double HPin[PITCH_FRAME_LEN];
|
||||
double Weighted[PITCH_FRAME_LEN];
|
||||
double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
|
||||
double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
|
||||
double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead
|
||||
double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
|
||||
double old_lag, old_gain;
|
||||
double nrg_wht, tmp;
|
||||
double Wnrg, Wfluct, Wgain;
|
||||
double H[4][4];
|
||||
double grad[4];
|
||||
double dG[4];
|
||||
int k, m, n, iter;
|
||||
|
||||
/* high pass filtering using second order pole-zero filter */
|
||||
WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
|
||||
|
||||
/* copy from state into buffer */
|
||||
memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
|
||||
|
||||
/* compute weighted and whitened signals */
|
||||
WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
|
||||
|
||||
/* copy from buffer into state */
|
||||
memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
|
||||
|
||||
old_lag = State->PFstr_wght.oldlagp[0];
|
||||
old_gain = State->PFstr_wght.oldgainp[0];
|
||||
|
||||
/* initial pitch estimate */
|
||||
WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
|
||||
|
||||
|
||||
/* Iterative optimization of lags - to be done */
|
||||
|
||||
/* compute energy of whitened signal */
|
||||
nrg_wht = 0.0;
|
||||
for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
|
||||
nrg_wht += Whitened[k] * Whitened[k];
|
||||
|
||||
|
||||
/* Iterative optimization of gains */
|
||||
|
||||
/* set weights for energy, gain fluctuation, and spectral gain penalty functions */
/* NOTE(review): nrg_wht is 0.0 when every whitened sample is exactly zero, in
 * which case this division yields infinity and the gains can become NaN (the
 * range check below does not catch NaN) - confirm whether the whitened signal
 * can ever be all zero. */
|
||||
Wnrg = 1.0 / nrg_wht;
|
||||
Wgain = 0.005;
|
||||
Wfluct = 3.0;
|
||||
|
||||
/* set initial gains */
|
||||
for (k = 0; k < 4; k++)
|
||||
gains[k] = PITCH_MAX_GAIN_06;
|
||||
|
||||
/* two iterations should be enough */
|
||||
for (iter = 0; iter < 2; iter++) {
|
||||
/* compute Jacobian of pre-filter output towards gains */
|
||||
WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
|
||||
|
||||
/* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
|
||||
for (k = 0; k < 4; k++) {
|
||||
tmp = 0.0;
|
||||
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
|
||||
tmp += out_G[n] * out_dG[k][n];
|
||||
grad[k] = tmp * Wnrg;
|
||||
}
|
||||
for (k = 0; k < 4; k++) {
|
||||
for (m = 0; m <= k; m++) {
|
||||
tmp = 0.0;
|
||||
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
|
||||
tmp += out_dG[m][n] * out_dG[k][n];
|
||||
H[k][m] = tmp * Wnrg;
|
||||
}
|
||||
}
|
||||
|
||||
/* add gradient and Hessian (lower triangle) for dampening fast gain changes */
/* kWeight row/column 0 is paired with old_gain; rows/columns 1..4 with gains[0..3]. */
|
||||
for (k = 0; k < 4; k++) {
|
||||
tmp = kWeight[k+1][0] * old_gain;
|
||||
for (m = 0; m < 4; m++)
|
||||
tmp += kWeight[k+1][m+1] * gains[m];
|
||||
grad[k] += tmp * Wfluct;
|
||||
}
|
||||
for (k = 0; k < 4; k++) {
|
||||
for (m = 0; m <= k; m++) {
|
||||
H[k][m] += kWeight[k+1][m+1] * Wfluct;
|
||||
}
|
||||
}
|
||||
|
||||
/* add gradient and Hessian for dampening gain */
/* NOTE(review): 1/(1 - gain) diverges at gain == 1; this relies on
 * PITCH_MAX_GAIN (the clamp applied below) being less than 1 - confirm.
 * The last gain gets a 1.33x heavier penalty than the first three. */
|
||||
for (k = 0; k < 3; k++) {
|
||||
tmp = 1.0 / (1 - gains[k]);
|
||||
grad[k] += tmp * tmp * Wgain;
|
||||
H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
|
||||
}
|
||||
tmp = 1.0 / (1 - gains[3]);
|
||||
grad[3] += 1.33 * (tmp * tmp * Wgain);
|
||||
H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
|
||||
|
||||
|
||||
/* compute Cholesky factorization of Hessian
|
||||
* by overwriting the upper triangle; scale factors on diagonal
|
||||
* (for non pc-platforms store the inverse of the diagonals separately to minimize divisions) */
/* The statements below are order-dependent: each one reads entries written by
 * the ones before it. No square roots are taken; the diagonal keeps the
 * scale factors and the strict upper triangle the unit-diagonal factor. */
|
||||
H[0][1] = H[1][0] / H[0][0];
|
||||
H[0][2] = H[2][0] / H[0][0];
|
||||
H[0][3] = H[3][0] / H[0][0];
|
||||
H[1][1] -= H[0][0] * H[0][1] * H[0][1];
|
||||
H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
|
||||
H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
|
||||
H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
|
||||
H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
|
||||
H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
|
||||
|
||||
/* Compute update as delta_gains = -inv(H) * grad */
|
||||
/* copy and negate */
|
||||
for (k = 0; k < 4; k++)
|
||||
dG[k] = -grad[k];
|
||||
/* back substitution */
/* (this first pass runs from dG[1] down to dG[3], i.e. top to bottom) */
|
||||
dG[1] -= dG[0] * H[0][1];
|
||||
dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
|
||||
dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
|
||||
/* scale */
|
||||
for (k = 0; k < 4; k++)
|
||||
dG[k] /= H[k][k];
|
||||
/* back substitution */
|
||||
dG[2] -= dG[3] * H[2][3];
|
||||
dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
|
||||
dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
|
||||
|
||||
/* update gains and check range */
|
||||
for (k = 0; k < 4; k++) {
|
||||
gains[k] += dG[k];
|
||||
if (gains[k] > PITCH_MAX_GAIN)
|
||||
gains[k] = PITCH_MAX_GAIN;
|
||||
else if (gains[k] < 0.0)
|
||||
gains[k] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* update state for next frame */
|
||||
WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
|
||||
|
||||
/* concatenate previous input's end and current input */
|
||||
memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
|
||||
memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
|
||||
|
||||
/* lookahead pitch filtering for masking analysis */
|
||||
WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
|
||||
|
||||
/* store last part of input */
|
||||
for (k = 0; k < QLOOKAHEAD; k++)
|
||||
State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
|
||||
}
|
||||
|
||||
RTC_POP_IGNORING_WFRAME_LARGER_THAN()
|
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* pitch_estimator.h
|
||||
*
|
||||
* Pitch functions
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
/* Pitch analysis for one frame.
 *
 * Reads PITCH_FRAME_LEN samples from `in`, writes PITCH_FRAME_LEN+QLOOKAHEAD
 * samples to `out`, updates the analysis state in `State`, and stores four
 * values in each of `lags` (lags[0..3]) and `gains` (gains[0..3]). */
void WebRtcIsac_PitchAnalysis(
|
||||
const double* in, /* PITCH_FRAME_LEN samples */
|
||||
double* out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
|
||||
PitchAnalysisStruct* State,
|
||||
double* lags,
|
||||
double* gains);
|
||||
|
||||
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ */
|
@ -0,0 +1,388 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"
|
||||
#include "rtc_base/compile_assert_c.h"
|
||||
|
||||
/*
|
||||
* We are implementing the following filters;
|
||||
*
|
||||
* Pre-filtering:
|
||||
* y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
|
||||
*
|
||||
* Post-filtering:
|
||||
* y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
|
||||
*
|
||||
* Note that `lag` is a floating number so we perform an interpolation to
|
||||
* obtain the correct `lag`.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Taps of the damping filter that FilterSegment() applies to the gain-scaled,
 * lag-interpolated signal. The five listed taps are symmetric and sum to 1.0. */
static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
|
||||
-0.07};
|
||||
|
||||
/* interpolation coefficients; generated by design_pitch_filter.m */
/* Eight rows of nine taps are listed, one row per fractional lag position.
 * Row 4 is a unit impulse at tap 4 to within about 1e-14, and rows 5, 6, 7 are
 * rows 3, 2, 1 reversed (equal up to the last printed digits). */
|
||||
static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
|
||||
{-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125,
|
||||
0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109,
|
||||
0.01754159521746},
|
||||
{-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643,
|
||||
0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084,
|
||||
0.01654127246314},
|
||||
{-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787,
|
||||
0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311,
|
||||
0.01295781500709},
|
||||
{-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393,
|
||||
0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166,
|
||||
0.00719783432422},
|
||||
{-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001,
|
||||
0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000,
|
||||
-0.00000000000000},
|
||||
{0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562,
|
||||
0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377,
|
||||
-0.00764851320885},
|
||||
{0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650,
|
||||
0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059,
|
||||
-0.01463300534216},
|
||||
{0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190,
|
||||
0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865,
|
||||
-0.01985640750433}
|
||||
};
|
||||
|
||||
/*
 * Operating mode of the pitch filter.
 * iSAC has 4 different pitch filters which are very similar in structure;
 * this enumeration selects which one is run.
 */
typedef enum {
  /* Pitch pre-filter. This is used at the encoder. */
  kPitchFilterPre,

  /* Pitch post-filter. This is the inverse of the pre-filter and is used in
   * the decoder. */
  kPitchFilterPost,

  /* Structurally similar to pre-filtering, but utilizing 3 millisecond
   * lookahead. It is used to obtain the signal for LPC analysis. */
  kPitchFilterPreLa,

  /* Structurally similar to pre-filtering, but differential changes in gain
   * are also considered. This is used to find the optimal gain. */
  kPitchFilterPreGain
} PitchFilterOperation;
|
||||
|
||||
/*
|
||||
* Structure with parameters used for pitch-filtering.
|
||||
* buffer : a buffer where the sum of previous inputs and outputs
|
||||
* are stored.
|
||||
* damper_state : the state of the damping filter. The filter is defined by
|
||||
* `kDampFilter`.
|
||||
* interpol_coeff : pointer to a set of coefficients which are used to realize
|
||||
* fractional pitch by interpolation.
|
||||
* gain : pitch-gain to be applied to the current segment of input.
|
||||
* lag : pitch-lag for the current segment of input.
|
||||
* lag_offset : the offset of lag w.r.t. current sample.
|
||||
* sub_frame : sub-frame index, there are 4 pitch sub-frames in an iSAC
|
||||
* frame.
|
||||
* mode : This specifies the usage of the filter. See
|
||||
* 'PitchFilterOperation' for operational modes.
|
||||
* num_samples : number of samples to be processed in each segment.
|
||||
* index : index of the input and output sample.
|
||||
* damper_state_dg : state of damping filter for different trial gains.
|
||||
* gain_mult : differential changes to gain.
|
||||
*/
|
||||
typedef struct {
|
||||
double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
|
||||
double damper_state[PITCH_DAMPORDER];
|
||||
const double *interpol_coeff;
|
||||
double gain;
|
||||
double lag;
|
||||
int lag_offset;
|
||||
|
||||
int sub_frame;
|
||||
PitchFilterOperation mode;
|
||||
int num_samples;
|
||||
int index;
|
||||
|
||||
double damper_state_dg[4][PITCH_DAMPORDER];
|
||||
double gain_mult[4];
|
||||
} PitchFilterParam;
|
||||
|
||||
/**********************************************************************
|
||||
* FilterSegment()
|
||||
* Filter one segment, a quarter of a frame.
|
||||
*
|
||||
* Inputs
|
||||
* in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
|
||||
* filter_param : pitch filter parameters.
|
||||
*
|
||||
* Outputs
|
||||
* out_data : pointer to a buffer where the filtered signal is written to.
|
||||
* out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
|
||||
* where the output of different gain values (differential
|
||||
* change to gain) is written.
|
||||
*/
|
||||
static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
|
||||
double* out_data,
|
||||
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
|
||||
int n;
|
||||
int m;
|
||||
int j;
|
||||
double sum;
|
||||
double sum2;
|
||||
/* Index of `parameters->buffer` where the output is written to. */
|
||||
int pos = parameters->index + PITCH_BUFFSIZE;
|
||||
/* Index of `parameters->buffer` where samples are read for fractional-lag
|
||||
* computation. */
|
||||
int pos_lag = pos - parameters->lag_offset;
|
||||
|
||||
for (n = 0; n < parameters->num_samples; ++n) {
|
||||
/* Shift low pass filter states. */
|
||||
for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
|
||||
parameters->damper_state[m] = parameters->damper_state[m - 1];
|
||||
}
|
||||
/* Filter to get fractional pitch. */
|
||||
sum = 0.0;
|
||||
for (m = 0; m < PITCH_FRACORDER; ++m) {
|
||||
sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
|
||||
}
|
||||
/* Multiply with gain. */
|
||||
parameters->damper_state[0] = parameters->gain * sum;
|
||||
|
||||
if (parameters->mode == kPitchFilterPreGain) {
|
||||
int lag_index = parameters->index - parameters->lag_offset;
|
||||
int m_tmp = (lag_index < 0) ? -lag_index : 0;
|
||||
/* Update the damper state for the new sample. */
|
||||
for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
|
||||
for (j = 0; j < 4; ++j) {
|
||||
parameters->damper_state_dg[j][m] =
|
||||
parameters->damper_state_dg[j][m - 1];
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < parameters->sub_frame + 1; ++j) {
|
||||
/* Filter for fractional pitch. */
|
||||
sum2 = 0.0;
|
||||
for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
|
||||
/* `lag_index + m` is always larger than or equal to zero, see how
|
||||
* m_tmp is computed. This is equivalent to assume samples outside
|
||||
* `out_dg[j]` are zero. */
|
||||
sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
|
||||
}
|
||||
/* Add the contribution of differential gain change. */
|
||||
parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
|
||||
parameters->gain * sum2;
|
||||
}
|
||||
|
||||
/* Filter with damping filter, and store the results. */
|
||||
for (j = 0; j < parameters->sub_frame + 1; ++j) {
|
||||
sum = 0.0;
|
||||
for (m = 0; m < PITCH_DAMPORDER; ++m) {
|
||||
sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
|
||||
}
|
||||
out_dg[j][parameters->index] = sum;
|
||||
}
|
||||
}
|
||||
/* Filter with damping filter. */
|
||||
sum = 0.0;
|
||||
for (m = 0; m < PITCH_DAMPORDER; ++m) {
|
||||
sum += parameters->damper_state[m] * kDampFilter[m];
|
||||
}
|
||||
|
||||
/* Subtract from input and update buffer. */
|
||||
out_data[parameters->index] = in_data[parameters->index] - sum;
|
||||
parameters->buffer[pos] = in_data[parameters->index] +
|
||||
out_data[parameters->index];
|
||||
|
||||
++parameters->index;
|
||||
++pos;
|
||||
++pos_lag;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Update filter parameters based on the pitch-gains and pitch-lags. */
|
||||
static void Update(PitchFilterParam* parameters) {
|
||||
double fraction;
|
||||
int fraction_index;
|
||||
/* Compute integer lag-offset. */
|
||||
parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
|
||||
0.5);
|
||||
/* Find correct set of coefficients for computing fractional pitch. */
|
||||
fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
|
||||
fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
|
||||
parameters->interpol_coeff = kIntrpCoef[fraction_index];
|
||||
|
||||
if (parameters->mode == kPitchFilterPreGain) {
|
||||
/* If in this mode make a differential change to pitch gain. */
|
||||
parameters->gain_mult[parameters->sub_frame] += 0.2;
|
||||
if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
|
||||
parameters->gain_mult[parameters->sub_frame] = 1.0;
|
||||
}
|
||||
if (parameters->sub_frame > 0) {
|
||||
parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* FilterFrame()
|
||||
* Filter a frame of 30 millisecond, given pitch-lags and pitch-gains.
|
||||
*
|
||||
* Inputs
|
||||
* in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
|
||||
* lags : pointer to pitch-lags, 4 lags per frame.
|
||||
* gains : pointer to pitch-gians, 4 gains per frame.
|
||||
* mode : defining the functionality of the filter. It takes the
|
||||
* following values.
|
||||
* kPitchFilterPre: Pitch pre-filter, used at encoder.
|
||||
* kPitchFilterPost: Pitch post-filter, used at decoder.
|
||||
* kPitchFilterPreLa: Pitch pre-filter with lookahead.
|
||||
* kPitchFilterPreGain: Pitch pre-filter used to otain optimal
|
||||
* pitch-gains.
|
||||
*
|
||||
* Outputs
|
||||
* out_data : pointer to a buffer where the filtered signal is written to.
|
||||
* out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
|
||||
* where the output of different gain values (differential
|
||||
* change to gain) is written.
|
||||
*/
|
||||
static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
|
||||
double* lags, double* gains, PitchFilterOperation mode,
|
||||
double* out_data,
|
||||
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
|
||||
PitchFilterParam filter_parameters;
|
||||
double gain_delta, lag_delta;
|
||||
double old_lag, old_gain;
|
||||
int n;
|
||||
int m;
|
||||
const double kEnhancer = 1.3;
|
||||
|
||||
/* Set up buffer and states. */
|
||||
filter_parameters.index = 0;
|
||||
filter_parameters.lag_offset = 0;
|
||||
filter_parameters.mode = mode;
|
||||
/* Copy states to local variables. */
|
||||
memcpy(filter_parameters.buffer, filter_state->ubuf,
|
||||
sizeof(filter_state->ubuf));
|
||||
RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >=
|
||||
sizeof(filter_state->ubuf));
|
||||
memset(filter_parameters.buffer +
|
||||
sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]),
|
||||
0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf));
|
||||
memcpy(filter_parameters.damper_state, filter_state->ystate,
|
||||
sizeof(filter_state->ystate));
|
||||
|
||||
if (mode == kPitchFilterPreGain) {
|
||||
/* Clear buffers. */
|
||||
memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult));
|
||||
memset(filter_parameters.damper_state_dg, 0,
|
||||
sizeof(filter_parameters.damper_state_dg));
|
||||
for (n = 0; n < PITCH_SUBFRAMES; ++n) {
|
||||
//memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
|
||||
memset(out_dg[n], 0, sizeof(out_dg[n]));
|
||||
}
|
||||
} else if (mode == kPitchFilterPost) {
|
||||
/* Make output more periodic. Negative sign is to change the structure
|
||||
* of the filter. */
|
||||
for (n = 0; n < PITCH_SUBFRAMES; ++n) {
|
||||
gains[n] *= -kEnhancer;
|
||||
}
|
||||
}
|
||||
|
||||
old_lag = *filter_state->oldlagp;
|
||||
old_gain = *filter_state->oldgainp;
|
||||
|
||||
/* No interpolation if pitch lag step is big. */
|
||||
if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
|
||||
(lags[0] < (PITCH_DOWNSTEP * old_lag))) {
|
||||
old_lag = lags[0];
|
||||
old_gain = gains[0];
|
||||
|
||||
if (mode == kPitchFilterPreGain) {
|
||||
filter_parameters.gain_mult[0] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
filter_parameters.num_samples = PITCH_UPDATE;
|
||||
for (m = 0; m < PITCH_SUBFRAMES; ++m) {
|
||||
/* Set the sub-frame value. */
|
||||
filter_parameters.sub_frame = m;
|
||||
/* Calculate interpolation steps for pitch-lag and pitch-gain. */
|
||||
lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
|
||||
filter_parameters.lag = old_lag;
|
||||
gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
|
||||
filter_parameters.gain = old_gain;
|
||||
/* Store for the next sub-frame. */
|
||||
old_lag = lags[m];
|
||||
old_gain = gains[m];
|
||||
|
||||
for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
|
||||
/* Step-wise interpolation of pitch gains and lags. As pitch-lag changes,
|
||||
* some parameters of filter need to be update. */
|
||||
filter_parameters.gain += gain_delta;
|
||||
filter_parameters.lag += lag_delta;
|
||||
/* Update parameters according to new lag value. */
|
||||
Update(&filter_parameters);
|
||||
/* Filter a segment of input. */
|
||||
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
|
||||
}
|
||||
}
|
||||
|
||||
if (mode != kPitchFilterPreGain) {
|
||||
/* Export buffer and states. */
|
||||
memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
|
||||
sizeof(filter_state->ubuf));
|
||||
memcpy(filter_state->ystate, filter_parameters.damper_state,
|
||||
sizeof(filter_state->ystate));
|
||||
|
||||
/* Store for the next frame. */
|
||||
*filter_state->oldlagp = old_lag;
|
||||
*filter_state->oldgainp = old_gain;
|
||||
}
|
||||
|
||||
if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
|
||||
/* Filter the lookahead segment, this is treated as the last sub-frame. So
|
||||
* set `pf_param` to last sub-frame. */
|
||||
filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
|
||||
filter_parameters.num_samples = QLOOKAHEAD;
|
||||
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
|
||||
}
|
||||
}
|
||||
|
||||
void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data,
|
||||
PitchFiltstr* pf_state, double* lags,
|
||||
double* gains) {
|
||||
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
|
||||
}
|
||||
|
||||
void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data,
|
||||
PitchFiltstr* pf_state, double* lags,
|
||||
double* gains) {
|
||||
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
|
||||
NULL);
|
||||
}
|
||||
|
||||
void WebRtcIsac_PitchfilterPre_gains(
|
||||
double* in_data, double* out_data,
|
||||
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state,
|
||||
double* lags, double* gains) {
|
||||
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
|
||||
out_dg);
|
||||
}
|
||||
|
||||
void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data,
|
||||
PitchFiltstr* pf_state, double* lags,
|
||||
double* gains) {
|
||||
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
|
||||
}
|
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
/* Pitch pre-filter.
 *   indat  : input signal for one frame.
 *   outdat : buffer receiving the filtered signal.
 *   pfp    : pitch-filter state carried from frame to frame.
 *   lags   : pitch lags, PITCH_SUBFRAMES (4) per frame.
 *   gains  : pitch gains, PITCH_SUBFRAMES (4) per frame. */
void WebRtcIsac_PitchfilterPre(double* indat,
                               double* outdat,
                               PitchFiltstr* pfp,
                               double* lags,
                               double* gains);
|
||||
|
||||
/* Pitch post-filter.
 *   indat  : input signal for one frame.
 *   outdat : buffer receiving the filtered signal.
 *   pfp    : pitch-filter state carried from frame to frame.
 *   lags   : pitch lags, PITCH_SUBFRAMES (4) per frame.
 *   gains  : pitch gains, PITCH_SUBFRAMES (4) per frame. */
void WebRtcIsac_PitchfilterPost(double* indat,
                                double* outdat,
                                PitchFiltstr* pfp,
                                double* lags,
                                double* gains);
|
||||
|
||||
/* Pitch pre-filter with lookahead: same parameters as
 * WebRtcIsac_PitchfilterPre(), but QLOOKAHEAD additional samples beyond the
 * frame are filtered as well, so `indat` and `outdat` must hold
 * PITCH_FRAME_LEN + QLOOKAHEAD samples. */
void WebRtcIsac_PitchfilterPre_la(double* indat,
                                  double* outdat,
                                  PitchFiltstr* pfp,
                                  double* lags,
                                  double* gains);
|
||||
|
||||
/* Pitch pre-filter used to obtain optimal pitch gains. In addition to
 * `outdat`, the output for differential gain changes is written to `out_dG`,
 * one row of PITCH_FRAME_LEN + QLOOKAHEAD samples per pitch sub-frame. */
void WebRtcIsac_PitchfilterPre_gains(
    double* indat,
    double* outdat,
    double out_dG[][PITCH_FRAME_LEN + QLOOKAHEAD],
    PitchFiltstr* pfp,
    double* lags,
    double* gains);
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_
|
@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* settings.h
|
||||
*
|
||||
* Declaration of #defines used in the iSAC codec
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
|
||||
|
||||
/* sampling frequency (Hz) */
#define FS 16000

/* number of samples per frame (either 320 (20ms), 480 (30ms) or 960 (60ms)) */
#define INITIAL_FRAMESAMPLES 960

/* do not modify the following; this will have to be modified if we
 * have a 20ms framesize option */
/**********************************************************************/
/* frame size in milliseconds */
#define FRAMESIZE 30
/* number of samples per frame processed in the encoder, 480 */
#define FRAMESAMPLES 480 /* ((FRAMESIZE*FS)/1000) */
#define FRAMESAMPLES_HALF 240 /* FRAMESAMPLES / 2 */
#define FRAMESAMPLES_QUARTER 120 /* FRAMESAMPLES / 4 */
/**********************************************************************/

/* max number of samples per frame (= 60 ms frame) */
#define MAX_FRAMESAMPLES 960
/* twice MAX_FRAMESAMPLES (= 1920) */
#define MAX_SWBFRAMESAMPLES (MAX_FRAMESAMPLES * 2)
/* number of samples per 10ms frame (= 160 at FS) */
#define FRAMESAMPLES_10ms ((10 * FS) / 1000)
/* twice FRAMESAMPLES_10ms (= 320) */
#define SWBFRAMESAMPLES_10ms (FRAMESAMPLES_10ms * 2)
/* number of samples in 30 ms frame */
#define FRAMESAMPLES_30ms 480
/* number of subframes */
#define SUBFRAMES 6
/* length of a subframe (SUBFRAMES * UPDATE = FRAMESAMPLES) */
#define UPDATE 80
/* length of half a subframe (low/high band) */
#define HALF_SUBFRAMELEN (UPDATE / 2)
/* samples of look ahead (in a half-band, so actually
 * half the samples of look ahead @ FS) */
#define QLOOKAHEAD 24 /* 3 ms */
/* order of AR model in spectral entropy coder */
#define AR_ORDER 6
/* order of LP model in spectral entropy coder */
#define LP_ORDER 0
|
||||
|
||||
/* window length (masking analysis) */
#define WINLEN 256
/* order of low-band pole filter used to approximate masking curve */
#define ORDERLO 12
/* order of hi-band pole filter used to approximate masking curve */
#define ORDERHI 6

/* Upper-band (UB) LPC settings. */
#define UB_LPC_ORDER 4
#define UB_LPC_VEC_PER_FRAME 2
#define UB16_LPC_VEC_PER_FRAME 4
#define UB_ACTIVE_SUBFRAMES 2
#define UB_MAX_LPC_ORDER 6
#define UB_INTERPOL_SEGMENTS 1
#define UB16_INTERPOL_SEGMENTS 3
/* Number of samples by which ISACUBEncStruct::data_buffer_float is extended
 * beyond MAX_FRAMESAMPLES. */
#define LB_TOTAL_DELAY_SAMPLES 48
|
||||
/* Bandwidth of the encoded audio; each enumerator's value is the bandwidth
 * in kHz. */
enum ISACBandwidth { isac8kHz = 8, isac12kHz = 12, isac16kHz = 16 };
|
||||
/* Identifies the band being processed: the lower band or one of the two
 * upper-band variants. */
enum ISACBand {
  kIsacLowerBand = 0,
  kIsacUpperBand12 = 1,
  kIsacUpperBand16 = 2
};
|
||||
/* Codec sampling rate; each enumerator's value is the rate in kHz. */
enum IsacSamplingRate { kIsacWideband = 16, kIsacSuperWideband = 32 };
|
||||
/* one upper-band LPC gain per subframe */
#define UB_LPC_GAIN_DIM SUBFRAMES
/* number of int32_t words in each QMF filter-bank state array */
#define FB_STATE_SIZE_WORD32 6

/* order for post_filter_bank */
#define POSTQORDER 3
/* order for pre-filterbank */
#define QORDER 3
/* another order (= 5) */
#define QORDER_ALL (POSTQORDER + QORDER - 1)
/* for decimator */
#define ALLPASSSECTIONS 2

/* array size for byte stream in number of bytes. */
/* The old maximum size still needed for the decoding */
#define STREAM_SIZE_MAX 600
#define STREAM_SIZE_MAX_30 200 /* 200 bytes=53.4 kbps @ 30 ms.framelength */
#define STREAM_SIZE_MAX_60 400 /* 400 bytes=53.4 kbps @ 60 ms.framelength */

/* storage size for bit counts */
#define BIT_COUNTER_SIZE 30
/* maximum order of any AR model or filter */
#define MAX_AR_MODEL_ORDER 12 // 50
|
||||
|
||||
/* For pitch analysis */
#define PITCH_FRAME_LEN (FRAMESAMPLES_HALF) /* 30 ms */
#define PITCH_MAX_LAG 140 /* 57 Hz */
#define PITCH_MIN_LAG 20 /* 400 Hz */
#define PITCH_MAX_GAIN 0.45
#define PITCH_MAX_GAIN_06 0.27 /* PITCH_MAX_GAIN*0.6 */
#define PITCH_MAX_GAIN_Q12 1843 /* PITCH_MAX_GAIN * 2^12, truncated */
#define PITCH_LAG_SPAN2 (PITCH_MAX_LAG / 2 - PITCH_MIN_LAG / 2 + 5) /* = 65 */
#define PITCH_CORR_LEN2 60 /* 15 ms */
#define PITCH_CORR_STEP2 (PITCH_FRAME_LEN / 4) /* = 60 */
#define PITCH_BW 11 /* half the band width of correlation surface */
/* number of pitch sub-frames (one lag and one gain each) per frame */
#define PITCH_SUBFRAMES 4
/* number of interpolation steps of lag/gain within one pitch sub-frame */
#define PITCH_GRAN_PER_SUBFRAME 5
#define PITCH_SUBFRAME_LEN (PITCH_FRAME_LEN / PITCH_SUBFRAMES) /* = 60 */
/* samples filtered per interpolation step (= 12) */
#define PITCH_UPDATE (PITCH_SUBFRAME_LEN / PITCH_GRAN_PER_SUBFRAME)
/* maximum number of peaks to be examined in correlation surface */
#define PITCH_MAX_NUM_PEAKS 10
#define PITCH_PEAK_DECAY 0.85
/* For weighting filter */
#define PITCH_WLPCORDER 6
#define PITCH_WLPCWINLEN PITCH_FRAME_LEN
#define PITCH_WLPCASYM 0.3 /* asymmetry parameter */
#define PITCH_WLPCBUFLEN PITCH_WLPCWINLEN
/* For pitch filter */
/* Extra 50 for fraction and LP filters (= 190) */
#define PITCH_BUFFSIZE (PITCH_MAX_LAG + 50)
#define PITCH_INTBUFFSIZE (PITCH_FRAME_LEN + PITCH_BUFFSIZE) /* = 430 */
/* Max rel. upward step of the lag for interpolation */
#define PITCH_UPSTEP 1.5
/* Max rel. downward step of the lag for interpolation */
#define PITCH_DOWNSTEP 0.67
/* number of fractional-lag resolutions per sample */
#define PITCH_FRACS 8
/* number of taps of the fractional-lag interpolation filter */
#define PITCH_FRACORDER 9
/* number of taps of the damping (low-pass) filter */
#define PITCH_DAMPORDER 5
/* delay added to the lag before it is rounded to an integer offset */
#define PITCH_FILTDELAY 1.5f
/* stepsize for quantization of the pitch Gain */
#define PITCH_GAIN_STEPSIZE 0.125
|
||||
|
||||
/* Order of high pass filter */
#define HPORDER 2

/* some mathematical constants */
/* log2(e) */
#define LOG2EXP 1.44269504088896
#define PI 3.14159265358979

/* Maximum number of iterations allowed to limit payload size */
#define MAX_PAYLOAD_LIMIT_ITERATION 5

/* Redundant Coding */
#define RCU_BOTTLENECK_BPS 16000
#define RCU_TRANSCODING_SCALE 0.40f
#define RCU_TRANSCODING_SCALE_INVERSE 2.5f /* 1 / RCU_TRANSCODING_SCALE */

#define RCU_TRANSCODING_SCALE_UB 0.50f
#define RCU_TRANSCODING_SCALE_UB_INVERSE 2.0f /* 1 / RCU_TRANSCODING_SCALE_UB */
|
||||
|
||||
/* Define Error codes */
/* 6000 General */
#define ISAC_MEMORY_ALLOCATION_FAILED 6010
#define ISAC_MODE_MISMATCH 6020
#define ISAC_DISALLOWED_BOTTLENECK 6030
#define ISAC_DISALLOWED_FRAME_LENGTH 6040
#define ISAC_UNSUPPORTED_SAMPLING_FREQUENCY 6050

/* 6200 Bandwidth estimator */
#define ISAC_RANGE_ERROR_BW_ESTIMATOR 6240
/* 6400 Encoder */
#define ISAC_ENCODER_NOT_INITIATED 6410
#define ISAC_DISALLOWED_CODING_MODE 6420
#define ISAC_DISALLOWED_FRAME_MODE_ENCODER 6430
#define ISAC_DISALLOWED_BITSTREAM_LENGTH 6440
#define ISAC_PAYLOAD_LARGER_THAN_LIMIT 6450
#define ISAC_DISALLOWED_ENCODER_BANDWIDTH 6460
/* 6600 Decoder */
#define ISAC_DECODER_NOT_INITIATED 6610
#define ISAC_EMPTY_PACKET 6620
#define ISAC_DISALLOWED_FRAME_MODE_DECODER 6630
#define ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH 6640
#define ISAC_RANGE_ERROR_DECODE_BANDWIDTH 6650
#define ISAC_RANGE_ERROR_DECODE_PITCH_GAIN 6660
#define ISAC_RANGE_ERROR_DECODE_PITCH_LAG 6670
#define ISAC_RANGE_ERROR_DECODE_LPC 6680
#define ISAC_RANGE_ERROR_DECODE_SPECTRUM 6690
#define ISAC_LENGTH_MISMATCH 6730
/* NOTE(review): "BANDWITH" is misspelled and this code (6740) is distinct
 * from ISAC_RANGE_ERROR_DECODE_BANDWIDTH (6650) above. The name is part of
 * the interface, so it is left as is - confirm which one callers expect. */
#define ISAC_RANGE_ERROR_DECODE_BANDWITH 6740
#define ISAC_DISALLOWED_BANDWIDTH_MODE_DECODER 6750
#define ISAC_DISALLOWED_LPC_MODEL 6760
/* 6800 Call setup formats */
#define ISAC_INCOMPATIBLE_FORMATS 6810
|
||||
|
||||
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ */
|
@ -0,0 +1,448 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* structs.h
|
||||
*
|
||||
* This header file contains all the structs used in the ISAC codec
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/bandwidth_info.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/settings.h"
|
||||
#include "modules/third_party/fft/fft.h"
|
||||
|
||||
/* Bit-stream object: the payload bytes together with three 32-bit coder
 * state words. */
typedef struct Bitstreamstruct {
  /* Payload bytes; sized for the largest stream (STREAM_SIZE_MAX). */
  uint8_t stream[STREAM_SIZE_MAX];
  /* Presumably the upper boundary of the coding interval W, as documented
   * for the same-named field of transcode_obj - confirm. */
  uint32_t W_upper;
  uint32_t streamval;
  /* Presumably the index of the current position in `stream` - confirm. */
  uint32_t stream_index;

} Bitstr;
|
||||
|
||||
/* State of the masking filters: data and correlation buffers plus pre- and
 * post-filter states, each kept separately for the low (Lo) and high (Hi)
 * band. */
typedef struct {
  /* One analysis window (WINLEN samples) per band. */
  double DataBufferLo[WINLEN];
  double DataBufferHi[WINLEN];

  /* Correlation buffers, filter order + 1 entries per band. */
  double CorrBufLo[ORDERLO + 1];
  double CorrBufHi[ORDERHI + 1];

  /* Pre- and post-filter states, filter order + 1 entries each. */
  float PreStateLoF[ORDERLO + 1];
  float PreStateLoG[ORDERLO + 1];
  float PreStateHiF[ORDERHI + 1];
  float PreStateHiG[ORDERHI + 1];
  float PostStateLoF[ORDERLO + 1];
  float PostStateLoG[ORDERLO + 1];
  float PostStateHiF[ORDERHI + 1];
  float PostStateHiG[ORDERHI + 1];

  double OldEnergy;

} MaskFiltstr;
|
||||
|
||||
/* State of the pre-filterbank (analysis side), kept both in double and in
 * float precision. */
typedef struct {
  // state vectors for each of the two analysis filters
  double INSTAT1[2 * (QORDER - 1)];
  double INSTAT2[2 * (QORDER - 1)];
  double INSTATLA1[2 * (QORDER - 1)];
  double INSTATLA2[2 * (QORDER - 1)];
  // buffers of QLOOKAHEAD samples, one per filter
  double INLABUF1[QLOOKAHEAD];
  double INLABUF2[QLOOKAHEAD];

  // float counterparts of the fields above
  float INSTAT1_float[2 * (QORDER - 1)];
  float INSTAT2_float[2 * (QORDER - 1)];
  float INSTATLA1_float[2 * (QORDER - 1)];
  float INSTATLA2_float[2 * (QORDER - 1)];
  float INLABUF1_float[QLOOKAHEAD];
  float INLABUF2_float[QLOOKAHEAD];

  /* High pass filter */
  double HPstates[HPORDER];
  float HPstates_float[HPORDER];

} PreFiltBankstr;
|
||||
|
||||
/* State of the post-filterbank, kept both in double and in float
 * precision. */
typedef struct {
  // state vectors for each of the two filters (lower and upper)
  double STATE_0_LOWER[2 * POSTQORDER];
  double STATE_0_UPPER[2 * POSTQORDER];

  /* High pass filter */
  double HPstates1[HPORDER];
  double HPstates2[HPORDER];

  // float counterparts of the fields above
  float STATE_0_LOWER_float[2 * POSTQORDER];
  float STATE_0_UPPER_float[2 * POSTQORDER];

  float HPstates1_float[HPORDER];
  float HPstates2_float[HPORDER];

} PostFiltBankstr;
|
||||
|
||||
/* Pitch-filter state that is carried from one frame to the next. */
typedef struct {
  // data buffer for pitch filter (PITCH_BUFFSIZE most recent samples)
  double ubuf[PITCH_BUFFSIZE];

  // low pass state vector (state of the damping filter)
  double ystate[PITCH_DAMPORDER];

  // old lag and gain: the last lag and gain of the previous frame, kept as
  // one-element arrays
  double oldlagp[1];
  double oldgainp[1];

} PitchFiltstr;
|
||||
|
||||
/* State of the weighting filter used in pitch analysis. */
typedef struct {
  // data buffer
  double buffer[PITCH_WLPCBUFLEN];

  // state vectors, PITCH_WLPCORDER entries each
  double istate[PITCH_WLPCORDER];
  double weostate[PITCH_WLPCORDER];
  double whostate[PITCH_WLPCORDER];

  // LPC window -> should be a global array because constant
  double window[PITCH_WLPCWINLEN];

} WeightFiltstr;
|
||||
|
||||
/* State of the pitch analysis: buffers of the initial estimator plus two
 * pitch-filter states and one weighting-filter state. */
typedef struct {
  // for initial estimator
  double dec_buffer[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
                    PITCH_FRAME_LEN / 2 + 2];
  double decimator_state[2 * ALLPASSSECTIONS + 1];
  double hp_state[2];

  // QLOOKAHEAD samples of whitened signal
  double whitened_buf[QLOOKAHEAD];

  // QLOOKAHEAD samples of input
  double inbuf[QLOOKAHEAD];

  PitchFiltstr PFstr_wght;
  PitchFiltstr PFstr;
  WeightFiltstr Wghtstr;

} PitchAnalysisStruct;
|
||||
|
||||
/* Bandwidth-estimator state. Have instance of struct together with other
 * iSAC structs */
typedef struct {
  /* Previous frame length (in ms) */
  int32_t prev_frame_length;

  /* Previous RTP timestamp from received
     packet (in samples relative beginning) */
  int32_t prev_rec_rtp_number;

  /* Send timestamp for previous packet (in ms using timeGetTime()) */
  uint32_t prev_rec_send_ts;

  /* Arrival time for previous packet (in ms using timeGetTime()) */
  uint32_t prev_rec_arr_ts;

  /* rate of previous packet, derived from RTP timestamps (in bits/s) */
  float prev_rec_rtp_rate;

  /* Time since the last update of the BN estimate (in ms) */
  uint32_t last_update_ts;

  /* Time since the last reduction (in ms) */
  uint32_t last_reduction_ts;

  /* How many times the estimate was updated in the beginning */
  int32_t count_tot_updates_rec;

  /* The estimated bottle neck rate from there to here (in bits/s) */
  int32_t rec_bw;
  float rec_bw_inv;
  float rec_bw_avg;
  float rec_bw_avg_Q;

  /* The estimated mean absolute jitter value,
     as seen on this side (in ms) */
  float rec_jitter;
  float rec_jitter_short_term;
  float rec_jitter_short_term_abs;
  float rec_max_delay;
  float rec_max_delay_avg_Q;

  /* (assumed) bitrate for headers (bps) */
  float rec_header_rate;

  /* The estimated bottle neck rate from here to there (in bits/s) */
  float send_bw_avg;

  /* The estimated mean absolute jitter value, as seen on
     the other side (in ms) */
  float send_max_delay_avg;

  // number of packets received since last update
  int num_pkts_rec;

  int num_consec_rec_pkts_over_30k;

  // flag for marking that a high speed network has been
  // detected downstream
  int hsn_detect_rec;

  int num_consec_snt_pkts_over_30k;

  // flag for marking that a high speed network has
  // been detected upstream
  int hsn_detect_snd;

  uint32_t start_wait_period;

  int in_wait_period;

  int change_to_WB;

  uint32_t senderTimestamp;
  uint32_t receiverTimestamp;
  // enum IsacSamplingRate incomingStreamSampFreq;
  uint16_t numConsecLatePkts;
  float consecLatency;
  int16_t inWaitLatePkts;

  IsacBandwidthInfo external_bw_info;
} BwEstimatorstr;
|
||||
|
||||
/* State of the rate model. */
typedef struct {
  /* boolean, flags if previous packet exceeded B.N. */
  int PrevExceed;
  /* ms */
  int ExceedAgo;
  /* packets left to send in current burst */
  int BurstCounter;
  /* packets */
  int InitCounter;
  /* ms remaining in buffer when next packet will be sent */
  double StillBuffered;

} RateModel;
|
||||
|
||||
/* The following struct is used to store data from encoding, to make it
   fast and easy to construct a new bitstream with a different Bandwidth
   estimate. All values (except framelength and minBytes) are double size to
   handle 60 ms of data.
*/
typedef struct {
  /* Used to keep track of if it is first or second part of 60 msec packet */
  int startIdx;

  /* Frame length in samples */
  int16_t framelength;

  /* Pitch Gain */
  int pitchGain_index[2];

  /* Pitch Lag */
  double meanGain[2];
  int pitchIndex[PITCH_SUBFRAMES * 2];

  /* LPC */
  int LPCindex_s[108 * 2]; /* KLT_ORDER_SHAPE = 108 */
  int LPCindex_g[12 * 2];  /* KLT_ORDER_GAIN = 12 */
  double LPCcoeffs_lo[(ORDERLO + 1) * SUBFRAMES * 2];
  double LPCcoeffs_hi[(ORDERHI + 1) * SUBFRAMES * 2];

  /* Encode Spec */
  int16_t fre[FRAMESAMPLES];
  int16_t fim[FRAMESAMPLES];
  int16_t AvgPitchGain[2];

  /* Used in adaptive mode only */
  int minBytes;

} IsacSaveEncoderData;
|
||||
|
||||
/* Data saved by the upper-band encoder: LPC shape indices, LPC gains and
 * their indices, a bit-stream object and the real/imaginary FFT values. */
typedef struct {
  int indexLPCShape[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
  /* 2 * SUBFRAMES entries each */
  double lpcGain[SUBFRAMES << 1];
  int lpcGainIndex[SUBFRAMES << 1];

  Bitstr bitStreamObj;

  int16_t realFFT[FRAMESAMPLES_HALF];
  int16_t imagFFT[FRAMESAMPLES_HALF];
} ISACUBSaveEncDataStruct;
|
||||
|
||||
/* Lower-band encoder instance: sub-module states, the input buffer and the
 * rate / payload-size settings. */
typedef struct {
  Bitstr bitstr_obj;
  MaskFiltstr maskfiltstr_obj;
  PreFiltBankstr prefiltbankstr_obj;
  PitchFiltstr pitchfiltstr_obj;
  PitchAnalysisStruct pitchanalysisstr_obj;
  FFTstr fftstr_obj;
  IsacSaveEncoderData SaveEnc_obj;

  int buffer_index;
  int16_t current_framesamples;

  float data_buffer_float[FRAMESAMPLES_30ms];

  int frame_nb;
  double bottleneck;
  int16_t new_framelength;
  double s2nr;

  /* Payload limit for a 30 msec packet.
   * NOTE(review): the original comment said "bits" while the field name
   * says bytes - confirm the unit. */
  int16_t payloadLimitBytes30;
  /* Payload limit for a 60 msec packet (the original comment said
   * "30 msec", apparently copied from the field above).
   * NOTE(review): confirm the unit, see payloadLimitBytes30. */
  int16_t payloadLimitBytes60;
  /* Payload limit for both 30 and 60 msec packets.
   * NOTE(review): confirm the unit, see payloadLimitBytes30. */
  int16_t maxPayloadBytes;
  /* Maximum allowed rate in bytes per 30 msec packet */
  int16_t maxRateInBytes;

  /*---
    If set to 1 iSAC will not adapt the frame-size, if used in
    channel-adaptive mode. The initial value will be used for all rates.
    ---*/
  int16_t enforceFrameSize;

  /*-----
    This records the BWE index the encoder injected into the bit-stream.
    It will be used in RCU. The same BWE index of main payload will be in
    the redundant payload. We can not retrieve it from BWE because it is
    a recursive procedure (WebRtcIsac_GetDownlinkBwJitIndexImpl) and has to be
    called only once per each encode.
    -----*/
  int16_t lastBWIdx;
} ISACLBEncStruct;
|
||||
|
||||
/* Upper-band encoder instance: sub-module states, the input buffer and the
 * rate / payload-size settings. */
typedef struct {
  Bitstr bitstr_obj;
  MaskFiltstr maskfiltstr_obj;
  PreFiltBankstr prefiltbankstr_obj;
  FFTstr fftstr_obj;
  ISACUBSaveEncDataStruct SaveEnc_obj;

  int buffer_index;
  /* Input buffer: one maximum-size frame plus LB_TOTAL_DELAY_SAMPLES. */
  float data_buffer_float[MAX_FRAMESAMPLES + LB_TOTAL_DELAY_SAMPLES];
  double bottleneck;
  /* Maximum allowed number of bits for a 30 msec packet */
  // int16_t payloadLimitBytes30;
  /* Maximum allowed number of bits for both 30 and 60 msec packet */
  // int16_t maxPayloadBytes;
  int16_t maxPayloadSizeBytes;

  double lastLPCVec[UB_LPC_ORDER];
  int16_t numBytesUsed;
  int16_t lastJitterInfo;
} ISACUBEncStruct;
|
||||
|
||||
/* Lower-band decoder instance: the states of its sub-modules. */
typedef struct {
  Bitstr bitstr_obj;
  MaskFiltstr maskfiltstr_obj;
  PostFiltBankstr postfiltbankstr_obj;
  PitchFiltstr pitchfiltstr_obj;
  FFTstr fftstr_obj;

} ISACLBDecStruct;
|
||||
|
||||
/* Upper-band decoder instance: the states of its sub-modules (same as the
 * lower-band decoder, without a pitch-filter state). */
typedef struct {
  Bitstr bitstr_obj;
  MaskFiltstr maskfiltstr_obj;
  PostFiltBankstr postfiltbankstr_obj;
  FFTstr fftstr_obj;

} ISACUBDecStruct;
|
||||
|
||||
/* Lower-band codec instance: one encoder and one decoder. */
typedef struct {
  ISACLBEncStruct ISACencLB_obj;
  ISACLBDecStruct ISACdecLB_obj;
} ISACLBStruct;
|
||||
|
||||
/* Upper-band codec instance: one encoder and one decoder. */
typedef struct {
  ISACUBEncStruct ISACencUB_obj;
  ISACUBDecStruct ISACdecUB_obj;
} ISACUBStruct;
|
||||
|
||||
/*
  This struct is used to take a snapshot of the entropy coder and LPC gains
  right before encoding LPC gains. This allows us to go back to that state
  if we like to limit the payload size.
*/
typedef struct {
  /* 6 lower-band & 6 upper-band */
  double loFiltGain[SUBFRAMES];
  double hiFiltGain[SUBFRAMES];
  /* Upper boundary of interval W */
  uint32_t W_upper;
  uint32_t streamval;
  /* Index to the current position in bytestream */
  uint32_t stream_index;
  /* Three saved stream bytes. */
  uint8_t stream[3];
} transcode_obj;
|
||||
|
||||
/* Cosine and sine lookup tables of two sizes (FRAMESAMPLES_HALF and
 * FRAMESAMPLES_QUARTER entries). */
typedef struct {
  // TODO(kwiberg): The size of these tables could be reduced by storing floats
  // instead of doubles, and by making use of the identity cos(x) =
  // sin(x+pi/2). They could also be made global constants that we fill in at
  // compile time.
  double costab1[FRAMESAMPLES_HALF];
  double sintab1[FRAMESAMPLES_HALF];
  double costab2[FRAMESAMPLES_QUARTER];
  double sintab2[FRAMESAMPLES_QUARTER];
} TransformTables;
|
||||
|
||||
/* Top-level iSAC instance: lower- and upper-band codecs, bandwidth
 * estimator, rate model, QMF filter-bank states and configuration. */
typedef struct {
  // lower-band codec instance
  ISACLBStruct instLB;
  // upper-band codec instance
  ISACUBStruct instUB;

  // Bandwidth Estimator and model for the rate.
  BwEstimatorstr bwestimator_obj;
  RateModel rate_data_obj;
  double MaxDelay;

  /* 0 = adaptive; 1 = instantaneous */
  int16_t codingMode;

  // overall bottleneck of the codec
  int32_t bottleneck;

  // QMF Filter state
  int32_t analysisFBState1[FB_STATE_SIZE_WORD32];
  int32_t analysisFBState2[FB_STATE_SIZE_WORD32];
  int32_t synthesisFBState1[FB_STATE_SIZE_WORD32];
  int32_t synthesisFBState2[FB_STATE_SIZE_WORD32];

  // Error Code
  int16_t errorCode;

  // bandwidth of the encoded audio 8, 12 or 16 kHz
  enum ISACBandwidth bandwidthKHz;
  // Sampling rate of audio, encoder and decoder. The values of
  // enum IsacSamplingRate are 16 and 32 (kHz); the original comment said
  // "8 or 16 kHz".
  enum IsacSamplingRate encoderSamplingRateKHz;
  enum IsacSamplingRate decoderSamplingRateKHz;
  // Flag to keep track of initializations, lower & upper-band
  // encoder and decoder.
  int16_t initFlag;

  // Flag to indicate signal bandwidth switch
  int16_t resetFlag_8kHz;

  // Maximum allowed rate, measured in Bytes per 30 ms.
  int16_t maxRateBytesPer30Ms;
  // Maximum allowed payload-size, measured in Bytes.
  int16_t maxPayloadSizeBytes;
  /* The expected sampling rate of the input signal. Valid values are 16000
   * and 32000. This is not the operation sampling rate of the codec. */
  uint16_t in_sample_rate_hz;

  // Trig tables for WebRtcIsac_Time2Spec and WebRtcIsac_Spec2time.
  TransformTables transform_tables;
} ISACMainStruct;
|
||||
|
||||
#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ */
|
Reference in New Issue
Block a user