update code.

luocai 2024-09-05 09:59:28 +08:00
parent 4f3dc015f7
commit ccf69909d6
223 changed files with 36168 additions and 0 deletions

Record/CMakeLists.txt

@@ -8,6 +8,7 @@ add_executable(Record main.cpp
Player.cpp
Recorder.cpp
SpeexDsp.h SpeexDsp.cpp
Utility.h Utility.cpp
WebRTCPublisher.h WebRTCPublisher.cpp
)

Record/Utility.cpp Normal file

@@ -0,0 +1,12 @@
#include "Utility.h"
std::vector<uint8_t> duplicate(const uint8_t *data, int32_t byteSize) {
std::vector<uint8_t> ret(byteSize * 2);
auto pcm = reinterpret_cast<const uint16_t *>(data);
auto retPcm = reinterpret_cast<uint16_t *>(ret.data());
for (int i = 0; i < byteSize / 2; i++) {
retPcm[2 * i] = pcm[i];
retPcm[2 * i + 1] = pcm[i];
}
return ret;
}
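A minimal usage sketch (hypothetical buffer sizes; the `main` below is illustrative and not part of the commit):

#include <cstdint>
#include <vector>

#include "Utility.h"

int main() {
    // 160 mono samples (10 ms at 16 kHz), 16 bits each: 320 bytes in.
    std::vector<uint8_t> mono(320, 0);
    std::vector<uint8_t> stereo =
        duplicate(mono.data(), static_cast<int32_t>(mono.size()));
    // Output is interleaved L/R with each sample doubled: 640 bytes.
    return stereo.size() == 640 ? 0 : 1;
}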

Record/Utility.h Normal file

@@ -0,0 +1,11 @@
#ifndef __UTILITY_H__
#define __UTILITY_H__

#include <cstdint>
#include <vector>

std::vector<uint8_t> split();

// Expands interleaved 16-bit mono PCM into interleaved stereo by duplicating
// each sample; `byteSize` is the input size in bytes.
std::vector<uint8_t> duplicate(const uint8_t *data, int32_t byteSize);

#endif // __UTILITY_H__

VocieProcess/CMakeLists.txt Normal file

@@ -0,0 +1,139 @@
cmake_minimum_required(VERSION 3.29)
project(VocieProcess)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
include(FetchContent)
set(ABSL_PROPAGATE_CXX_STD ON)
# Note: no GIT_TAG is pinned here, so FetchContent tracks abseil's default branch.
FetchContent_Declare(absl
    GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
)
FetchContent_MakeAvailable(absl)
add_library(VocieProcess
api/audio/audio_processing_statistics.h api/audio/audio_processing_statistics.cc
api/audio/audio_processing.h api/audio/audio_processing.cc
api/audio/channel_layout.h api/audio/channel_layout.cc
api/audio/echo_canceller3_config.h api/audio/echo_canceller3_config.cc
api/task_queue/task_queue_base.h api/task_queue/task_queue_base.cc
api/units/time_delta.h api/units/time_delta.cc
api/units/timestamp.h api/units/timestamp.cc
common_audio/channel_buffer.h common_audio/channel_buffer.cc
common_audio/resampler/push_sinc_resampler.h common_audio/resampler/push_sinc_resampler.cc
common_audio/resampler/sinc_resampler.h common_audio/resampler/sinc_resampler.cc
common_audio/signal_processing/dot_product_with_scale.h common_audio/signal_processing/dot_product_with_scale.cc
common_audio/third_party/ooura/fft_size_128/ooura_fft.h common_audio/third_party/ooura/fft_size_128/ooura_fft.cc
common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
rtc_base/checks.h rtc_base/checks.cc
rtc_base/logging.h rtc_base/logging.cc
rtc_base/platform_thread_types.h rtc_base/platform_thread_types.cc
rtc_base/race_checker.h rtc_base/race_checker.cc
rtc_base/string_encode.h rtc_base/string_encode.cc
rtc_base/string_to_number.h rtc_base/string_to_number.cc
rtc_base/string_utils.h rtc_base/string_utils.cc
rtc_base/system_time.h rtc_base/system_time.cc
rtc_base/time_utils.h rtc_base/time_utils.cc
rtc_base/win32.h rtc_base/win32.cc
rtc_base/containers/flat_tree.h rtc_base/containers/flat_tree.cc
rtc_base/experiments/field_trial_parser.h rtc_base/experiments/field_trial_parser.cc
rtc_base/memory/aligned_malloc.h rtc_base/memory/aligned_malloc.cc
rtc_base/strings/string_builder.h rtc_base/strings/string_builder.cc
modules/audio_processing/audio_buffer.h modules/audio_processing/audio_buffer.cc
modules/audio_processing/high_pass_filter.h modules/audio_processing/high_pass_filter.cc
modules/audio_processing/splitting_filter.h modules/audio_processing/splitting_filter.cc
modules/audio_processing/three_band_filter_bank.h modules/audio_processing/three_band_filter_bank.cc
modules/audio_processing/aec3/adaptive_fir_filter_erl.h modules/audio_processing/aec3/adaptive_fir_filter_erl.cc
modules/audio_processing/aec3/adaptive_fir_filter.h modules/audio_processing/aec3/adaptive_fir_filter.cc
modules/audio_processing/aec3/aec_state.h modules/audio_processing/aec3/aec_state.cc
modules/audio_processing/aec3/aec3_common.h modules/audio_processing/aec3/aec3_common.cc
modules/audio_processing/aec3/aec3_fft.h modules/audio_processing/aec3/aec3_fft.cc
modules/audio_processing/aec3/alignment_mixer.h modules/audio_processing/aec3/alignment_mixer.cc
modules/audio_processing/aec3/api_call_jitter_metrics.h modules/audio_processing/aec3/api_call_jitter_metrics.cc
modules/audio_processing/aec3/block_buffer.h modules/audio_processing/aec3/block_buffer.cc
modules/audio_processing/aec3/block_delay_buffer.h modules/audio_processing/aec3/block_delay_buffer.cc
modules/audio_processing/aec3/block_framer.h modules/audio_processing/aec3/block_framer.cc
modules/audio_processing/aec3/block_processor_metrics.h modules/audio_processing/aec3/block_processor_metrics.cc
modules/audio_processing/aec3/block_processor.h modules/audio_processing/aec3/block_processor.cc
modules/audio_processing/aec3/clockdrift_detector.h modules/audio_processing/aec3/clockdrift_detector.cc
modules/audio_processing/aec3/coarse_filter_update_gain.h modules/audio_processing/aec3/coarse_filter_update_gain.cc
modules/audio_processing/aec3/comfort_noise_generator.h modules/audio_processing/aec3/comfort_noise_generator.cc
modules/audio_processing/aec3/config_selector.h modules/audio_processing/aec3/config_selector.cc
modules/audio_processing/aec3/decimator.h modules/audio_processing/aec3/decimator.cc
modules/audio_processing/aec3/dominant_nearend_detector.h modules/audio_processing/aec3/dominant_nearend_detector.cc
modules/audio_processing/aec3/downsampled_render_buffer.h modules/audio_processing/aec3/downsampled_render_buffer.cc
modules/audio_processing/aec3/echo_audibility.h modules/audio_processing/aec3/echo_audibility.cc
modules/audio_processing/aec3/echo_canceller3.h modules/audio_processing/aec3/echo_canceller3.cc
modules/audio_processing/aec3/echo_path_delay_estimator.h modules/audio_processing/aec3/echo_path_delay_estimator.cc
modules/audio_processing/aec3/echo_path_variability.h modules/audio_processing/aec3/echo_path_variability.cc
modules/audio_processing/aec3/echo_remover_metrics.h modules/audio_processing/aec3/echo_remover_metrics.cc
modules/audio_processing/aec3/echo_remover.h modules/audio_processing/aec3/echo_remover.cc
modules/audio_processing/aec3/erl_estimator.h modules/audio_processing/aec3/erl_estimator.cc
modules/audio_processing/aec3/erle_estimator.h modules/audio_processing/aec3/erle_estimator.cc
modules/audio_processing/aec3/fft_buffer.h modules/audio_processing/aec3/fft_buffer.cc
modules/audio_processing/aec3/filter_analyzer.h modules/audio_processing/aec3/filter_analyzer.cc
modules/audio_processing/aec3/frame_blocker.h modules/audio_processing/aec3/frame_blocker.cc
modules/audio_processing/aec3/fullband_erle_estimator.h modules/audio_processing/aec3/fullband_erle_estimator.cc
modules/audio_processing/aec3/matched_filter_lag_aggregator.h modules/audio_processing/aec3/matched_filter_lag_aggregator.cc
modules/audio_processing/aec3/matched_filter.h modules/audio_processing/aec3/matched_filter.cc
modules/audio_processing/aec3/moving_average.h modules/audio_processing/aec3/moving_average.cc
modules/audio_processing/aec3/multi_channel_content_detector.h modules/audio_processing/aec3/multi_channel_content_detector.cc
modules/audio_processing/aec3/refined_filter_update_gain.h modules/audio_processing/aec3/refined_filter_update_gain.cc
modules/audio_processing/aec3/render_buffer.h modules/audio_processing/aec3/render_buffer.cc
modules/audio_processing/aec3/render_delay_buffer.h modules/audio_processing/aec3/render_delay_buffer.cc
modules/audio_processing/aec3/render_delay_controller_metrics.h modules/audio_processing/aec3/render_delay_controller_metrics.cc
modules/audio_processing/aec3/render_delay_controller.h modules/audio_processing/aec3/render_delay_controller.cc
modules/audio_processing/aec3/render_signal_analyzer.h modules/audio_processing/aec3/render_signal_analyzer.cc
modules/audio_processing/aec3/residual_echo_estimator.h modules/audio_processing/aec3/residual_echo_estimator.cc
modules/audio_processing/aec3/reverb_decay_estimator.h modules/audio_processing/aec3/reverb_decay_estimator.cc
modules/audio_processing/aec3/reverb_frequency_response.h modules/audio_processing/aec3/reverb_frequency_response.cc
modules/audio_processing/aec3/reverb_model_estimator.h modules/audio_processing/aec3/reverb_model_estimator.cc
modules/audio_processing/aec3/reverb_model.h modules/audio_processing/aec3/reverb_model.cc
modules/audio_processing/aec3/signal_dependent_erle_estimator.h modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
modules/audio_processing/aec3/spectrum_buffer.h modules/audio_processing/aec3/spectrum_buffer.cc
modules/audio_processing/aec3/stationarity_estimator.h modules/audio_processing/aec3/stationarity_estimator.cc
modules/audio_processing/aec3/subband_erle_estimator.h modules/audio_processing/aec3/subband_erle_estimator.cc
modules/audio_processing/aec3/subband_nearend_detector.h modules/audio_processing/aec3/subband_nearend_detector.cc
modules/audio_processing/aec3/subtractor_output_analyzer.h modules/audio_processing/aec3/subtractor_output_analyzer.cc
modules/audio_processing/aec3/subtractor_output.h modules/audio_processing/aec3/subtractor_output.cc
modules/audio_processing/aec3/subtractor.h modules/audio_processing/aec3/subtractor.cc
modules/audio_processing/aec3/suppression_filter.h modules/audio_processing/aec3/suppression_filter.cc
modules/audio_processing/aec3/suppression_gain.h modules/audio_processing/aec3/suppression_gain.cc
modules/audio_processing/aec3/transparent_mode.h modules/audio_processing/aec3/transparent_mode.cc
modules/audio_processing/logging/apm_data_dumper.h modules/audio_processing/logging/apm_data_dumper.cc
modules/audio_processing/utility/cascaded_biquad_filter.h modules/audio_processing/utility/cascaded_biquad_filter.cc
)
target_compile_definitions(VocieProcess
PRIVATE WEBRTC_WIN
PRIVATE NOMINMAX # <windows.h>
PRIVATE RTC_DISABLE_LOGGING
PRIVATE RTC_METRICS_ENABLED=0
PRIVATE WEBRTC_APM_DEBUG_DUMP=0
)
target_include_directories(VocieProcess
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
)
target_link_libraries(VocieProcess
PRIVATE absl::optional
)
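For reference, a consumer target might link against this library as follows (a sketch; `MyApp` is a hypothetical target, and since the include directory above is PRIVATE, consumers have to add it themselves):

# Hypothetical consumer; assumes VocieProcess is brought in via add_subdirectory.
add_subdirectory(VocieProcess)

add_executable(MyApp main.cpp)
# The library does not export its include path, so add it explicitly.
target_include_directories(MyApp PRIVATE ${CMAKE_SOURCE_DIR}/VocieProcess)
target_link_libraries(MyApp PRIVATE VocieProcess)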

VocieProcess/api/array_view.h Normal file

@@ -0,0 +1,335 @@
/*
* Copyright 2015 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_ARRAY_VIEW_H_
#define API_ARRAY_VIEW_H_
#include <algorithm>
#include <array>
#include <cstddef>
#include <iterator>
#include <type_traits>
#include "rtc_base/checks.h"
#include "rtc_base/type_traits.h"
namespace rtc {
// tl;dr: rtc::ArrayView is the same thing as gsl::span from the Guideline
// Support Library.
//
// Many functions read from or write to arrays. The obvious way to do this is
// to use two arguments, a pointer to the first element and an element count:
//
// bool Contains17(const int* arr, size_t size) {
// for (size_t i = 0; i < size; ++i) {
// if (arr[i] == 17)
// return true;
// }
// return false;
// }
//
// This is flexible, since it doesn't matter how the array is stored (C array,
// std::vector, rtc::Buffer, ...), but it's error-prone because the caller has
// to correctly specify the array length:
//
// Contains17(arr, arraysize(arr)); // C array
// Contains17(arr.data(), arr.size()); // std::vector
// Contains17(arr, size); // pointer + size
// ...
//
// It's also kind of messy to have two separate arguments for what is
// conceptually a single thing.
//
// Enter rtc::ArrayView<T>. It contains a T pointer (to an array it doesn't
// own) and a count, and supports the basic things you'd expect, such as
// indexing and iteration. It allows us to write our function like this:
//
// bool Contains17(rtc::ArrayView<const int> arr) {
// for (auto e : arr) {
// if (e == 17)
// return true;
// }
// return false;
// }
//
// And even better, because a bunch of things will implicitly convert to
// ArrayView, we can call it like this:
//
// Contains17(arr); // C array
// Contains17(arr); // std::vector
// Contains17(rtc::ArrayView<int>(arr, size)); // pointer + size
// Contains17(nullptr); // nullptr -> empty ArrayView
// ...
//
// ArrayView<T> stores both a pointer and a size, but you may also use
// ArrayView<T, N>, which has a size that's fixed at compile time (which means
// it only has to store the pointer).
//
// One important point is that ArrayView<T> and ArrayView<const T> are
// different types, which allow and don't allow mutation of the array elements,
// respectively. The implicit conversions work just like you'd hope, so that
// e.g. vector<int> will convert to either ArrayView<int> or ArrayView<const
// int>, but const vector<int> will convert only to ArrayView<const int>.
// (ArrayView itself can be the source type in such conversions, so
// ArrayView<int> will convert to ArrayView<const int>.)
//
// Note: ArrayView is tiny (just a pointer and a count if variable-sized, just
// a pointer if fixed-size) and trivially copyable, so it's probably cheaper to
// pass it by value than by const reference.
namespace array_view_internal {
// Magic constant for indicating that the size of an ArrayView is variable
// instead of fixed.
enum : std::ptrdiff_t { kArrayViewVarSize = -4711 };
// Base class for ArrayViews of fixed nonzero size.
template <typename T, std::ptrdiff_t Size>
class ArrayViewBase {
static_assert(Size > 0, "ArrayView size must be variable or non-negative");
public:
ArrayViewBase(T* data, size_t size) : data_(data) {}
static constexpr size_t size() { return Size; }
static constexpr bool empty() { return false; }
T* data() const { return data_; }
protected:
static constexpr bool fixed_size() { return true; }
private:
T* data_;
};
// Specialized base class for ArrayViews of fixed zero size.
template <typename T>
class ArrayViewBase<T, 0> {
public:
explicit ArrayViewBase(T* data, size_t size) {}
static constexpr size_t size() { return 0; }
static constexpr bool empty() { return true; }
T* data() const { return nullptr; }
protected:
static constexpr bool fixed_size() { return true; }
};
// Specialized base class for ArrayViews of variable size.
template <typename T>
class ArrayViewBase<T, array_view_internal::kArrayViewVarSize> {
public:
ArrayViewBase(T* data, size_t size)
: data_(size == 0 ? nullptr : data), size_(size) {}
size_t size() const { return size_; }
bool empty() const { return size_ == 0; }
T* data() const { return data_; }
protected:
static constexpr bool fixed_size() { return false; }
private:
T* data_;
size_t size_;
};
} // namespace array_view_internal
template <typename T,
std::ptrdiff_t Size = array_view_internal::kArrayViewVarSize>
class ArrayView final : public array_view_internal::ArrayViewBase<T, Size> {
public:
using value_type = T;
using reference = value_type&;
using const_reference = const value_type&;
using pointer = value_type*;
using const_pointer = const value_type*;
using const_iterator = const T*;
// Construct an ArrayView from a pointer and a length.
template <typename U>
ArrayView(U* data, size_t size)
: array_view_internal::ArrayViewBase<T, Size>::ArrayViewBase(data, size) {
RTC_DCHECK_EQ(size == 0 ? nullptr : data, this->data());
RTC_DCHECK_EQ(size, this->size());
RTC_DCHECK_EQ(!this->data(),
this->size() == 0); // data is null iff size == 0.
}
// Construct an empty ArrayView. Note that fixed-size ArrayViews of size > 0
// cannot be empty.
ArrayView() : ArrayView(nullptr, 0) {}
ArrayView(std::nullptr_t) // NOLINT
: ArrayView() {}
ArrayView(std::nullptr_t, size_t size)
: ArrayView(static_cast<T*>(nullptr), size) {
static_assert(Size == 0 || Size == array_view_internal::kArrayViewVarSize,
"");
RTC_DCHECK_EQ(0, size);
}
// Construct an ArrayView from a C-style array.
template <typename U, size_t N>
ArrayView(U (&array)[N]) // NOLINT
: ArrayView(array, N) {
static_assert(Size == N || Size == array_view_internal::kArrayViewVarSize,
"Array size must match ArrayView size");
}
// (Only if size is fixed.) Construct a fixed size ArrayView<T, N> from a
// non-const std::array instance. For an ArrayView with variable size, the
// used ctor is ArrayView(U& u) instead.
template <typename U,
size_t N,
typename std::enable_if<
Size == static_cast<std::ptrdiff_t>(N)>::type* = nullptr>
ArrayView(std::array<U, N>& u) // NOLINT
: ArrayView(u.data(), u.size()) {}
// (Only if size is fixed.) Construct a fixed size ArrayView<T, N> where T is
// const from a const(expr) std::array instance. For an ArrayView with
// variable size, the used ctor is ArrayView(U& u) instead.
template <typename U,
size_t N,
typename std::enable_if<
Size == static_cast<std::ptrdiff_t>(N)>::type* = nullptr>
ArrayView(const std::array<U, N>& u) // NOLINT
: ArrayView(u.data(), u.size()) {}
// (Only if size is fixed.) Construct an ArrayView from any type U that has a
// static constexpr size() method whose return value is equal to Size, and a
// data() method whose return value converts implicitly to T*. In particular,
// this means we allow conversion from ArrayView<T, N> to ArrayView<const T,
// N>, but not the other way around. We also don't allow conversion from
// ArrayView<T> to ArrayView<T, N>, or from ArrayView<T, M> to ArrayView<T,
// N> when M != N.
template <
typename U,
typename std::enable_if<Size != array_view_internal::kArrayViewVarSize &&
HasDataAndSize<U, T>::value>::type* = nullptr>
ArrayView(U& u) // NOLINT
: ArrayView(u.data(), u.size()) {
static_assert(U::size() == Size, "Sizes must match exactly");
}
template <
typename U,
typename std::enable_if<Size != array_view_internal::kArrayViewVarSize &&
HasDataAndSize<U, T>::value>::type* = nullptr>
ArrayView(const U& u) // NOLINT(runtime/explicit)
: ArrayView(u.data(), u.size()) {
static_assert(U::size() == Size, "Sizes must match exactly");
}
// (Only if size is variable.) Construct an ArrayView from any type U that
// has a size() method whose return value converts implicitly to size_t, and
// a data() method whose return value converts implicitly to T*. In
// particular, this means we allow conversion from ArrayView<T> to
// ArrayView<const T>, but not the other way around. Other allowed
// conversions include
// ArrayView<T, N> to ArrayView<T> or ArrayView<const T>,
// std::vector<T> to ArrayView<T> or ArrayView<const T>,
// const std::vector<T> to ArrayView<const T>,
// rtc::Buffer to ArrayView<uint8_t> or ArrayView<const uint8_t>, and
// const rtc::Buffer to ArrayView<const uint8_t>.
template <
typename U,
typename std::enable_if<Size == array_view_internal::kArrayViewVarSize &&
HasDataAndSize<U, T>::value>::type* = nullptr>
ArrayView(U& u) // NOLINT
: ArrayView(u.data(), u.size()) {}
template <
typename U,
typename std::enable_if<Size == array_view_internal::kArrayViewVarSize &&
HasDataAndSize<U, T>::value>::type* = nullptr>
ArrayView(const U& u) // NOLINT(runtime/explicit)
: ArrayView(u.data(), u.size()) {}
// Indexing and iteration. These allow mutation even if the ArrayView is
// const, because the ArrayView doesn't own the array. (To prevent mutation,
// use a const element type.)
T& operator[](size_t idx) const {
RTC_DCHECK_LT(idx, this->size());
RTC_DCHECK(this->data());
return this->data()[idx];
}
T* begin() const { return this->data(); }
T* end() const { return this->data() + this->size(); }
const T* cbegin() const { return this->data(); }
const T* cend() const { return this->data() + this->size(); }
std::reverse_iterator<T*> rbegin() const {
return std::make_reverse_iterator(end());
}
std::reverse_iterator<T*> rend() const {
return std::make_reverse_iterator(begin());
}
std::reverse_iterator<const T*> crbegin() const {
return std::make_reverse_iterator(cend());
}
std::reverse_iterator<const T*> crend() const {
return std::make_reverse_iterator(cbegin());
}
ArrayView<T> subview(size_t offset, size_t size) const {
return offset < this->size()
? ArrayView<T>(this->data() + offset,
std::min(size, this->size() - offset))
: ArrayView<T>();
}
ArrayView<T> subview(size_t offset) const {
return subview(offset, this->size());
}
};
// Comparing two ArrayViews compares their (pointer,size) pairs; it does *not*
// dereference the pointers.
template <typename T, std::ptrdiff_t Size1, std::ptrdiff_t Size2>
bool operator==(const ArrayView<T, Size1>& a, const ArrayView<T, Size2>& b) {
return a.data() == b.data() && a.size() == b.size();
}
template <typename T, std::ptrdiff_t Size1, std::ptrdiff_t Size2>
bool operator!=(const ArrayView<T, Size1>& a, const ArrayView<T, Size2>& b) {
return !(a == b);
}
// Variable-size ArrayViews are the size of two pointers; fixed-size ArrayViews
// are the size of one pointer. (And as a special case, fixed-size ArrayViews
// of size 0 require no storage.)
static_assert(sizeof(ArrayView<int>) == 2 * sizeof(int*), "");
static_assert(sizeof(ArrayView<int, 17>) == sizeof(int*), "");
static_assert(std::is_empty<ArrayView<int, 0>>::value, "");
template <typename T>
inline ArrayView<T> MakeArrayView(T* data, size_t size) {
return ArrayView<T>(data, size);
}
// Only for primitive types that have the same size and alignment.
// Allow reinterpret cast of the array view to another primitive type of the
// same size.
// Template arguments order is (U, T, Size) to allow deduction of the template
// arguments in client calls: reinterpret_array_view<target_type>(array_view).
template <typename U, typename T, std::ptrdiff_t Size>
inline ArrayView<U, Size> reinterpret_array_view(ArrayView<T, Size> view) {
static_assert(sizeof(U) == sizeof(T) && alignof(U) == alignof(T),
"ArrayView reinterpret_cast is only supported for casting "
"between views that represent the same chunk of memory.");
static_assert(
std::is_fundamental<T>::value && std::is_fundamental<U>::value,
"ArrayView reinterpret_cast is only supported for casting between "
"fundamental types.");
return ArrayView<U, Size>(reinterpret_cast<U*>(view.data()), view.size());
}
} // namespace rtc
#endif // API_ARRAY_VIEW_H_
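To make the conversions documented above concrete, a standalone sketch (`CountZeros` and `Demo` are illustrative names, not part of this header):

#include <array>
#include <vector>

#include "api/array_view.h"

// Works for any contiguous int storage thanks to the implicit conversions.
int CountZeros(rtc::ArrayView<const int> view) {
  int n = 0;
  for (int v : view) {
    if (v == 0) ++n;
  }
  return n;
}

void Demo() {
  int c_array[4] = {0, 1, 0, 2};
  std::vector<int> vec = {0, 0, 3};
  std::array<int, 2> arr = {5, 0};

  CountZeros(c_array);  // C array
  CountZeros(vec);      // std::vector
  CountZeros(arr);      // std::array
  CountZeros(rtc::MakeArrayView(vec.data(), vec.size()));  // pointer + size
}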

VocieProcess/api/audio/audio_processing.cc Normal file

@@ -0,0 +1,211 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/audio_processing.h"
#include <string>
#include "rtc_base/checks.h"
#include "rtc_base/strings/string_builder.h"
namespace webrtc {
namespace {
using Agc1Config = AudioProcessing::Config::GainController1;
using Agc2Config = AudioProcessing::Config::GainController2;
std::string NoiseSuppressionLevelToString(
const AudioProcessing::Config::NoiseSuppression::Level& level) {
switch (level) {
case AudioProcessing::Config::NoiseSuppression::Level::kLow:
return "Low";
case AudioProcessing::Config::NoiseSuppression::Level::kModerate:
return "Moderate";
case AudioProcessing::Config::NoiseSuppression::Level::kHigh:
return "High";
case AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh:
return "VeryHigh";
}
RTC_CHECK_NOTREACHED();
}
std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
switch (mode) {
case Agc1Config::Mode::kAdaptiveAnalog:
return "AdaptiveAnalog";
case Agc1Config::Mode::kAdaptiveDigital:
return "AdaptiveDigital";
case Agc1Config::Mode::kFixedDigital:
return "FixedDigital";
}
RTC_CHECK_NOTREACHED();
}
} // namespace
constexpr int AudioProcessing::kNativeSampleRatesHz[];
void CustomProcessing::SetRuntimeSetting(
AudioProcessing::RuntimeSetting setting) {}
bool Agc1Config::operator==(const Agc1Config& rhs) const {
const auto& analog_lhs = analog_gain_controller;
const auto& analog_rhs = rhs.analog_gain_controller;
return enabled == rhs.enabled && mode == rhs.mode &&
target_level_dbfs == rhs.target_level_dbfs &&
compression_gain_db == rhs.compression_gain_db &&
enable_limiter == rhs.enable_limiter &&
analog_lhs.enabled == analog_rhs.enabled &&
analog_lhs.startup_min_volume == analog_rhs.startup_min_volume &&
analog_lhs.clipped_level_min == analog_rhs.clipped_level_min &&
analog_lhs.enable_digital_adaptive ==
analog_rhs.enable_digital_adaptive &&
analog_lhs.clipped_level_step == analog_rhs.clipped_level_step &&
analog_lhs.clipped_ratio_threshold ==
analog_rhs.clipped_ratio_threshold &&
analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames &&
analog_lhs.clipping_predictor.mode ==
analog_rhs.clipping_predictor.mode &&
analog_lhs.clipping_predictor.window_length ==
analog_rhs.clipping_predictor.window_length &&
analog_lhs.clipping_predictor.reference_window_length ==
analog_rhs.clipping_predictor.reference_window_length &&
analog_lhs.clipping_predictor.reference_window_delay ==
analog_rhs.clipping_predictor.reference_window_delay &&
analog_lhs.clipping_predictor.clipping_threshold ==
analog_rhs.clipping_predictor.clipping_threshold &&
analog_lhs.clipping_predictor.crest_factor_margin ==
analog_rhs.clipping_predictor.crest_factor_margin &&
analog_lhs.clipping_predictor.use_predicted_step ==
analog_rhs.clipping_predictor.use_predicted_step;
}
bool Agc2Config::AdaptiveDigital::operator==(
const Agc2Config::AdaptiveDigital& rhs) const {
return enabled == rhs.enabled && headroom_db == rhs.headroom_db &&
max_gain_db == rhs.max_gain_db &&
initial_gain_db == rhs.initial_gain_db &&
max_gain_change_db_per_second == rhs.max_gain_change_db_per_second &&
max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs;
}
bool Agc2Config::InputVolumeController::operator==(
const Agc2Config::InputVolumeController& rhs) const {
return enabled == rhs.enabled;
}
bool Agc2Config::operator==(const Agc2Config& rhs) const {
return enabled == rhs.enabled &&
fixed_digital.gain_db == rhs.fixed_digital.gain_db &&
adaptive_digital == rhs.adaptive_digital &&
input_volume_controller == rhs.input_volume_controller;
}
bool AudioProcessing::Config::CaptureLevelAdjustment::operator==(
const AudioProcessing::Config::CaptureLevelAdjustment& rhs) const {
return enabled == rhs.enabled && pre_gain_factor == rhs.pre_gain_factor &&
post_gain_factor == rhs.post_gain_factor &&
analog_mic_gain_emulation == rhs.analog_mic_gain_emulation;
}
bool AudioProcessing::Config::CaptureLevelAdjustment::AnalogMicGainEmulation::
operator==(const AudioProcessing::Config::CaptureLevelAdjustment::
AnalogMicGainEmulation& rhs) const {
return enabled == rhs.enabled && initial_level == rhs.initial_level;
}
std::string AudioProcessing::Config::ToString() const {
char buf[2048];
rtc::SimpleStringBuilder builder(buf);
builder << "AudioProcessing::Config{ "
"pipeline: { "
"maximum_internal_processing_rate: "
<< pipeline.maximum_internal_processing_rate
<< ", multi_channel_render: " << pipeline.multi_channel_render
<< ", multi_channel_capture: " << pipeline.multi_channel_capture
<< " }, pre_amplifier: { enabled: " << pre_amplifier.enabled
<< ", fixed_gain_factor: " << pre_amplifier.fixed_gain_factor
<< " },capture_level_adjustment: { enabled: "
<< capture_level_adjustment.enabled
<< ", pre_gain_factor: " << capture_level_adjustment.pre_gain_factor
<< ", post_gain_factor: " << capture_level_adjustment.post_gain_factor
<< ", analog_mic_gain_emulation: { enabled: "
<< capture_level_adjustment.analog_mic_gain_emulation.enabled
<< ", initial_level: "
<< capture_level_adjustment.analog_mic_gain_emulation.initial_level
<< " }}, high_pass_filter: { enabled: " << high_pass_filter.enabled
<< " }, echo_canceller: { enabled: " << echo_canceller.enabled
<< ", mobile_mode: " << echo_canceller.mobile_mode
<< ", enforce_high_pass_filtering: "
<< echo_canceller.enforce_high_pass_filtering
<< " }, noise_suppression: { enabled: " << noise_suppression.enabled
<< ", level: "
<< NoiseSuppressionLevelToString(noise_suppression.level)
<< " }, transient_suppression: { enabled: "
<< transient_suppression.enabled
<< " }, gain_controller1: { enabled: " << gain_controller1.enabled
<< ", mode: " << GainController1ModeToString(gain_controller1.mode)
<< ", target_level_dbfs: " << gain_controller1.target_level_dbfs
<< ", compression_gain_db: " << gain_controller1.compression_gain_db
<< ", enable_limiter: " << gain_controller1.enable_limiter
<< ", analog_gain_controller { enabled: "
<< gain_controller1.analog_gain_controller.enabled
<< ", startup_min_volume: "
<< gain_controller1.analog_gain_controller.startup_min_volume
<< ", clipped_level_min: "
<< gain_controller1.analog_gain_controller.clipped_level_min
<< ", enable_digital_adaptive: "
<< gain_controller1.analog_gain_controller.enable_digital_adaptive
<< ", clipped_level_step: "
<< gain_controller1.analog_gain_controller.clipped_level_step
<< ", clipped_ratio_threshold: "
<< gain_controller1.analog_gain_controller.clipped_ratio_threshold
<< ", clipped_wait_frames: "
<< gain_controller1.analog_gain_controller.clipped_wait_frames
<< ", clipping_predictor: { enabled: "
<< gain_controller1.analog_gain_controller.clipping_predictor.enabled
<< ", mode: "
<< gain_controller1.analog_gain_controller.clipping_predictor.mode
<< ", window_length: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.window_length
<< ", reference_window_length: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.reference_window_length
<< ", reference_window_delay: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.reference_window_delay
<< ", clipping_threshold: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.clipping_threshold
<< ", crest_factor_margin: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.crest_factor_margin
<< ", use_predicted_step: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.use_predicted_step
<< " }}}, gain_controller2: { enabled: " << gain_controller2.enabled
<< ", fixed_digital: { gain_db: "
<< gain_controller2.fixed_digital.gain_db
<< " }, adaptive_digital: { enabled: "
<< gain_controller2.adaptive_digital.enabled
<< ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
<< ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
<< ", initial_gain_db: "
<< gain_controller2.adaptive_digital.initial_gain_db
<< ", max_gain_change_db_per_second: "
<< gain_controller2.adaptive_digital.max_gain_change_db_per_second
<< ", max_output_noise_level_dbfs: "
<< gain_controller2.adaptive_digital.max_output_noise_level_dbfs
<< " }, input_volume_control : { enabled "
<< gain_controller2.input_volume_controller.enabled << "}}";
return builder.str();
}
} // namespace webrtc
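A short sketch of exercising this ToString() (standalone snippet, not part of the commit):

#include <cstdio>

#include "api/audio/audio_processing.h"

int main() {
  // Build a config, tweak a few fields, and dump the human-readable form
  // implemented above.
  webrtc::AudioProcessing::Config config;
  config.echo_canceller.enabled = true;
  config.noise_suppression.enabled = true;
  config.noise_suppression.level =
      webrtc::AudioProcessing::Config::NoiseSuppression::kHigh;
  std::printf("%s\n", config.ToString().c_str());
  return 0;
}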

VocieProcess/api/audio/audio_processing.h Normal file

@@ -0,0 +1,944 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_PROCESSING_H_
#define API_AUDIO_AUDIO_PROCESSING_H_
// MSVC++ requires this to be set before any other includes to get M_PI.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif
#include <math.h>
#include <stddef.h> // size_t
#include <stdio.h> // FILE
#include <string.h>
#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include "absl/base/nullability.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/audio_processing_statistics.h"
#include "api/audio/echo_control.h"
#include "api/ref_count.h"
#include "api/scoped_refptr.h"
#include "api/task_queue/task_queue_base.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
class AecDump;
class AudioBuffer;
class StreamConfig;
class ProcessingConfig;
class EchoDetector;
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//
// APM operates on two audio streams on a frame-by-frame basis. Frames of the
// primary stream, on which all processing is applied, are passed to
// `ProcessStream()`. Frames of the reverse direction stream are passed to
// `ProcessReverseStream()`. On the client-side, this will typically be the
// near-end (capture) and far-end (render) streams, respectively. APM should be
// placed in the signal chain as close to the audio hardware abstraction layer
// (HAL) as possible.
//
// On the server-side, the reverse stream will normally not be used, with
// processing occurring on each incoming stream.
//
// Component interfaces follow a similar pattern and are accessed through
// corresponding getters in APM. All components are disabled at create-time,
// with default settings that are recommended for most situations. New settings
// can be applied without enabling a component. Enabling a component triggers
// memory allocation and initialization to allow it to start processing the
// streams.
//
// Thread safety is provided with the following assumptions to reduce locking
// overhead:
// 1. The stream getters and setters are called from the same thread as
// ProcessStream(). More precisely, stream functions are never called
// concurrently with ProcessStream().
// 2. Parameter getters are never called concurrently with the corresponding
// setter.
//
// APM accepts only linear PCM audio data in chunks of ~10 ms (see
// AudioProcessing::GetFrameSize() for details) and sample rates ranging from
// 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
// float interfaces use deinterleaved data.
//
// Usage example, omitting error checking:
// rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
//
// AudioProcessing::Config config;
// config.echo_canceller.enabled = true;
// config.echo_canceller.mobile_mode = false;
//
// config.gain_controller1.enabled = true;
// config.gain_controller1.mode =
// AudioProcessing::Config::GainController1::kAdaptiveAnalog;
// config.gain_controller1.analog_level_minimum = 0;
// config.gain_controller1.analog_level_maximum = 255;
//
// config.gain_controller2.enabled = true;
//
// config.high_pass_filter.enabled = true;
//
// apm->ApplyConfig(config)
//
// // Start a voice call...
//
// // ... Render frame arrives bound for the audio HAL ...
// apm->ProcessReverseStream(render_frame);
//
// // ... Capture frame arrives from the audio HAL ...
// // Call required set_stream_ functions.
// apm->set_stream_delay_ms(delay_ms);
// apm->set_stream_analog_level(analog_level);
//
// apm->ProcessStream(capture_frame);
//
// // Call required stream_ functions.
// analog_level = apm->recommended_stream_analog_level();
// has_voice = apm->stream_has_voice();
//
// // Repeat render and capture processing for the duration of the call...
// // Start a new call...
// apm->Initialize();
//
// // Close the application...
// apm.reset();
//
class RTC_EXPORT AudioProcessing : public RefCountInterface {
public:
// The struct below constitutes the new parameter scheme for the audio
// processing. It is being introduced gradually and until it is fully
// introduced, it is prone to change.
// TODO(peah): Remove this comment once the new config scheme is fully rolled
// out.
//
// The parameters and behavior of the audio processing module are controlled
// by changing the default values in the AudioProcessing::Config struct.
// The config is applied by passing the struct to the ApplyConfig method.
//
// This config is intended to be used during setup, and to enable/disable
// top-level processing effects. Use during processing may cause undesired
// submodule resets, affecting the audio quality. Use the RuntimeSetting
// construct for runtime configuration.
struct RTC_EXPORT Config {
// Sets the properties of the audio processing pipeline.
struct RTC_EXPORT Pipeline {
// Ways to downmix a multi-channel track to mono.
enum class DownmixMethod {
kAverageChannels, // Average across channels.
kUseFirstChannel // Use the first channel.
};
// Maximum allowed processing rate used internally. May only be set to
// 32000 or 48000 and any differing values will be treated as 48000.
int maximum_internal_processing_rate = 48000;
// Allow multi-channel processing of render audio.
bool multi_channel_render = false;
// Allow multi-channel processing of capture audio when AEC3 is active
// or a custom AEC is injected.
bool multi_channel_capture = false;
// Indicates how to downmix multi-channel capture audio to mono (when
// needed).
DownmixMethod capture_downmix_method = DownmixMethod::kAverageChannels;
} pipeline;
// Enables the pre-amplifier. It amplifies the capture signal
// before any other processing is done.
// TODO(webrtc:5298): Deprecate and use the pre-gain functionality in
// capture_level_adjustment instead.
struct PreAmplifier {
bool enabled = false;
float fixed_gain_factor = 1.0f;
} pre_amplifier;
// Functionality for general level adjustment in the capture pipeline. This
// should not be used together with the legacy PreAmplifier functionality.
struct CaptureLevelAdjustment {
bool operator==(const CaptureLevelAdjustment& rhs) const;
bool operator!=(const CaptureLevelAdjustment& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
// The `pre_gain_factor` scales the signal before any processing is done.
float pre_gain_factor = 1.0f;
// The `post_gain_factor` scales the signal after all processing is done.
float post_gain_factor = 1.0f;
struct AnalogMicGainEmulation {
bool operator==(const AnalogMicGainEmulation& rhs) const;
bool operator!=(const AnalogMicGainEmulation& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
// Initial analog gain level to use for the emulated analog gain. Must
// be in the range [0...255].
int initial_level = 255;
} analog_mic_gain_emulation;
} capture_level_adjustment;
struct HighPassFilter {
bool enabled = false;
bool apply_in_full_band = true;
} high_pass_filter;
struct EchoCanceller {
bool enabled = false;
bool mobile_mode = false;
bool export_linear_aec_output = false;
// Enforce the highpass filter to be on (has no effect for the mobile
// mode).
bool enforce_high_pass_filtering = true;
} echo_canceller;
// Enables background noise suppression.
struct NoiseSuppression {
bool enabled = false;
enum Level { kLow, kModerate, kHigh, kVeryHigh };
Level level = kModerate;
bool analyze_linear_aec_output_when_available = false;
} noise_suppression;
// TODO(bugs.webrtc.org/357281131): Deprecated. Stop using and remove.
// Enables transient suppression.
struct TransientSuppression {
bool enabled = false;
} transient_suppression;
// Enables automatic gain control (AGC) functionality.
// The automatic gain control (AGC) component brings the signal to an
// appropriate range. This is done by applying a digital gain directly and,
// in the analog mode, prescribing an analog gain to be applied at the audio
// HAL.
// Recommended to be enabled on the client-side.
struct RTC_EXPORT GainController1 {
bool operator==(const GainController1& rhs) const;
bool operator!=(const GainController1& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
enum Mode {
// Adaptive mode intended for use if an analog volume control is
// available on the capture device. It will require the user to provide
// coupling between the OS mixer controls and AGC through the
// stream_analog_level() functions.
// It consists of an analog gain prescription for the audio device and a
// digital compression stage.
kAdaptiveAnalog,
// Adaptive mode intended for situations in which an analog volume
// control is unavailable. It operates in a similar fashion to the
// adaptive analog mode, but with scaling instead applied in the digital
// domain. As with the analog mode, it additionally uses a digital
// compression stage.
kAdaptiveDigital,
// Fixed mode which enables only the digital compression stage also used
// by the two adaptive modes.
// It is distinguished from the adaptive modes by considering only a
// short time-window of the input signal. It applies a fixed gain
// through most of the input level range, and compresses (gradually
// reduces gain with increasing level) the input signal at higher
// levels. This mode is preferred on embedded devices where the capture
// signal level is predictable, so that a known gain can be applied.
kFixedDigital
};
Mode mode = kAdaptiveAnalog;
// Sets the target peak level (or envelope) of the AGC in dBFs (decibels
// from digital full-scale). The convention is to use positive values. For
// instance, passing in a value of 3 corresponds to -3 dBFs, or a target
// level 3 dB below full-scale. Limited to [0, 31].
int target_level_dbfs = 3;
// Sets the maximum gain the digital compression stage may apply, in dB. A
// higher number corresponds to greater compression, while a value of 0
// will leave the signal uncompressed. Limited to [0, 90].
// For updates after APM setup, use a RuntimeSetting instead.
int compression_gain_db = 9;
// When enabled, the compression stage will hard limit the signal to the
// target level. Otherwise, the signal will be compressed but not limited
// above the target level.
bool enable_limiter = true;
// Enables the analog gain controller functionality.
struct AnalogGainController {
bool enabled = true;
// TODO(bugs.webrtc.org/7494): Deprecated. Stop using and remove.
int startup_min_volume = 0;
// Lowest analog microphone level that will be applied in response to
// clipping.
int clipped_level_min = 70;
// If true, an adaptive digital gain is applied.
bool enable_digital_adaptive = true;
// Amount the microphone level is lowered with every clipping event.
// Limited to (0, 255].
int clipped_level_step = 15;
// Proportion of clipped samples required to declare a clipping event.
// Limited to (0.f, 1.f).
float clipped_ratio_threshold = 0.1f;
// Time in frames to wait after a clipping event before checking again.
// Limited to values higher than 0.
int clipped_wait_frames = 300;
// Enables clipping prediction functionality.
struct ClippingPredictor {
bool enabled = false;
enum Mode {
// Clipping event prediction mode with fixed step estimation.
kClippingEventPrediction,
// Clipped peak estimation mode with adaptive step estimation.
kAdaptiveStepClippingPeakPrediction,
// Clipped peak estimation mode with fixed step estimation.
kFixedStepClippingPeakPrediction,
};
Mode mode = kClippingEventPrediction;
// Number of frames in the sliding analysis window.
int window_length = 5;
// Number of frames in the sliding reference window.
int reference_window_length = 5;
// Reference window delay (unit: number of frames).
int reference_window_delay = 5;
// Clipping prediction threshold (dBFS).
float clipping_threshold = -1.0f;
// Crest factor drop threshold (dB).
float crest_factor_margin = 3.0f;
// If true, the recommended clipped level step is used to modify the
// analog gain. Otherwise, the predictor runs without affecting the
// analog gain.
bool use_predicted_step = true;
} clipping_predictor;
} analog_gain_controller;
} gain_controller1;
// Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
// replaces the AGC sub-module parametrized by `gain_controller1`.
// AGC2 brings the captured audio signal to the desired level by combining
// three different controllers (namely, input volume controller, adaptive
// digital controller and fixed digital controller) and a limiter.
// TODO(bugs.webrtc.org:7494): Name `GainController` when AGC1 removed.
struct RTC_EXPORT GainController2 {
bool operator==(const GainController2& rhs) const;
bool operator!=(const GainController2& rhs) const {
return !(*this == rhs);
}
// AGC2 must be created if and only if `enabled` is true.
bool enabled = false;
// Parameters for the input volume controller, which adjusts the input
// volume applied when the audio is captured (e.g., microphone volume on
// a soundcard, input volume on HAL).
struct InputVolumeController {
bool operator==(const InputVolumeController& rhs) const;
bool operator!=(const InputVolumeController& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
} input_volume_controller;
// Parameters for the adaptive digital controller, which adjusts and
// applies a digital gain after echo cancellation and after noise
// suppression.
struct RTC_EXPORT AdaptiveDigital {
bool operator==(const AdaptiveDigital& rhs) const;
bool operator!=(const AdaptiveDigital& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
float headroom_db = 5.0f;
float max_gain_db = 50.0f;
float initial_gain_db = 15.0f;
float max_gain_change_db_per_second = 6.0f;
float max_output_noise_level_dbfs = -50.0f;
} adaptive_digital;
// Parameters for the fixed digital controller, which applies a fixed
// digital gain after the adaptive digital controller and before the
// limiter.
struct FixedDigital {
// By setting `gain_db` to a value greater than zero, the limiter can be
// turned into a compressor that first applies a fixed gain.
float gain_db = 0.0f;
} fixed_digital;
} gain_controller2;
std::string ToString() const;
};
// Specifies the properties of a setting to be passed to AudioProcessing at
// runtime.
class RuntimeSetting {
public:
enum class Type {
kNotSpecified,
kCapturePreGain,
kCaptureCompressionGain,
kCaptureFixedPostGain,
kPlayoutVolumeChange,
kCustomRenderProcessingRuntimeSetting,
kPlayoutAudioDeviceChange,
kCapturePostGain,
kCaptureOutputUsed
};
// Play-out audio device properties.
struct PlayoutAudioDeviceInfo {
int id; // Identifies the audio device.
int max_volume; // Maximum play-out volume.
};
RuntimeSetting() : type_(Type::kNotSpecified), value_(0.0f) {}
~RuntimeSetting() = default;
static RuntimeSetting CreateCapturePreGain(float gain) {
return {Type::kCapturePreGain, gain};
}
static RuntimeSetting CreateCapturePostGain(float gain) {
return {Type::kCapturePostGain, gain};
}
// Corresponds to Config::GainController1::compression_gain_db, but for
// runtime configuration.
static RuntimeSetting CreateCompressionGainDb(int gain_db) {
RTC_DCHECK_GE(gain_db, 0);
RTC_DCHECK_LE(gain_db, 90);
return {Type::kCaptureCompressionGain, static_cast<float>(gain_db)};
}
// Corresponds to Config::GainController2::fixed_digital::gain_db, but for
// runtime configuration.
static RuntimeSetting CreateCaptureFixedPostGain(float gain_db) {
RTC_DCHECK_GE(gain_db, 0.0f);
RTC_DCHECK_LE(gain_db, 90.0f);
return {Type::kCaptureFixedPostGain, gain_db};
}
// Creates a runtime setting to notify play-out (aka render) audio device
// changes.
static RuntimeSetting CreatePlayoutAudioDeviceChange(
PlayoutAudioDeviceInfo audio_device) {
return {Type::kPlayoutAudioDeviceChange, audio_device};
}
// Creates a runtime setting to notify play-out (aka render) volume changes.
// `volume` is the unnormalized volume, the maximum of which
static RuntimeSetting CreatePlayoutVolumeChange(int volume) {
return {Type::kPlayoutVolumeChange, volume};
}
static RuntimeSetting CreateCustomRenderSetting(float payload) {
return {Type::kCustomRenderProcessingRuntimeSetting, payload};
}
static RuntimeSetting CreateCaptureOutputUsedSetting(
bool capture_output_used) {
return {Type::kCaptureOutputUsed, capture_output_used};
}
Type type() const { return type_; }
// Getters do not return a value but instead modify the argument to protect
// from implicit casting.
void GetFloat(float* value) const {
RTC_DCHECK(value);
*value = value_.float_value;
}
void GetInt(int* value) const {
RTC_DCHECK(value);
*value = value_.int_value;
}
void GetBool(bool* value) const {
RTC_DCHECK(value);
*value = value_.bool_value;
}
void GetPlayoutAudioDeviceInfo(PlayoutAudioDeviceInfo* value) const {
RTC_DCHECK(value);
*value = value_.playout_audio_device_info;
}
private:
RuntimeSetting(Type id, float value) : type_(id), value_(value) {}
RuntimeSetting(Type id, int value) : type_(id), value_(value) {}
RuntimeSetting(Type id, PlayoutAudioDeviceInfo value)
: type_(id), value_(value) {}
Type type_;
union U {
U() {}
U(int value) : int_value(value) {}
U(float value) : float_value(value) {}
U(PlayoutAudioDeviceInfo value) : playout_audio_device_info(value) {}
float float_value;
int int_value;
bool bool_value;
PlayoutAudioDeviceInfo playout_audio_device_info;
} value_;
};
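// A minimal usage sketch (not part of this header): runtime settings let
// callers adjust gains without a full ApplyConfig() round trip, e.g.
//
//   apm->SetRuntimeSetting(
//       AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.0f));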
~AudioProcessing() override {}
// Initializes internal states, while retaining all user settings. This
// should be called before beginning to process a new audio stream. However,
// it is not necessary to call before processing the first stream after
// creation.
//
// It is also not necessary to call if the audio parameters (sample
// rate and number of channels) have changed. Passing updated parameters
// directly to `ProcessStream()` and `ProcessReverseStream()` is permissible.
// If the parameters are known at init-time though, they may be provided.
// TODO(webrtc:5298): Change to return void.
virtual int Initialize() = 0;
// The int16 interfaces require:
// - only `NativeRate`s be used
// - that the input, output and reverse rates must match
// - that `processing_config.output_stream()` matches
// `processing_config.input_stream()`.
//
// The float interfaces accept arbitrary rates and support differing input and
// output layouts, but the output must have either one channel or the same
// number of channels as the input.
virtual int Initialize(const ProcessingConfig& processing_config) = 0;
// TODO(peah): This method is a temporary solution used to take control
// over the parameters in the audio processing module and is likely to change.
virtual void ApplyConfig(const Config& config) = 0;
// TODO(ajm): Only intended for internal use. Make private and friend the
// necessary classes?
virtual int proc_sample_rate_hz() const = 0;
virtual int proc_split_sample_rate_hz() const = 0;
virtual size_t num_input_channels() const = 0;
virtual size_t num_proc_channels() const = 0;
virtual size_t num_output_channels() const = 0;
virtual size_t num_reverse_channels() const = 0;
// Set to true when the output of AudioProcessing will be muted or in some
// other way not used. Ideally, the captured audio would still be processed,
// but some components may change behavior based on this information.
// Default false. This method takes a lock. To achieve this in a lock-less
// manner the PostRuntimeSetting can instead be used.
virtual void set_output_will_be_muted(bool muted) = 0;
// Enqueues a runtime setting.
virtual void SetRuntimeSetting(RuntimeSetting setting) = 0;
// Enqueues a runtime setting. Returns a bool indicating whether the
// enqueueing was successful.
virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0;
// Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio as
// specified in `input_config` and `output_config`. `src` and `dest` may use
// the same memory, if desired.
virtual int ProcessStream(const int16_t* const src,
const StreamConfig& input_config,
const StreamConfig& output_config,
int16_t* const dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// `src` points to a channel buffer, arranged according to `input_stream`. At
// output, the channels will be arranged according to `output_stream` in
// `dest`.
//
// The output must have one channel or as many channels as the input. `src`
// and `dest` may use the same memory, if desired.
virtual int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
// Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio for
// the reverse direction audio stream as specified in `input_config` and
// `output_config`. `src` and `dest` may use the same memory, if desired.
virtual int ProcessReverseStream(const int16_t* const src,
const StreamConfig& input_config,
const StreamConfig& output_config,
int16_t* const dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// `data` points to a channel buffer, arranged according to `reverse_config`.
virtual int ProcessReverseStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of `data` points to a channel buffer, arranged according to
// `reverse_config`.
virtual int AnalyzeReverseStream(const float* const* data,
const StreamConfig& reverse_config) = 0;
// Returns the most recently produced ~10 ms of the linear AEC output at a
// rate of 16 kHz. If there is more than one capture channel, a mono
// representation of the input is returned. Returns true/false to indicate
// whether an output returned.
virtual bool GetLinearAecOutput(
rtc::ArrayView<std::array<float, 160>> linear_output) const = 0;
// This must be called prior to ProcessStream() if and only if adaptive analog
// gain control is enabled, to pass the current analog level from the audio
// HAL. Must be within the range [0, 255].
virtual void set_stream_analog_level(int level) = 0;
// When an analog mode is set, this should be called after
// `set_stream_analog_level()` and `ProcessStream()` to obtain the recommended
// new analog level for the audio HAL. It is the user's responsibility to
// apply this level.
virtual int recommended_stream_analog_level() const = 0;
// This must be called if and only if echo processing is enabled.
//
// Sets the `delay` in ms between ProcessReverseStream() receiving a far-end
// frame and ProcessStream() receiving a near-end frame containing the
// corresponding echo. On the client-side this can be expressed as
// delay = (t_render - t_analyze) + (t_process - t_capture)
// where,
// - t_analyze is the time a frame is passed to ProcessReverseStream() and
// t_render is the time the first sample of the same frame is rendered by
// the audio hardware.
// - t_capture is the time the first sample of a frame is captured by the
// audio hardware and t_process is the time the same frame is passed to
// ProcessStream().
virtual int set_stream_delay_ms(int delay) = 0;
virtual int stream_delay_ms() const = 0;
// Call to signal that a key press occurred (true) or did not occur (false)
// with this chunk of audio.
virtual void set_stream_key_pressed(bool key_pressed) = 0;
// Creates and attaches a webrtc::AecDump for recording debugging
// information.
// The `worker_queue` may not be null and must outlive the created
// AecDump instance. |max_log_size_bytes == -1| means the log size
// will be unlimited. `handle` may not be null. The AecDump takes
// responsibility for `handle` and closes it in the destructor. A
// return value of true indicates that the file has been
// successfully opened, while a value of false indicates that
// opening the file failed.
virtual bool CreateAndAttachAecDump(
absl::string_view file_name,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
virtual bool CreateAndAttachAecDump(
absl::Nonnull<FILE*> handle,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
// TODO(webrtc:5298) Deprecated variant.
// Attaches provided webrtc::AecDump for recording debugging
// information. Log file and maximum file size logic is supposed to
// be handled by implementing instance of AecDump. Calling this
// method when another AecDump is attached resets the active AecDump
// with a new one. This causes the d-tor of the earlier AecDump to
// be called. The d-tor call may block until all pending logging
// tasks are completed.
virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) = 0;
// If no AecDump is attached, this has no effect. If an AecDump is
// attached, its destructor is called. The d-tor may block until
// all pending logging tasks are completed.
virtual void DetachAecDump() = 0;
// Get audio processing statistics.
virtual AudioProcessingStats GetStatistics() = 0;
// TODO(webrtc:5298) Deprecated variant. The `has_remote_tracks` argument
// should be set if there are active remote tracks (this would usually be true
// during a call). If there are no remote tracks some of the stats will not be
// set by AudioProcessing, because they only make sense if there is at least
// one remote track.
virtual AudioProcessingStats GetStatistics(bool has_remote_tracks) = 0;
// Returns the last applied configuration.
virtual AudioProcessing::Config GetConfig() const = 0;
enum Error {
// Fatal errors.
kNoError = 0,
kUnspecifiedError = -1,
kCreationFailedError = -2,
kUnsupportedComponentError = -3,
kUnsupportedFunctionError = -4,
kNullPointerError = -5,
kBadParameterError = -6,
kBadSampleRateError = -7,
kBadDataLengthError = -8,
kBadNumberChannelsError = -9,
kFileError = -10,
kStreamParameterNotSetError = -11,
kNotEnabledError = -12,
// Warnings are non-fatal.
// This results when a set_stream_ parameter is out of range. Processing
// will continue, but the parameter may have been truncated.
kBadStreamParameterWarning = -13
};
// Native rates supported by the integer interfaces.
enum NativeRate {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000,
kSampleRate48kHz = 48000
};
// TODO(kwiberg): We currently need to support a compiler (Visual C++) that
// complains if we don't explicitly state the size of the array here. Remove
// the size when that's no longer the case.
static constexpr int kNativeSampleRatesHz[4] = {
kSampleRate8kHz, kSampleRate16kHz, kSampleRate32kHz, kSampleRate48kHz};
static constexpr size_t kNumNativeSampleRates =
arraysize(kNativeSampleRatesHz);
static constexpr int kMaxNativeSampleRateHz =
kNativeSampleRatesHz[kNumNativeSampleRates - 1];
// APM processes audio in chunks of about 10 ms. See GetFrameSize() for
// details.
static constexpr int kChunkSizeMs = 10;
// Returns floor(sample_rate_hz/100): the number of samples per channel used
// as input and output to the audio processing module in calls to
// ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and
// GetLinearAecOutput.
//
// This is exactly 10 ms for sample rates divisible by 100. For example:
// - 48000 Hz (480 samples per channel),
// - 44100 Hz (441 samples per channel),
// - 16000 Hz (160 samples per channel).
//
// Sample rates not divisible by 100 are received/produced in frames of
// approximately 10 ms. For example:
// - 22050 Hz (220 samples per channel, or ~9.98 ms per frame),
// - 11025 Hz (110 samples per channel, or ~9.98 ms per frame).
// These nondivisible sample rates yield lower audio quality compared to
// multiples of 100. Internal resampling to 10 ms frames causes a simulated
// clock drift effect which impacts the performance of (for example) echo
// cancellation.
static int GetFrameSize(int sample_rate_hz) { return sample_rate_hz / 100; }
};
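// Illustrative usage sketch (not part of the original header): a minimal
// 10 ms processing loop. It assumes the float-pointer ProcessStream()/
// ProcessReverseStream() overloads declared earlier in this header and an
// already-created `apm`; buffer wiring is caller-defined.
//
//   constexpr int kSampleRateHz = 48000;
//   const int frames = AudioProcessing::GetFrameSize(kSampleRateHz);  // 480
//   StreamConfig mono(kSampleRateHz, /*num_channels=*/1);
//   std::vector<float> render(frames), capture(frames);
//   float* render_ch[] = {render.data()};
//   float* capture_ch[] = {capture.data()};
//   apm->ProcessReverseStream(render_ch, mono, mono, render_ch);
//   apm->set_stream_delay_ms(70);  // (t_render - t_analyze) + (t_process - t_capture)
//   apm->ProcessStream(capture_ch, mono, mono, capture_ch);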
// Experimental interface for a custom analysis submodule.
class CustomAudioAnalyzer {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
// Analyzes the given capture or render signal.
virtual void Analyze(const AudioBuffer* audio) = 0;
// Returns a string representation of the module state.
virtual std::string ToString() const = 0;
virtual ~CustomAudioAnalyzer() {}
};
// Interface for a custom processing submodule.
class CustomProcessing {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
// Processes the given capture or render signal.
virtual void Process(AudioBuffer* audio) = 0;
// Returns a string representation of the module state.
virtual std::string ToString() const = 0;
// Handles RuntimeSettings. TODO(webrtc:9262): make pure virtual
// after updating dependencies.
virtual void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting);
virtual ~CustomProcessing() {}
};
class RTC_EXPORT AudioProcessingBuilder {
public:
AudioProcessingBuilder();
AudioProcessingBuilder(const AudioProcessingBuilder&) = delete;
AudioProcessingBuilder& operator=(const AudioProcessingBuilder&) = delete;
~AudioProcessingBuilder();
// Sets the APM configuration.
AudioProcessingBuilder& SetConfig(const AudioProcessing::Config& config) {
config_ = config;
return *this;
}
// Sets the echo controller factory to inject when APM is created.
AudioProcessingBuilder& SetEchoControlFactory(
std::unique_ptr<EchoControlFactory> echo_control_factory) {
echo_control_factory_ = std::move(echo_control_factory);
return *this;
}
// Sets the capture post-processing sub-module to inject when APM is created.
AudioProcessingBuilder& SetCapturePostProcessing(
std::unique_ptr<CustomProcessing> capture_post_processing) {
capture_post_processing_ = std::move(capture_post_processing);
return *this;
}
// Sets the render pre-processing sub-module to inject when APM is created.
AudioProcessingBuilder& SetRenderPreProcessing(
std::unique_ptr<CustomProcessing> render_pre_processing) {
render_pre_processing_ = std::move(render_pre_processing);
return *this;
}
// Sets the echo detector to inject when APM is created.
AudioProcessingBuilder& SetEchoDetector(
rtc::scoped_refptr<EchoDetector> echo_detector) {
echo_detector_ = std::move(echo_detector);
return *this;
}
// Sets the capture analyzer sub-module to inject when APM is created.
AudioProcessingBuilder& SetCaptureAnalyzer(
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
capture_analyzer_ = std::move(capture_analyzer);
return *this;
}
// Creates an APM instance with the specified config or the default one if
// unspecified. Injects the specified components transferring the ownership
// to the newly created APM instance - i.e., except for the config, the
// builder is reset to its initial state.
rtc::scoped_refptr<AudioProcessing> Create();
private:
AudioProcessing::Config config_;
std::unique_ptr<EchoControlFactory> echo_control_factory_;
std::unique_ptr<CustomProcessing> capture_post_processing_;
std::unique_ptr<CustomProcessing> render_pre_processing_;
rtc::scoped_refptr<EchoDetector> echo_detector_;
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
};
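// Illustrative usage sketch (the Config field names assume the stock WebRTC
// AudioProcessing::Config and an implementation linked into the build):
//
//   AudioProcessing::Config config;
//   config.echo_canceller.enabled = true;
//   rtc::scoped_refptr<AudioProcessing> apm =
//       AudioProcessingBuilder().SetConfig(config).Create();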
class StreamConfig {
public:
// sample_rate_hz: The sampling rate of the stream.
// num_channels: The number of audio channels in the stream.
StreamConfig(int sample_rate_hz = 0,
size_t num_channels = 0) // NOLINT(runtime/explicit)
: sample_rate_hz_(sample_rate_hz),
num_channels_(num_channels),
num_frames_(calculate_frames(sample_rate_hz)) {}
void set_sample_rate_hz(int value) {
sample_rate_hz_ = value;
num_frames_ = calculate_frames(value);
}
void set_num_channels(size_t value) { num_channels_ = value; }
int sample_rate_hz() const { return sample_rate_hz_; }
// The number of channels in the stream.
size_t num_channels() const { return num_channels_; }
size_t num_frames() const { return num_frames_; }
size_t num_samples() const { return num_channels_ * num_frames_; }
bool operator==(const StreamConfig& other) const {
return sample_rate_hz_ == other.sample_rate_hz_ &&
num_channels_ == other.num_channels_;
}
bool operator!=(const StreamConfig& other) const { return !(*this == other); }
private:
static size_t calculate_frames(int sample_rate_hz) {
return static_cast<size_t>(AudioProcessing::GetFrameSize(sample_rate_hz));
}
int sample_rate_hz_;
size_t num_channels_;
size_t num_frames_;
};
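// For example, StreamConfig(48000, 2) yields num_frames() == 480 and
// num_samples() == 960, while StreamConfig(22050, 1) yields num_frames() ==
// 220, matching AudioProcessing::GetFrameSize().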
class ProcessingConfig {
public:
enum StreamName {
kInputStream,
kOutputStream,
kReverseInputStream,
kReverseOutputStream,
kNumStreamNames,
};
const StreamConfig& input_stream() const {
return streams[StreamName::kInputStream];
}
const StreamConfig& output_stream() const {
return streams[StreamName::kOutputStream];
}
const StreamConfig& reverse_input_stream() const {
return streams[StreamName::kReverseInputStream];
}
const StreamConfig& reverse_output_stream() const {
return streams[StreamName::kReverseOutputStream];
}
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
StreamConfig& reverse_input_stream() {
return streams[StreamName::kReverseInputStream];
}
StreamConfig& reverse_output_stream() {
return streams[StreamName::kReverseOutputStream];
}
bool operator==(const ProcessingConfig& other) const {
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
if (this->streams[i] != other.streams[i]) {
return false;
}
}
return true;
}
bool operator!=(const ProcessingConfig& other) const {
return !(*this == other);
}
StreamConfig streams[StreamName::kNumStreamNames];
};
// Interface for an echo detector submodule.
class EchoDetector : public RefCountInterface {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int capture_sample_rate_hz,
int num_capture_channels,
int render_sample_rate_hz,
int num_render_channels) = 0;
// Analysis (not changing) of the first channel of the render signal.
virtual void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) = 0;
// Analysis (not changing) of the capture signal.
virtual void AnalyzeCaptureAudio(
rtc::ArrayView<const float> capture_audio) = 0;
struct Metrics {
absl::optional<double> echo_likelihood;
absl::optional<double> echo_likelihood_recent_max;
};
// Collect current metrics from the echo detector.
virtual Metrics GetMetrics() const = 0;
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_PROCESSING_H_

View File

@@ -0,0 +1,22 @@
/*
* Copyright 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/audio_processing_statistics.h"
namespace webrtc {
AudioProcessingStats::AudioProcessingStats() = default;
AudioProcessingStats::AudioProcessingStats(const AudioProcessingStats& other) =
default;
AudioProcessingStats::~AudioProcessingStats() = default;
} // namespace webrtc

View File

@@ -0,0 +1,67 @@
/*
* Copyright 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
#define API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
#include <stdint.h>
#include "absl/types/optional.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
// This version of the stats uses absl::optional fields; it will replace the
// regular AudioProcessingStatistics struct.
struct RTC_EXPORT AudioProcessingStats {
AudioProcessingStats();
AudioProcessingStats(const AudioProcessingStats& other);
~AudioProcessingStats();
// Deprecated.
// TODO(bugs.webrtc.org/11226): Remove.
// True if voice is detected in the last capture frame, after processing.
// It is conservative in flagging audio as speech, with low likelihood of
// incorrectly flagging a frame as voice.
// Only reported if voice detection is enabled in AudioProcessing::Config.
absl::optional<bool> voice_detected;
// AEC Statistics.
// ERL = 10log_10(P_far / P_echo)
absl::optional<double> echo_return_loss;
// ERLE = 10log_10(P_echo / P_out)
absl::optional<double> echo_return_loss_enhancement;
// Fraction of time that the AEC linear filter is divergent, in a 1-second
// non-overlapped aggregation window.
absl::optional<double> divergent_filter_fraction;
// The delay metrics consist of the delay median and standard deviation, as
// well as the fraction of delay estimates that may cause the echo
// cancellation to perform poorly. The values are aggregated until the first
// call to `GetStatistics()` and afterwards aggregated and updated every
// second. Note that if several clients pull metrics from `GetStatistics()`
// during a session, the first call from any of them switches all of them to
// a one-second aggregation window.
absl::optional<int32_t> delay_median_ms;
absl::optional<int32_t> delay_standard_deviation_ms;
// Residual echo detector likelihood.
absl::optional<double> residual_echo_likelihood;
// Maximum residual echo likelihood from the last time period.
absl::optional<double> residual_echo_likelihood_recent_max;
// The instantaneous delay estimate produced in the AEC. The unit is in
// milliseconds and the value is the instantaneous value at the time of the
// call to `GetStatistics()`.
absl::optional<int32_t> delay_ms;
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
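// Illustrative usage sketch: every field is an absl::optional, so consumers
// check presence before reading (here `stats` is assumed to come from an
// AudioProcessing::GetStatistics() call):
//
//   if (stats.echo_return_loss_enhancement.has_value()) {
//     RTC_LOG(LS_INFO) << "ERLE: " << *stats.echo_return_loss_enhancement;
//   }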

View File

@@ -0,0 +1,269 @@
/*
* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_VIEW_H_
#define API_AUDIO_AUDIO_VIEW_H_
#include "api/array_view.h"
#include "api/audio/channel_layout.h"
#include "rtc_base/checks.h"
namespace webrtc {
// This file contains 3 types of view classes:
//
// * MonoView<>: A single channel contiguous buffer of samples.
//
// * InterleavedView<>: Channel samples are interleaved (side-by-side) in
// the buffer. A single-channel InterleavedView<> is the same thing as a
// MonoView<>.
//
// * DeinterleavedView<>: Each channel's samples are contiguous within the
// buffer. Channels can be enumerated and accessing the individual channel
// data is done via MonoView<>.
//
// The views are comparable to and built on rtc::ArrayView<> but add
// audio specific properties for the dimensions of the buffer and the above
// specialized [de]interleaved support.
//
// There are also a few generic utility functions that can simplify
// generic code for supporting more than one type of view.
// MonoView<> represents a view over a single, contiguous audio buffer. This
// can be either a single-channel (mono) interleaved buffer (e.g. AudioFrame),
// or a de-interleaved channel (e.g. from AudioBuffer).
template <typename T>
using MonoView = rtc::ArrayView<T>;
// InterleavedView<> is a view over an interleaved audio buffer (e.g. from
// AudioFrame).
template <typename T>
class InterleavedView {
public:
using value_type = T;
InterleavedView() = default;
template <typename U>
InterleavedView(U* data, size_t samples_per_channel, size_t num_channels)
: num_channels_(num_channels),
samples_per_channel_(samples_per_channel),
data_(data, num_channels * samples_per_channel) {
RTC_DCHECK_LE(num_channels_, kMaxConcurrentChannels);
RTC_DCHECK(num_channels_ == 0u || samples_per_channel_ != 0u);
}
// Construct an InterleavedView from a C-style array. Samples per channel
// is calculated based on the array size / num_channels.
template <typename U, size_t N>
InterleavedView(U (&array)[N], // NOLINT
size_t num_channels)
: InterleavedView(array, N / num_channels, num_channels) {
RTC_DCHECK_EQ(N % num_channels, 0u);
}
template <typename U>
InterleavedView(const InterleavedView<U>& other)
: num_channels_(other.num_channels()),
samples_per_channel_(other.samples_per_channel()),
data_(other.data()) {}
size_t num_channels() const { return num_channels_; }
size_t samples_per_channel() const { return samples_per_channel_; }
rtc::ArrayView<T> data() const { return data_; }
bool empty() const { return data_.empty(); }
size_t size() const { return data_.size(); }
MonoView<T> AsMono() const {
RTC_DCHECK_EQ(num_channels(), 1u);
RTC_DCHECK_EQ(data_.size(), samples_per_channel_);
return data_;
}
// A simple wrapper around memcpy that includes checks for properties.
// TODO(tommi): Consider if this can be utility function for both interleaved
// and deinterleaved views.
template <typename U>
void CopyFrom(const InterleavedView<U>& source) {
static_assert(sizeof(T) == sizeof(U), "");
RTC_DCHECK_EQ(num_channels(), source.num_channels());
RTC_DCHECK_EQ(samples_per_channel(), source.samples_per_channel());
RTC_DCHECK_GE(data_.size(), source.data().size());
const auto data = source.data();
memcpy(&data_[0], &data[0], data.size() * sizeof(U));
}
T& operator[](size_t idx) const { return data_[idx]; }
T* begin() const { return data_.begin(); }
T* end() const { return data_.end(); }
const T* cbegin() const { return data_.cbegin(); }
const T* cend() const { return data_.cend(); }
std::reverse_iterator<T*> rbegin() const { return data_.rbegin(); }
std::reverse_iterator<T*> rend() const { return data_.rend(); }
std::reverse_iterator<const T*> crbegin() const { return data_.crbegin(); }
std::reverse_iterator<const T*> crend() const { return data_.crend(); }
private:
// TODO(tommi): Consider having these both be stored as uint16_t to
// save a few bytes per view. Use `dchecked_cast` to support size_t during
// construction.
size_t num_channels_ = 0u;
size_t samples_per_channel_ = 0u;
rtc::ArrayView<T> data_;
};
template <typename T>
class DeinterleavedView {
public:
using value_type = T;
DeinterleavedView() = default;
template <typename U>
DeinterleavedView(U* data, size_t samples_per_channel, size_t num_channels)
: num_channels_(num_channels),
samples_per_channel_(samples_per_channel),
data_(data, num_channels * samples_per_channel_) {}
template <typename U>
DeinterleavedView(const DeinterleavedView<U>& other)
: num_channels_(other.num_channels()),
samples_per_channel_(other.samples_per_channel()),
data_(other.data()) {}
// Returns a deinterleaved channel where `idx` is the zero based index,
// in the range [0 .. num_channels()-1].
MonoView<T> operator[](size_t idx) const {
RTC_DCHECK_LT(idx, num_channels_);
return MonoView<T>(&data_[idx * samples_per_channel_],
samples_per_channel_);
}
size_t num_channels() const { return num_channels_; }
size_t samples_per_channel() const { return samples_per_channel_; }
rtc::ArrayView<T> data() const { return data_; }
bool empty() const { return data_.empty(); }
size_t size() const { return data_.size(); }
// Returns the first (and possibly only) channel.
MonoView<T> AsMono() const {
RTC_DCHECK_GE(num_channels(), 1u);
return (*this)[0];
}
private:
// TODO(tommi): Consider having these be stored as uint16_t to save a few
// bytes per view. Use `dchecked_cast` to support size_t during construction.
size_t num_channels_ = 0u;
size_t samples_per_channel_ = 0u;
rtc::ArrayView<T> data_;
};
template <typename T>
constexpr size_t NumChannels(const MonoView<T>& view) {
return 1u;
}
template <typename T>
size_t NumChannels(const InterleavedView<T>& view) {
return view.num_channels();
}
template <typename T>
size_t NumChannels(const DeinterleavedView<T>& view) {
return view.num_channels();
}
template <typename T>
constexpr bool IsMono(const MonoView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const MonoView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const InterleavedView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const DeinterleavedView<const T>& view) {
return false;
}
template <typename T>
bool IsMono(const InterleavedView<T>& view) {
return NumChannels(view) == 1u;
}
template <typename T>
bool IsMono(const DeinterleavedView<T>& view) {
return NumChannels(view) == 1u;
}
template <typename T>
size_t SamplesPerChannel(const MonoView<T>& view) {
return view.size();
}
template <typename T>
size_t SamplesPerChannel(const InterleavedView<T>& view) {
return view.samples_per_channel();
}
template <typename T>
size_t SamplesPerChannel(const DeinterleavedView<T>& view) {
return view.samples_per_channel();
}
// A simple wrapper around memcpy that includes checks for properties.
// The parameter order is the same as for memcpy(), first destination then
// source.
template <typename D, typename S>
void CopySamples(D& destination, const S& source) {
static_assert(
sizeof(typename D::value_type) == sizeof(typename S::value_type), "");
// Here we'd really like to do
// static_assert(IsInterleavedView(destination) == IsInterleavedView(source),
// "");
// but the compiler doesn't like it inside this template function for
// some reason. The following check is an approximation but unfortunately
// means that copying between a MonoView and single channel interleaved or
// deinterleaved views wouldn't work.
// static_assert(sizeof(destination) == sizeof(source),
// "Incompatible view types");
RTC_DCHECK_EQ(NumChannels(destination), NumChannels(source));
RTC_DCHECK_EQ(SamplesPerChannel(destination), SamplesPerChannel(source));
RTC_DCHECK_GE(destination.size(), source.size());
memcpy(&destination[0], &source[0],
source.size() * sizeof(typename S::value_type));
}
// Sets all the samples in a view to 0. This template function is a simple
// wrapper around `memset()` but adds the benefit of automatically calculating
// the byte size from the number of samples and sample type.
template <typename T>
void ClearSamples(T& view) {
memset(&view[0], 0, view.size() * sizeof(typename T::value_type));
}
// Same as `ClearSamples()` above but allows for clearing only the first
// `sample_count` number of samples.
template <typename T>
void ClearSamples(T& view, size_t sample_count) {
RTC_DCHECK_LE(sample_count, view.size());
memset(&view[0], 0, sample_count * sizeof(typename T::value_type));
}
} // namespace webrtc
#endif // API_AUDIO_AUDIO_VIEW_H_
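// Illustrative usage sketch: wrapping a deinterleaved stereo buffer and
// clearing its second channel (sizes follow the samples_per_channel /
// num_channels convention documented above):
//
//   float buffer[2 * 480] = {};
//   webrtc::DeinterleavedView<float> view(buffer, /*samples_per_channel=*/480,
//                                         /*num_channels=*/2);
//   webrtc::MonoView<float> right = view[1];  // samples 480..959 of `buffer`
//   webrtc::ClearSamples(right);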

View File

@@ -0,0 +1,282 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/channel_layout.h"
#include <stddef.h>
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
static const int kLayoutToChannels[] = {
0, // CHANNEL_LAYOUT_NONE
0, // CHANNEL_LAYOUT_UNSUPPORTED
1, // CHANNEL_LAYOUT_MONO
2, // CHANNEL_LAYOUT_STEREO
3, // CHANNEL_LAYOUT_2_1
3, // CHANNEL_LAYOUT_SURROUND
4, // CHANNEL_LAYOUT_4_0
4, // CHANNEL_LAYOUT_2_2
4, // CHANNEL_LAYOUT_QUAD
5, // CHANNEL_LAYOUT_5_0
6, // CHANNEL_LAYOUT_5_1
5, // CHANNEL_LAYOUT_5_0_BACK
6, // CHANNEL_LAYOUT_5_1_BACK
7, // CHANNEL_LAYOUT_7_0
8, // CHANNEL_LAYOUT_7_1
8, // CHANNEL_LAYOUT_7_1_WIDE
2, // CHANNEL_LAYOUT_STEREO_DOWNMIX
3, // CHANNEL_LAYOUT_2POINT1
4, // CHANNEL_LAYOUT_3_1
5, // CHANNEL_LAYOUT_4_1
6, // CHANNEL_LAYOUT_6_0
6, // CHANNEL_LAYOUT_6_0_FRONT
6, // CHANNEL_LAYOUT_HEXAGONAL
7, // CHANNEL_LAYOUT_6_1
7, // CHANNEL_LAYOUT_6_1_BACK
7, // CHANNEL_LAYOUT_6_1_FRONT
7, // CHANNEL_LAYOUT_7_0_FRONT
8, // CHANNEL_LAYOUT_7_1_WIDE_BACK
8, // CHANNEL_LAYOUT_OCTAGONAL
0, // CHANNEL_LAYOUT_DISCRETE
3, // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
5, // CHANNEL_LAYOUT_4_1_QUAD_SIDE
0, // CHANNEL_LAYOUT_BITSTREAM
};
// The channel orderings for each layout as specified by FFmpeg. Each value
// represents the index of each channel in each layout. Values of -1 mean the
// channel at that index is not used for that layout. For example, the left side
// surround sound channel in FFmpeg's 5.1 layout is in the 5th position (because
// the order is L, R, C, LFE, LS, RS), so
// kChannelOrderings[CHANNEL_LAYOUT_5_1][SIDE_LEFT] = 4;
static const int kChannelOrderings[CHANNEL_LAYOUT_MAX + 1][CHANNELS_MAX + 1] = {
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_NONE
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_UNSUPPORTED
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_MONO
{-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_STEREO
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_2_1
{0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1},
// CHANNEL_LAYOUT_SURROUND
{0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_0
{0, 1, 2, -1, -1, -1, -1, -1, 3, -1, -1},
// CHANNEL_LAYOUT_2_2
{0, 1, -1, -1, -1, -1, -1, -1, -1, 2, 3},
// CHANNEL_LAYOUT_QUAD
{0, 1, -1, -1, 2, 3, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_5_0
{0, 1, 2, -1, -1, -1, -1, -1, -1, 3, 4},
// CHANNEL_LAYOUT_5_1
{0, 1, 2, 3, -1, -1, -1, -1, -1, 4, 5},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_5_0_BACK
{0, 1, 2, -1, 3, 4, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_5_1_BACK
{0, 1, 2, 3, 4, 5, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_7_0
{0, 1, 2, -1, 5, 6, -1, -1, -1, 3, 4},
// CHANNEL_LAYOUT_7_1
{0, 1, 2, 3, 6, 7, -1, -1, -1, 4, 5},
// CHANNEL_LAYOUT_7_1_WIDE
{0, 1, 2, 3, -1, -1, 6, 7, -1, 4, 5},
// CHANNEL_LAYOUT_STEREO_DOWNMIX
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_2POINT1
{0, 1, -1, 2, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_3_1
{0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_1
{0, 1, 2, 4, -1, -1, -1, -1, 3, -1, -1},
// CHANNEL_LAYOUT_6_0
{0, 1, 2, -1, -1, -1, -1, -1, 5, 3, 4},
// CHANNEL_LAYOUT_6_0_FRONT
{0, 1, -1, -1, -1, -1, 4, 5, -1, 2, 3},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_HEXAGONAL
{0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1},
// CHANNEL_LAYOUT_6_1
{0, 1, 2, 3, -1, -1, -1, -1, 6, 4, 5},
// CHANNEL_LAYOUT_6_1_BACK
{0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1},
// CHANNEL_LAYOUT_6_1_FRONT
{0, 1, -1, 6, -1, -1, 4, 5, -1, 2, 3},
// CHANNEL_LAYOUT_7_0_FRONT
{0, 1, 2, -1, -1, -1, 5, 6, -1, 3, 4},
// CHANNEL_LAYOUT_7_1_WIDE_BACK
{0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1},
// CHANNEL_LAYOUT_OCTAGONAL
{0, 1, 2, -1, 5, 6, -1, -1, 7, 3, 4},
// CHANNEL_LAYOUT_DISCRETE
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
{0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_1_QUAD_SIDE
{0, 1, -1, 4, -1, -1, -1, -1, -1, 2, 3},
// CHANNEL_LAYOUT_BITSTREAM
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
};
int ChannelLayoutToChannelCount(ChannelLayout layout) {
RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kLayoutToChannels));
RTC_DCHECK_LE(kLayoutToChannels[layout], kMaxConcurrentChannels);
return kLayoutToChannels[layout];
}
// Converts a channel count into a channel layout.
ChannelLayout GuessChannelLayout(int channels) {
switch (channels) {
case 1:
return CHANNEL_LAYOUT_MONO;
case 2:
return CHANNEL_LAYOUT_STEREO;
case 3:
return CHANNEL_LAYOUT_SURROUND;
case 4:
return CHANNEL_LAYOUT_QUAD;
case 5:
return CHANNEL_LAYOUT_5_0;
case 6:
return CHANNEL_LAYOUT_5_1;
case 7:
return CHANNEL_LAYOUT_6_1;
case 8:
return CHANNEL_LAYOUT_7_1;
default:
RTC_DLOG(LS_WARNING) << "Unsupported channel count: " << channels;
}
return CHANNEL_LAYOUT_UNSUPPORTED;
}
int ChannelOrder(ChannelLayout layout, Channels channel) {
RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kChannelOrderings));
RTC_DCHECK_LT(static_cast<size_t>(channel), arraysize(kChannelOrderings[0]));
return kChannelOrderings[layout][channel];
}
const char* ChannelLayoutToString(ChannelLayout layout) {
switch (layout) {
case CHANNEL_LAYOUT_NONE:
return "NONE";
case CHANNEL_LAYOUT_UNSUPPORTED:
return "UNSUPPORTED";
case CHANNEL_LAYOUT_MONO:
return "MONO";
case CHANNEL_LAYOUT_STEREO:
return "STEREO";
case CHANNEL_LAYOUT_2_1:
return "2.1";
case CHANNEL_LAYOUT_SURROUND:
return "SURROUND";
case CHANNEL_LAYOUT_4_0:
return "4.0";
case CHANNEL_LAYOUT_2_2:
return "QUAD_SIDE";
case CHANNEL_LAYOUT_QUAD:
return "QUAD";
case CHANNEL_LAYOUT_5_0:
return "5.0";
case CHANNEL_LAYOUT_5_1:
return "5.1";
case CHANNEL_LAYOUT_5_0_BACK:
return "5.0_BACK";
case CHANNEL_LAYOUT_5_1_BACK:
return "5.1_BACK";
case CHANNEL_LAYOUT_7_0:
return "7.0";
case CHANNEL_LAYOUT_7_1:
return "7.1";
case CHANNEL_LAYOUT_7_1_WIDE:
return "7.1_WIDE";
case CHANNEL_LAYOUT_STEREO_DOWNMIX:
return "STEREO_DOWNMIX";
case CHANNEL_LAYOUT_2POINT1:
return "2POINT1";
case CHANNEL_LAYOUT_3_1:
return "3.1";
case CHANNEL_LAYOUT_4_1:
return "4.1";
case CHANNEL_LAYOUT_6_0:
return "6.0";
case CHANNEL_LAYOUT_6_0_FRONT:
return "6.0_FRONT";
case CHANNEL_LAYOUT_HEXAGONAL:
return "HEXAGONAL";
case CHANNEL_LAYOUT_6_1:
return "6.1";
case CHANNEL_LAYOUT_6_1_BACK:
return "6.1_BACK";
case CHANNEL_LAYOUT_6_1_FRONT:
return "6.1_FRONT";
case CHANNEL_LAYOUT_7_0_FRONT:
return "7.0_FRONT";
case CHANNEL_LAYOUT_7_1_WIDE_BACK:
return "7.1_WIDE_BACK";
case CHANNEL_LAYOUT_OCTAGONAL:
return "OCTAGONAL";
case CHANNEL_LAYOUT_DISCRETE:
return "DISCRETE";
case CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
return "STEREO_AND_KEYBOARD_MIC";
case CHANNEL_LAYOUT_4_1_QUAD_SIDE:
return "4.1_QUAD_SIDE";
case CHANNEL_LAYOUT_BITSTREAM:
return "BITSTREAM";
}
RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout;
return "";
}
} // namespace webrtc
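// Illustrative usage sketch: locating the side-left sample within an
// interleaved 5.1 frame, per the FFmpeg ordering documented above.
//
//   int idx = webrtc::ChannelOrder(webrtc::CHANNEL_LAYOUT_5_1,
//                                  webrtc::SIDE_LEFT);  // idx == 4
//   // Within one interleaved 6-sample frame, frame[idx] is the SL sample.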

View File

@@ -0,0 +1,165 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_CHANNEL_LAYOUT_H_
#define API_AUDIO_CHANNEL_LAYOUT_H_
namespace webrtc {
// This file is derived from Chromium's base/channel_layout.h.
// Enumerates the various representations of the ordering of audio channels.
// Logged to UMA, so never reuse a value, always add new/greater ones!
enum ChannelLayout {
CHANNEL_LAYOUT_NONE = 0,
CHANNEL_LAYOUT_UNSUPPORTED = 1,
// Front C
CHANNEL_LAYOUT_MONO = 2,
// Front L, Front R
CHANNEL_LAYOUT_STEREO = 3,
// Front L, Front R, Back C
CHANNEL_LAYOUT_2_1 = 4,
// Front L, Front R, Front C
CHANNEL_LAYOUT_SURROUND = 5,
// Front L, Front R, Front C, Back C
CHANNEL_LAYOUT_4_0 = 6,
// Front L, Front R, Side L, Side R
CHANNEL_LAYOUT_2_2 = 7,
// Front L, Front R, Back L, Back R
CHANNEL_LAYOUT_QUAD = 8,
// Front L, Front R, Front C, Side L, Side R
CHANNEL_LAYOUT_5_0 = 9,
// Front L, Front R, Front C, LFE, Side L, Side R
CHANNEL_LAYOUT_5_1 = 10,
// Front L, Front R, Front C, Back L, Back R
CHANNEL_LAYOUT_5_0_BACK = 11,
// Front L, Front R, Front C, LFE, Back L, Back R
CHANNEL_LAYOUT_5_1_BACK = 12,
// Front L, Front R, Front C, Side L, Side R, Back L, Back R
CHANNEL_LAYOUT_7_0 = 13,
// Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R
CHANNEL_LAYOUT_7_1 = 14,
// Front L, Front R, Front C, LFE, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_1_WIDE = 15,
// Stereo L, Stereo R
CHANNEL_LAYOUT_STEREO_DOWNMIX = 16,
// Stereo L, Stereo R, LFE
CHANNEL_LAYOUT_2POINT1 = 17,
// Stereo L, Stereo R, Front C, LFE
CHANNEL_LAYOUT_3_1 = 18,
// Stereo L, Stereo R, Front C, Rear C, LFE
CHANNEL_LAYOUT_4_1 = 19,
// Stereo L, Stereo R, Front C, Side L, Side R, Back C
CHANNEL_LAYOUT_6_0 = 20,
// Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_6_0_FRONT = 21,
// Stereo L, Stereo R, Front C, Rear L, Rear R, Rear C
CHANNEL_LAYOUT_HEXAGONAL = 22,
// Stereo L, Stereo R, Front C, LFE, Side L, Side R, Rear Center
CHANNEL_LAYOUT_6_1 = 23,
// Stereo L, Stereo R, Front C, LFE, Back L, Back R, Rear Center
CHANNEL_LAYOUT_6_1_BACK = 24,
// Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC, LFE
CHANNEL_LAYOUT_6_1_FRONT = 25,
// Front L, Front R, Front C, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_0_FRONT = 26,
// Front L, Front R, Front C, LFE, Back L, Back R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_1_WIDE_BACK = 27,
// Front L, Front R, Front C, Side L, Side R, Back L, Back R, Back C.
CHANNEL_LAYOUT_OCTAGONAL = 28,
// Channels are not explicitly mapped to speakers.
CHANNEL_LAYOUT_DISCRETE = 29,
// Front L, Front R, Front C. Front C contains the keyboard mic audio. This
// layout is only intended for input for WebRTC. The Front C channel
// is stripped away in the WebRTC audio input pipeline and never seen outside
// of that.
CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC = 30,
// Front L, Front R, Side L, Side R, LFE
CHANNEL_LAYOUT_4_1_QUAD_SIDE = 31,
// Actual channel layout is specified in the bitstream and the actual channel
// count is unknown at Chromium media pipeline level (useful for audio
// pass-through mode).
CHANNEL_LAYOUT_BITSTREAM = 32,
// Max value, must always equal the largest entry ever logged.
CHANNEL_LAYOUT_MAX = CHANNEL_LAYOUT_BITSTREAM
};
// Note: Do not reorder or reassign these values; other code depends on their
// ordering to operate correctly. E.g., CoreAudio channel layout computations.
enum Channels {
LEFT = 0,
RIGHT,
CENTER,
LFE,
BACK_LEFT,
BACK_RIGHT,
LEFT_OF_CENTER,
RIGHT_OF_CENTER,
BACK_CENTER,
SIDE_LEFT,
SIDE_RIGHT,
CHANNELS_MAX =
SIDE_RIGHT, // Must always equal the largest value ever logged.
};
// The maximum number of concurrently active channels for all possible layouts.
// ChannelLayoutToChannelCount() will never return a value higher than this.
constexpr int kMaxConcurrentChannels = 8;
// Returns the expected channel position in an interleaved stream. Values of -1
// mean the channel at that index is not used for that layout. Values range
// from 0 to ChannelLayoutToChannelCount(layout) - 1.
int ChannelOrder(ChannelLayout layout, Channels channel);
// Returns the number of channels in a given ChannelLayout.
int ChannelLayoutToChannelCount(ChannelLayout layout);
// Given the number of channels, return the best layout,
// or return CHANNEL_LAYOUT_UNSUPPORTED if there is no good match.
ChannelLayout GuessChannelLayout(int channels);
// Returns a string representation of the channel layout.
const char* ChannelLayoutToString(ChannelLayout layout);
} // namespace webrtc
#endif // API_AUDIO_CHANNEL_LAYOUT_H_

View File

@@ -0,0 +1,278 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/echo_canceller3_config.h"
#include <algorithm>
#include <cmath>
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
bool Limit(float* value, float min, float max) {
float clamped = rtc::SafeClamp(*value, min, max);
clamped = std::isfinite(clamped) ? clamped : min;
bool res = *value == clamped;
*value = clamped;
return res;
}
bool Limit(size_t* value, size_t min, size_t max) {
size_t clamped = rtc::SafeClamp(*value, min, max);
bool res = *value == clamped;
*value = clamped;
return res;
}
bool Limit(int* value, int min, int max) {
int clamped = rtc::SafeClamp(*value, min, max);
bool res = *value == clamped;
*value = clamped;
return res;
}
bool FloorLimit(size_t* value, size_t min) {
size_t clamped = *value >= min ? *value : min;
bool res = *value == clamped;
*value = clamped;
return res;
}
} // namespace
EchoCanceller3Config::EchoCanceller3Config() = default;
EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) =
default;
EchoCanceller3Config& EchoCanceller3Config::operator=(
const EchoCanceller3Config& e) = default;
EchoCanceller3Config::Delay::Delay() = default;
EchoCanceller3Config::Delay::Delay(const EchoCanceller3Config::Delay& e) =
default;
EchoCanceller3Config::Delay& EchoCanceller3Config::Delay::operator=(
const Delay& e) = default;
EchoCanceller3Config::EchoModel::EchoModel() = default;
EchoCanceller3Config::EchoModel::EchoModel(
const EchoCanceller3Config::EchoModel& e) = default;
EchoCanceller3Config::EchoModel& EchoCanceller3Config::EchoModel::operator=(
const EchoModel& e) = default;
EchoCanceller3Config::Suppressor::Suppressor() = default;
EchoCanceller3Config::Suppressor::Suppressor(
const EchoCanceller3Config::Suppressor& e) = default;
EchoCanceller3Config::Suppressor& EchoCanceller3Config::Suppressor::operator=(
const Suppressor& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
float enr_transparent,
float enr_suppress,
float emr_transparent)
: enr_transparent(enr_transparent),
enr_suppress(enr_suppress),
emr_transparent(emr_transparent) {}
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
const EchoCanceller3Config::Suppressor::MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds&
EchoCanceller3Config::Suppressor::MaskingThresholds::operator=(
const MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::Tuning::Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf)
: mask_lf(mask_lf),
mask_hf(mask_hf),
max_inc_factor(max_inc_factor),
max_dec_factor_lf(max_dec_factor_lf) {}
EchoCanceller3Config::Suppressor::Tuning::Tuning(
const EchoCanceller3Config::Suppressor::Tuning& e) = default;
EchoCanceller3Config::Suppressor::Tuning&
EchoCanceller3Config::Suppressor::Tuning::operator=(const Tuning& e) = default;
bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
RTC_DCHECK(config);
EchoCanceller3Config* c = config;
bool res = true;
if (c->delay.down_sampling_factor != 4 &&
c->delay.down_sampling_factor != 8) {
c->delay.down_sampling_factor = 4;
res = false;
}
res = res & Limit(&c->delay.default_delay, 0, 5000);
res = res & Limit(&c->delay.num_filters, 0, 5000);
res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000);
res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000);
res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000);
res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f);
res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f);
res = res & Limit(&c->delay.delay_selection_thresholds.initial, 1, 250);
res = res & Limit(&c->delay.delay_selection_thresholds.converged, 1, 250);
res = res & FloorLimit(&c->filter.refined.length_blocks, 1);
res = res & Limit(&c->filter.refined.leakage_converged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.leakage_diverged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.error_floor, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.error_ceil, 0.f, 100000000.f);
res = res & Limit(&c->filter.refined.noise_gate, 0.f, 100000000.f);
res = res & FloorLimit(&c->filter.refined_initial.length_blocks, 1);
res = res & Limit(&c->filter.refined_initial.leakage_converged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.leakage_diverged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.error_floor, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.error_ceil, 0.f, 100000000.f);
res = res & Limit(&c->filter.refined_initial.noise_gate, 0.f, 100000000.f);
if (c->filter.refined.length_blocks <
c->filter.refined_initial.length_blocks) {
c->filter.refined_initial.length_blocks = c->filter.refined.length_blocks;
res = false;
}
res = res & FloorLimit(&c->filter.coarse.length_blocks, 1);
res = res & Limit(&c->filter.coarse.rate, 0.f, 1.f);
res = res & Limit(&c->filter.coarse.noise_gate, 0.f, 100000000.f);
res = res & FloorLimit(&c->filter.coarse_initial.length_blocks, 1);
res = res & Limit(&c->filter.coarse_initial.rate, 0.f, 1.f);
res = res & Limit(&c->filter.coarse_initial.noise_gate, 0.f, 100000000.f);
if (c->filter.coarse.length_blocks < c->filter.coarse_initial.length_blocks) {
c->filter.coarse_initial.length_blocks = c->filter.coarse.length_blocks;
res = false;
}
res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000);
res = res & Limit(&c->erle.min, 1.f, 100000.f);
res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
res = res & Limit(&c->erle.max_h, 1.f, 100000.f);
if (c->erle.min > c->erle.max_l || c->erle.min > c->erle.max_h) {
c->erle.min = std::min(c->erle.max_l, c->erle.max_h);
res = false;
}
res = res & Limit(&c->erle.num_sections, 1, c->filter.refined.length_blocks);
res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);
res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f);
res =
res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
res = res &
Limit(&c->echo_audibility.normal_render_limit, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.floor_power, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_lf, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_mf, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_hf, 0.f,
32768.f * 32768.f);
res = res &
Limit(&c->render_levels.active_render_limit, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->render_levels.poor_excitation_render_limit, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->render_levels.poor_excitation_render_limit_ds8, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_model.noise_floor_hold, 0, 1000);
res = res & Limit(&c->echo_model.min_noise_floor_power, 0, 2000000.f);
res = res & Limit(&c->echo_model.stationary_gate_slope, 0, 1000000.f);
res = res & Limit(&c->echo_model.noise_gate_power, 0, 1000000.f);
res = res & Limit(&c->echo_model.noise_gate_slope, 0, 1000000.f);
res = res & Limit(&c->echo_model.render_pre_window_size, 0, 100);
res = res & Limit(&c->echo_model.render_post_window_size, 0, 100);
res = res & Limit(&c->comfort_noise.noise_floor_dbfs, -200.f, 0.f);
res = res & Limit(&c->suppressor.nearend_average_blocks, 1, 5000);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.enr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.enr_suppress, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.emr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.enr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.enr_suppress, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.emr_transparent, 0.f, 100.f);
res = res & Limit(&c->suppressor.normal_tuning.max_inc_factor, 0.f, 100.f);
res = res & Limit(&c->suppressor.normal_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.enr_transparent, 0.f,
100.f);
res = res &
Limit(&c->suppressor.nearend_tuning.mask_lf.enr_suppress, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.emr_transparent, 0.f,
100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.enr_transparent, 0.f,
100.f);
res = res &
Limit(&c->suppressor.nearend_tuning.mask_hf.enr_suppress, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.emr_transparent, 0.f,
100.f);
res = res & Limit(&c->suppressor.nearend_tuning.max_inc_factor, 0.f, 100.f);
res =
res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_band, 0, 63);
res = res &
Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64);
res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.hold_duration, 0,
10000);
res = res & Limit(&c->suppressor.dominant_nearend_detection.trigger_threshold,
0, 10000);
res = res &
Limit(&c->suppressor.subband_nearend_detection.nearend_average_blocks,
1, 1024);
res =
res & Limit(&c->suppressor.subband_nearend_detection.subband1.low, 0, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.subband1.high,
c->suppressor.subband_nearend_detection.subband1.low, 65);
res =
res & Limit(&c->suppressor.subband_nearend_detection.subband2.low, 0, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.subband2.high,
c->suppressor.subband_nearend_detection.subband2.low, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.nearend_threshold,
0.f, 1.e24f);
res = res & Limit(&c->suppressor.subband_nearend_detection.snr_threshold, 0.f,
1.e24f);
res = res & Limit(&c->suppressor.high_bands_suppression.enr_threshold, 0.f,
1000000.f);
res = res & Limit(&c->suppressor.high_bands_suppression.max_gain_during_echo,
0.f, 1.f);
res = res & Limit(&c->suppressor.high_bands_suppression
.anti_howling_activation_threshold,
0.f, 32768.f * 32768.f);
res = res & Limit(&c->suppressor.high_bands_suppression.anti_howling_gain,
0.f, 1.f);
res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f);
return res;
}
} // namespace webrtc
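// Illustrative usage sketch: Validate() clamps out-of-range fields in place
// and reports whether the config was already valid.
//
//   webrtc::EchoCanceller3Config config;
//   config.delay.down_sampling_factor = 3;  // unsupported value
//   bool was_valid = webrtc::EchoCanceller3Config::Validate(&config);
//   // was_valid == false; config.delay.down_sampling_factor is now 4.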

View File

@@ -0,0 +1,250 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#define API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#include <stddef.h> // size_t
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
// Configuration struct for EchoCanceller3
struct RTC_EXPORT EchoCanceller3Config {
// Checks and updates the config parameters to lie within (mostly) reasonable
// ranges. Returns true if and only if the config did not need to be changed.
static bool Validate(EchoCanceller3Config* config);
EchoCanceller3Config();
EchoCanceller3Config(const EchoCanceller3Config& e);
EchoCanceller3Config& operator=(const EchoCanceller3Config& other);
struct Buffering {
size_t excess_render_detection_interval_blocks = 250;
size_t max_allowed_excess_render_blocks = 8;
} buffering;
struct Delay {
Delay();
Delay(const Delay& e);
Delay& operator=(const Delay& e);
size_t default_delay = 5;
size_t down_sampling_factor = 4;
size_t num_filters = 5;
size_t delay_headroom_samples = 32;
size_t hysteresis_limit_blocks = 1;
size_t fixed_capture_delay_samples = 0;
float delay_estimate_smoothing = 0.7f;
float delay_estimate_smoothing_delay_found = 0.7f;
float delay_candidate_detection_threshold = 0.2f;
struct DelaySelectionThresholds {
int initial;
int converged;
} delay_selection_thresholds = {5, 20};
bool use_external_delay_estimator = false;
bool log_warning_on_delay_changes = false;
struct AlignmentMixing {
bool downmix;
bool adaptive_selection;
float activity_power_threshold;
bool prefer_first_two_channels;
};
AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
bool detect_pre_echo = true;
} delay;
struct Filter {
struct RefinedConfiguration {
size_t length_blocks;
float leakage_converged;
float leakage_diverged;
float error_floor;
float error_ceil;
float noise_gate;
};
struct CoarseConfiguration {
size_t length_blocks;
float rate;
float noise_gate;
};
RefinedConfiguration refined = {13, 0.00005f, 0.05f,
0.001f, 2.f, 20075344.f};
CoarseConfiguration coarse = {13, 0.7f, 20075344.f};
RefinedConfiguration refined_initial = {12, 0.005f, 0.5f,
0.001f, 2.f, 20075344.f};
CoarseConfiguration coarse_initial = {12, 0.9f, 20075344.f};
size_t config_change_duration_blocks = 250;
float initial_state_seconds = 2.5f;
int coarse_reset_hangover_blocks = 25;
bool conservative_initial_phase = false;
bool enable_coarse_filter_output_usage = true;
bool use_linear_filter = true;
bool high_pass_filter_echo_reference = false;
bool export_linear_aec_output = false;
} filter;
struct Erle {
float min = 1.f;
float max_l = 4.f;
float max_h = 1.5f;
bool onset_detection = true;
size_t num_sections = 1;
bool clamp_quality_estimate_to_zero = true;
bool clamp_quality_estimate_to_one = true;
} erle;
struct EpStrength {
float default_gain = 1.f;
float default_len = 0.83f;
float nearend_len = 0.83f;
bool echo_can_saturate = true;
bool bounded_erl = false;
bool erle_onset_compensation_in_dominant_nearend = false;
bool use_conservative_tail_frequency_response = true;
} ep_strength;
struct EchoAudibility {
float low_render_limit = 4 * 64.f;
float normal_render_limit = 64.f;
float floor_power = 2 * 64.f;
float audibility_threshold_lf = 10;
float audibility_threshold_mf = 10;
float audibility_threshold_hf = 10;
bool use_stationarity_properties = false;
bool use_stationarity_properties_at_init = false;
} echo_audibility;
struct RenderLevels {
float active_render_limit = 100.f;
float poor_excitation_render_limit = 150.f;
float poor_excitation_render_limit_ds8 = 20.f;
float render_power_gain_db = 0.f;
} render_levels;
struct EchoRemovalControl {
bool has_clock_drift = false;
bool linear_and_stable_echo_path = false;
} echo_removal_control;
struct EchoModel {
EchoModel();
EchoModel(const EchoModel& e);
EchoModel& operator=(const EchoModel& e);
size_t noise_floor_hold = 50;
float min_noise_floor_power = 1638400.f;
float stationary_gate_slope = 10.f;
float noise_gate_power = 27509.42f;
float noise_gate_slope = 0.3f;
size_t render_pre_window_size = 1;
size_t render_post_window_size = 1;
bool model_reverb_in_nonlinear_mode = true;
} echo_model;
struct ComfortNoise {
float noise_floor_dbfs = -96.03406f;
} comfort_noise;
struct Suppressor {
Suppressor();
Suppressor(const Suppressor& e);
Suppressor& operator=(const Suppressor& e);
size_t nearend_average_blocks = 4;
struct MaskingThresholds {
MaskingThresholds(float enr_transparent,
float enr_suppress,
float emr_transparent);
MaskingThresholds(const MaskingThresholds& e);
MaskingThresholds& operator=(const MaskingThresholds& e);
float enr_transparent;
float enr_suppress;
float emr_transparent;
};
struct Tuning {
Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf);
Tuning(const Tuning& e);
Tuning& operator=(const Tuning& e);
MaskingThresholds mask_lf;
MaskingThresholds mask_hf;
float max_inc_factor;
float max_dec_factor_lf;
};
Tuning normal_tuning = Tuning(MaskingThresholds(.3f, .4f, .3f),
MaskingThresholds(.07f, .1f, .3f),
2.0f,
0.25f);
Tuning nearend_tuning = Tuning(MaskingThresholds(1.09f, 1.1f, .3f),
MaskingThresholds(.1f, .3f, .3f),
2.0f,
0.25f);
bool lf_smoothing_during_initial_phase = true;
int last_permanent_lf_smoothing_band = 0;
int last_lf_smoothing_band = 5;
int last_lf_band = 5;
int first_hf_band = 8;
struct DominantNearendDetection {
float enr_threshold = .25f;
float enr_exit_threshold = 10.f;
float snr_threshold = 30.f;
int hold_duration = 50;
int trigger_threshold = 12;
bool use_during_initial_phase = true;
bool use_unbounded_echo_spectrum = true;
} dominant_nearend_detection;
struct SubbandNearendDetection {
size_t nearend_average_blocks = 1;
struct SubbandRegion {
size_t low;
size_t high;
};
SubbandRegion subband1 = {1, 1};
SubbandRegion subband2 = {1, 1};
float nearend_threshold = 1.f;
float snr_threshold = 1.f;
} subband_nearend_detection;
bool use_subband_nearend_detection = false;
struct HighBandsSuppression {
float enr_threshold = 1.f;
float max_gain_during_echo = 1.f;
float anti_howling_activation_threshold = 400.f;
float anti_howling_gain = 1.f;
} high_bands_suppression;
float floor_first_increase = 0.00001f;
bool conservative_hf_suppression = false;
} suppressor;
struct MultiChannel {
bool detect_stereo_content = true;
float stereo_detection_threshold = 0.0f;
int stereo_detection_timeout_threshold_seconds = 300;
float stereo_detection_hysteresis_seconds = 2.0f;
} multi_channel;
};
} // namespace webrtc
#endif // API_AUDIO_ECHO_CANCELLER3_CONFIG_H_

View File

@@ -0,0 +1,75 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CONTROL_H_
#define API_AUDIO_ECHO_CONTROL_H_
#include <memory>
#include "rtc_base/checks.h"
namespace webrtc {
class AudioBuffer;
// Interface for an acoustic echo cancellation (AEC) submodule.
class EchoControl {
public:
// Analysis (not changing) of the render signal.
virtual void AnalyzeRender(AudioBuffer* render) = 0;
// Analysis (not changing) of the capture signal.
virtual void AnalyzeCapture(AudioBuffer* capture) = 0;
// Processes the capture signal in order to remove the echo.
virtual void ProcessCapture(AudioBuffer* capture, bool level_change) = 0;
// As above, but also returns the linear filter output.
virtual void ProcessCapture(AudioBuffer* capture,
AudioBuffer* linear_output,
bool level_change) = 0;
struct Metrics {
double echo_return_loss;
double echo_return_loss_enhancement;
int delay_ms;
};
// Collect current metrics from the echo controller.
virtual Metrics GetMetrics() const = 0;
// Provides an optional external estimate of the audio buffer delay.
virtual void SetAudioBufferDelay(int delay_ms) = 0;
// Specifies whether the capture output will be used. The purpose of this is
// to allow the echo controller to deactivate some of the processing when the
// resulting output is anyway not used, for instance when the endpoint is
// muted.
// TODO(b/177830919): Make pure virtual.
virtual void SetCaptureOutputUsage(bool capture_output_used) {}
// Returns whether the signal is altered.
virtual bool ActiveProcessing() const = 0;
virtual ~EchoControl() {}
};
// Interface for a factory that creates EchoControllers.
class EchoControlFactory {
public:
virtual std::unique_ptr<EchoControl> Create(int sample_rate_hz,
int num_render_channels,
int num_capture_channels) = 0;
virtual ~EchoControlFactory() = default;
};
} // namespace webrtc
#endif // API_AUDIO_ECHO_CONTROL_H_
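// Illustrative sketch of a factory that injects a custom AEC;
// `MyEchoControl` is a hypothetical EchoControl implementation.
//
//   class MyEchoControlFactory : public webrtc::EchoControlFactory {
//    public:
//     std::unique_ptr<webrtc::EchoControl> Create(
//         int sample_rate_hz,
//         int num_render_channels,
//         int num_capture_channels) override {
//       return std::make_unique<MyEchoControl>(
//           sample_rate_hz, num_render_channels, num_capture_channels);
//     }
//   };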

View File

@@ -0,0 +1,31 @@
/*
* Copyright 2023 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_LOCATION_H_
#define API_LOCATION_H_
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
// Location provides basic info about where an object was constructed, or was
// significantly brought to life. This is a stripped-down version of
// https://source.chromium.org/chromium/chromium/src/+/main:base/location.h
// that only specifies an interface compatible with how base::Location is
// supposed to be used.
// The declaration is overridden inside the Chromium build.
class RTC_EXPORT Location {
public:
static Location Current() { return Location(); }
};
} // namespace webrtc
#endif // API_LOCATION_H_

View File

@@ -0,0 +1,67 @@
/*
* Copyright 2011 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_REF_COUNT_H_
#define API_REF_COUNT_H_
namespace webrtc {
// Refcounted objects should implement the following informal interface:
//
// void AddRef() const;
// RefCountReleaseStatus Release() const;
//
// You may access members of a reference-counted object, including the AddRef()
// and Release() methods, only if you already own a reference to it, or if
// you're borrowing someone else's reference. (A newly created object is a
// special case: the reference count is zero on construction, and the code that
// creates the object should immediately call AddRef(), bringing the reference
// count from zero to one, e.g., by constructing an rtc::scoped_refptr).
//
// AddRef() creates a new reference to the object.
//
// Release() releases a reference to the object; the caller now has one less
// reference than before the call. Returns kDroppedLastRef if the number of
// references dropped to zero because of this (in which case the object destroys
// itself). Otherwise, returns kOtherRefsRemained, to signal that at the precise
// time the caller's reference was dropped, other references still remained (but
// if other threads own references, this may of course have changed by the time
// Release() returns).
//
// The caller of Release() must treat it in the same way as a delete operation:
// Regardless of the return value from Release(), the caller mustn't access the
// object. The object might still be alive, due to references held by other
// users of the object, but the object can go away at any time, e.g., as the
// result of another thread calling Release().
//
// Calling AddRef() and Release() manually is discouraged. It's recommended to
// use rtc::scoped_refptr to manage all pointers to reference counted objects.
// Note that rtc::scoped_refptr depends on compile-time duck-typing; formally
// implementing the below RefCountInterface is not required.
enum class RefCountReleaseStatus { kDroppedLastRef, kOtherRefsRemained };
// Interfaces where refcounting is part of the public api should
// inherit this abstract interface. The implementation of these
// methods is usually provided by the RefCountedObject template class,
// applied as a leaf in the inheritance tree.
class RefCountInterface {
public:
virtual void AddRef() const = 0;
virtual RefCountReleaseStatus Release() const = 0;
// Non-public destructor, because Release() has exclusive responsibility for
// destroying the object.
protected:
virtual ~RefCountInterface() {}
};
} // namespace webrtc
#endif // API_REF_COUNT_H_
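// Illustrative sketch (not part of the original header): a minimal
// thread-safe implementation of the informal interface using std::atomic.
//
//   #include <atomic>
//
//   class Counted : public webrtc::RefCountInterface {
//    public:
//     void AddRef() const override { ref_count_.fetch_add(1); }
//     webrtc::RefCountReleaseStatus Release() const override {
//       if (ref_count_.fetch_sub(1) == 1) {
//         delete this;
//         return webrtc::RefCountReleaseStatus::kDroppedLastRef;
//       }
//       return webrtc::RefCountReleaseStatus::kOtherRefsRemained;
//     }
//    protected:
//     ~Counted() override = default;
//    private:
//     mutable std::atomic<int> ref_count_{0};
//   };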

@ -0,0 +1,227 @@
/*
* Copyright 2011 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Originally these classes are from Chromium.
// http://src.chromium.org/viewvc/chrome/trunk/src/base/memory/ref_counted.h?view=markup
//
// A smart pointer class for reference counted objects. Use this class instead
// of calling AddRef and Release manually on a reference counted object to
// avoid common memory leaks caused by forgetting to Release an object
// reference. Sample usage:
//
// class MyFoo : public RefCounted<MyFoo> {
// ...
// };
//
// void some_function() {
// scoped_refptr<MyFoo> foo = new MyFoo();
// foo->Method(param);
// // `foo` is released when this function returns
// }
//
// void some_other_function() {
// scoped_refptr<MyFoo> foo = new MyFoo();
// ...
// foo = nullptr; // explicitly releases `foo`
// ...
// if (foo)
// foo->Method(param);
// }
//
// The above examples show how scoped_refptr<T> acts like a pointer to T.
// Given two scoped_refptr<T> classes, it is also possible to exchange
// references between the two objects, like so:
//
// {
// scoped_refptr<MyFoo> a = new MyFoo();
// scoped_refptr<MyFoo> b;
//
// b.swap(a);
// // now, `b` references the MyFoo object, and `a` references null.
// }
//
// To make both `a` and `b` in the above example reference the same MyFoo
// object, simply use the assignment operator:
//
// {
// scoped_refptr<MyFoo> a = new MyFoo();
// scoped_refptr<MyFoo> b;
//
// b = a;
// // now, `a` and `b` each own a reference to the same MyFoo object.
// }
//
#ifndef API_SCOPED_REFPTR_H_
#define API_SCOPED_REFPTR_H_
#include <cstddef>
#include <utility>
namespace webrtc {
template <class T>
class scoped_refptr {
public:
typedef T element_type;
scoped_refptr() : ptr_(nullptr) {}
scoped_refptr(std::nullptr_t) : ptr_(nullptr) {} // NOLINT(runtime/explicit)
explicit scoped_refptr(T* p) : ptr_(p) {
if (ptr_)
ptr_->AddRef();
}
scoped_refptr(const scoped_refptr<T>& r) : ptr_(r.ptr_) {
if (ptr_)
ptr_->AddRef();
}
template <typename U>
scoped_refptr(const scoped_refptr<U>& r) : ptr_(r.get()) {
if (ptr_)
ptr_->AddRef();
}
// Move constructors.
scoped_refptr(scoped_refptr<T>&& r) noexcept : ptr_(r.release()) {}
template <typename U>
scoped_refptr(scoped_refptr<U>&& r) noexcept : ptr_(r.release()) {}
~scoped_refptr() {
if (ptr_)
ptr_->Release();
}
T* get() const { return ptr_; }
explicit operator bool() const { return ptr_ != nullptr; }
T& operator*() const { return *ptr_; }
T* operator->() const { return ptr_; }
// Returns the (possibly null) raw pointer, and makes the scoped_refptr hold a
// null pointer, all without touching the reference count of the underlying
// pointed-to object. The object is still reference counted, and the caller of
// release() is now the proud owner of one reference, so it is responsible for
// calling Release() once on the object when no longer using it.
T* release() {
T* retVal = ptr_;
ptr_ = nullptr;
return retVal;
}
scoped_refptr<T>& operator=(T* p) {
// AddRef first so that self assignment should work
if (p)
p->AddRef();
if (ptr_)
ptr_->Release();
ptr_ = p;
return *this;
}
scoped_refptr<T>& operator=(const scoped_refptr<T>& r) {
return *this = r.ptr_;
}
template <typename U>
scoped_refptr<T>& operator=(const scoped_refptr<U>& r) {
return *this = r.get();
}
scoped_refptr<T>& operator=(scoped_refptr<T>&& r) noexcept {
scoped_refptr<T>(std::move(r)).swap(*this);
return *this;
}
template <typename U>
scoped_refptr<T>& operator=(scoped_refptr<U>&& r) noexcept {
scoped_refptr<T>(std::move(r)).swap(*this);
return *this;
}
void swap(T** pp) noexcept {
T* p = ptr_;
ptr_ = *pp;
*pp = p;
}
void swap(scoped_refptr<T>& r) noexcept { swap(&r.ptr_); }
protected:
T* ptr_;
};
template <typename T, typename U>
bool operator==(const scoped_refptr<T>& a, const scoped_refptr<U>& b) {
return a.get() == b.get();
}
template <typename T, typename U>
bool operator!=(const scoped_refptr<T>& a, const scoped_refptr<U>& b) {
return !(a == b);
}
template <typename T>
bool operator==(const scoped_refptr<T>& a, std::nullptr_t) {
return a.get() == nullptr;
}
template <typename T>
bool operator!=(const scoped_refptr<T>& a, std::nullptr_t) {
return !(a == nullptr);
}
template <typename T>
bool operator==(std::nullptr_t, const scoped_refptr<T>& a) {
return a.get() == nullptr;
}
template <typename T>
bool operator!=(std::nullptr_t, const scoped_refptr<T>& a) {
return !(a == nullptr);
}
// Comparison with raw pointer.
template <typename T, typename U>
bool operator==(const scoped_refptr<T>& a, const U* b) {
return a.get() == b;
}
template <typename T, typename U>
bool operator!=(const scoped_refptr<T>& a, const U* b) {
return !(a == b);
}
template <typename T, typename U>
bool operator==(const T* a, const scoped_refptr<U>& b) {
return a == b.get();
}
template <typename T, typename U>
bool operator!=(const T* a, const scoped_refptr<U>& b) {
return !(a == b);
}
// Ordered comparison, needed for use as a std::map key.
template <typename T, typename U>
bool operator<(const scoped_refptr<T>& a, const scoped_refptr<U>& b) {
return a.get() < b.get();
}
} // namespace webrtc
namespace rtc {
// Backwards compatible alias.
// TODO(bugs.webrtc.org/15622): Deprecate and remove.
template <typename T>
using scoped_refptr = webrtc::scoped_refptr<T>;
} // namespace rtc
#endif // API_SCOPED_REFPTR_H_
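Reusing the hypothetical Foo from the ref-count sketch earlier, this is how release() (documented above its definition) behaves: the single reference is handed to the raw pointer, and the caller must balance it manually.

void UseRelease() {
  webrtc::scoped_refptr<Foo> p(new Foo());  // reference count: 1
  Foo* raw = p.release();  // `p` is now null; the count is still 1
  raw->Release();  // drops the last reference; the object destroys itself
}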

@ -0,0 +1,78 @@
/*
* Copyright 2019 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/task_queue/task_queue_base.h"
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#if defined(ABSL_HAVE_THREAD_LOCAL)
namespace webrtc {
namespace {
ABSL_CONST_INIT thread_local TaskQueueBase* current = nullptr;
} // namespace
TaskQueueBase* TaskQueueBase::Current() {
return current;
}
TaskQueueBase::CurrentTaskQueueSetter::CurrentTaskQueueSetter(
TaskQueueBase* task_queue)
: previous_(current) {
current = task_queue;
}
TaskQueueBase::CurrentTaskQueueSetter::~CurrentTaskQueueSetter() {
current = previous_;
}
} // namespace webrtc
#elif defined(WEBRTC_POSIX)
#include <pthread.h>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
ABSL_CONST_INIT pthread_key_t g_queue_ptr_tls = 0;
void InitializeTls() {
RTC_CHECK(pthread_key_create(&g_queue_ptr_tls, nullptr) == 0);
}
pthread_key_t GetQueuePtrTls() {
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
RTC_CHECK(pthread_once(&init_once, &InitializeTls) == 0);
return g_queue_ptr_tls;
}
} // namespace
TaskQueueBase* TaskQueueBase::Current() {
return static_cast<TaskQueueBase*>(pthread_getspecific(GetQueuePtrTls()));
}
TaskQueueBase::CurrentTaskQueueSetter::CurrentTaskQueueSetter(
TaskQueueBase* task_queue)
: previous_(TaskQueueBase::Current()) {
pthread_setspecific(GetQueuePtrTls(), task_queue);
}
TaskQueueBase::CurrentTaskQueueSetter::~CurrentTaskQueueSetter() {
pthread_setspecific(GetQueuePtrTls(), previous_);
}
} // namespace webrtc
#else
#error Unsupported platform
#endif

@ -0,0 +1,197 @@
/*
* Copyright 2019 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_TASK_QUEUE_TASK_QUEUE_BASE_H_
#define API_TASK_QUEUE_TASK_QUEUE_BASE_H_
#include <utility>
#include "absl/functional/any_invocable.h"
#include "api/location.h"
#include "api/units/time_delta.h"
#include "rtc_base/system/rtc_export.h"
#include "rtc_base/thread_annotations.h"
namespace webrtc {
// Asynchronously executes tasks in a way that guarantees that they're executed
// in FIFO order and that tasks never overlap. Tasks may always execute on the
// same worker thread, or they may not. To DCHECK that tasks are executing on a
// known task queue, use IsCurrent().
class RTC_LOCKABLE RTC_EXPORT TaskQueueBase {
public:
enum class DelayPrecision {
// This may include up to a 17 ms leeway in addition to OS timer precision.
// See PostDelayedTask() for more information.
kLow,
// This does not have the additional delay that kLow has, but it is still
// limited by OS timer precision. See PostDelayedHighPrecisionTask() for
// more information.
kHigh,
};
// Starts destruction of the task queue.
// On return ensures no tasks are running and no new tasks are able to start
// on the task queue.
// Responsible for deallocation. Deallocation may happen synchronously during
// Delete or asynchronously after Delete returns.
// Code not running on the TaskQueue should not make any assumptions about when
// the TaskQueue is deallocated and thus should not call any methods after Delete.
// Code running on the TaskQueue should not call Delete, but can assume
// TaskQueue still exists and may call other methods, e.g. PostTask.
// Should be called on the same task queue or thread that this task queue
// was created on.
virtual void Delete() = 0;
// Schedules a `task` to execute. Tasks are executed in FIFO order.
// When a TaskQueue is deleted, pending tasks will not be executed but they
// will be deleted.
//
// As long as tasks are not posted from task destruction, posted tasks are
// guaranteed to be destroyed with Current() pointing to the task queue they
// were posted to, whether they're executed or not. That means SequenceChecker
// works during task destruction, a fact that can be used to guarantee
// thread-compatible object deletion happening on a particular task queue
// which can simplify class design.
// Note that this guarantee does not apply to delayed tasks.
//
// May be called on any thread or task queue, including this task queue.
void PostTask(absl::AnyInvocable<void() &&> task,
const Location& location = Location::Current()) {
PostTaskImpl(std::move(task), PostTaskTraits{}, location);
}
// Prefer PostDelayedTask() over PostDelayedHighPrecisionTask() whenever
// possible.
//
// Schedules a `task` to execute a specified `delay` from when the call is
// made, using "low" precision. All scheduling is affected by OS-specific
// leeway and current workloads which means that in terms of precision there
// are no hard guarantees, but in addition to the OS induced leeway, "low"
// precision adds up to a 17 ms additional leeway. The purpose of this leeway
// is to achieve more efficient CPU scheduling and reduce Idle Wake Up
// frequency.
//
// The task may execute with [-1, 17 + OS induced leeway) ms additional delay.
//
// Avoid making assumptions about the precision of the OS scheduler. On macOS,
// the OS induced leeway may be 10% of sleep interval. On Windows, 1 ms
// precision timers may be used but there are cases, such as when running on
// battery, when the timer precision can be as poor as 15 ms.
//
// "Low" precision is not implemented everywhere yet. Where not yet
// implemented, PostDelayedTask() has "high" precision. See
// https://crbug.com/webrtc/13583 for more information.
//
// May be called on any thread or task queue, including this task queue.
void PostDelayedTask(absl::AnyInvocable<void() &&> task,
TimeDelta delay,
const Location& location = Location::Current()) {
PostDelayedTaskImpl(std::move(task), delay, PostDelayedTaskTraits{},
location);
}
// Prefer PostDelayedTask() over PostDelayedHighPrecisionTask() whenever
// possible.
//
// Schedules a `task` to execute a specified `delay` from when the call is
// made, using "high" precision. All scheduling is affected by OS-specific
// leeway and current workloads which means that in terms of precision there
// are no hard guarantees.
//
// The task may execute with [-1, OS induced leeway] ms additional delay.
//
// Avoid making assumptions about the precision of the OS scheduler. On macOS,
// the OS induced leeway may be 10% of sleep interval. On Windows, 1 ms
// precision timers may be used but there are cases, such as when running on
// battery, when the timer precision can be as poor as 15 ms.
//
// May be called on any thread or task queue, including this task queue.
void PostDelayedHighPrecisionTask(
absl::AnyInvocable<void() &&> task,
TimeDelta delay,
const Location& location = Location::Current()) {
PostDelayedTaskTraits traits;
traits.high_precision = true;
PostDelayedTaskImpl(std::move(task), delay, traits, location);
}
// As specified by `precision`, calls either PostDelayedTask() or
// PostDelayedHighPrecisionTask().
void PostDelayedTaskWithPrecision(
DelayPrecision precision,
absl::AnyInvocable<void() &&> task,
TimeDelta delay,
const Location& location = Location::Current()) {
switch (precision) {
case DelayPrecision::kLow:
PostDelayedTask(std::move(task), delay, location);
break;
case DelayPrecision::kHigh:
PostDelayedHighPrecisionTask(std::move(task), delay, location);
break;
}
}
// Returns the task queue that is running the current thread.
// Returns nullptr if this thread is not associated with any task queue.
// May be called on any thread or task queue, including this task queue.
static TaskQueueBase* Current();
bool IsCurrent() const { return Current() == this; }
protected:
// This is currently only present here to simplify introduction of future
// planned task queue changes.
struct PostTaskTraits {};
struct PostDelayedTaskTraits {
// If `high_precision` is false, tasks may execute within up to a 17 ms
// leeway in addition to OS timer precision. Otherwise the task should be
// limited to OS timer precision. See PostDelayedTask() and
// PostDelayedHighPrecisionTask() for more information.
bool high_precision = false;
};
class RTC_EXPORT CurrentTaskQueueSetter {
public:
explicit CurrentTaskQueueSetter(TaskQueueBase* task_queue);
CurrentTaskQueueSetter(const CurrentTaskQueueSetter&) = delete;
CurrentTaskQueueSetter& operator=(const CurrentTaskQueueSetter&) = delete;
~CurrentTaskQueueSetter();
private:
TaskQueueBase* const previous_;
};
// Subclasses should implement this method to support the behavior defined in
// the PostTask and PostTaskTraits docs above.
virtual void PostTaskImpl(absl::AnyInvocable<void() &&> task,
const PostTaskTraits& traits,
const Location& location) = 0;
// Subclasses should implement this method to support the behavior defined in
// the PostDelayedTask/PostDelayedHighPrecisionTask and PostDelayedTaskTraits
// docs above.
virtual void PostDelayedTaskImpl(absl::AnyInvocable<void() &&> task,
TimeDelta delay,
const PostDelayedTaskTraits& traits,
const Location& location) = 0;
// Users of the TaskQueue should call Delete instead of directly deleting
// this object.
virtual ~TaskQueueBase() = default;
};
struct TaskQueueDeleter {
void operator()(TaskQueueBase* task_queue) const { task_queue->Delete(); }
};
} // namespace webrtc
#endif // API_TASK_QUEUE_TASK_QUEUE_BASE_H_
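A toy sketch (not part of this commit, and not a production-grade queue): it runs posted tasks inline on the caller's thread and ignores delays, purely to illustrate the subclass contract of Delete(), PostTaskImpl(), PostDelayedTaskImpl() and the protected CurrentTaskQueueSetter.

#include <utility>

#include "api/task_queue/task_queue_base.h"

class InlineTaskQueue final : public webrtc::TaskQueueBase {
 public:
  void Delete() override { delete this; }

 protected:
  void PostTaskImpl(absl::AnyInvocable<void() &&> task,
                    const PostTaskTraits& /*traits*/,
                    const webrtc::Location& /*location*/) override {
    // Publish ourselves so Current()/IsCurrent() hold while the task runs.
    CurrentTaskQueueSetter set_current(this);
    std::move(task)();
  }
  void PostDelayedTaskImpl(absl::AnyInvocable<void() &&> task,
                           webrtc::TimeDelta /*delay*/,
                           const PostDelayedTaskTraits& /*traits*/,
                           const webrtc::Location& location) override {
    PostTaskImpl(std::move(task), PostTaskTraits{}, location);  // Delay dropped.
  }

 private:
  ~InlineTaskQueue() override = default;  // Destroyed only via Delete().
};

// Owners pair the queue with TaskQueueDeleter so Delete() is called:
//   std::unique_ptr<webrtc::TaskQueueBase, webrtc::TaskQueueDeleter> queue(
//       new InlineTaskQueue());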

@ -0,0 +1,38 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/units/time_delta.h"
#include <string>
#include "api/array_view.h"
#include "rtc_base/strings/string_builder.h"
namespace webrtc {
std::string ToString(TimeDelta value) {
char buf[64];
rtc::SimpleStringBuilder sb(buf);
if (value.IsPlusInfinity()) {
sb << "+inf ms";
} else if (value.IsMinusInfinity()) {
sb << "-inf ms";
} else {
if (value.us() == 0 || (value.us() % 1000) != 0)
sb << value.us() << " us";
else if (value.ms() % 1000 != 0)
sb << value.ms() << " ms";
else
sb << value.seconds() << " s";
}
return sb.str();
}
} // namespace webrtc
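An illustration (assumed outputs, not part of this commit) of the unit selection above: the coarsest of us/ms/s that represents the value exactly is printed.

// ToString(webrtc::TimeDelta::Micros(1500))   -> "1500 us"  (not a whole ms)
// ToString(webrtc::TimeDelta::Millis(2500))   -> "2500 ms"  (not a whole s)
// ToString(webrtc::TimeDelta::Seconds(3))     -> "3 s"
// ToString(webrtc::TimeDelta::PlusInfinity()) -> "+inf ms"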

@ -0,0 +1,99 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_UNITS_TIME_DELTA_H_
#define API_UNITS_TIME_DELTA_H_
#include <cstdint>
#include <cstdlib>
#include <string>
#include <type_traits>
#include "rtc_base/units/unit_base.h" // IWYU pragma: export
namespace webrtc {
// TimeDelta represents the difference between two timestamps. Commonly this can
// be a duration. However since two Timestamps are not guaranteed to have the
// same epoch (they might come from different computers, making exact
// synchronisation infeasible), the duration covered by a TimeDelta can be
// undefined. To simplify usage, it can be constructed and converted to
// different units, specifically seconds (s), milliseconds (ms) and
// microseconds (us).
class TimeDelta final : public rtc_units_impl::RelativeUnit<TimeDelta> {
public:
template <typename T>
static constexpr TimeDelta Minutes(T value) {
static_assert(std::is_arithmetic<T>::value, "");
return Seconds(value * 60);
}
template <typename T>
static constexpr TimeDelta Seconds(T value) {
static_assert(std::is_arithmetic<T>::value, "");
return FromFraction(1'000'000, value);
}
template <typename T>
static constexpr TimeDelta Millis(T value) {
static_assert(std::is_arithmetic<T>::value, "");
return FromFraction(1'000, value);
}
template <typename T>
static constexpr TimeDelta Micros(T value) {
static_assert(std::is_arithmetic<T>::value, "");
return FromValue(value);
}
TimeDelta() = delete;
template <typename T = int64_t>
constexpr T seconds() const {
return ToFraction<1000000, T>();
}
template <typename T = int64_t>
constexpr T ms() const {
return ToFraction<1000, T>();
}
template <typename T = int64_t>
constexpr T us() const {
return ToValue<T>();
}
template <typename T = int64_t>
constexpr T ns() const {
return ToMultiple<1000, T>();
}
constexpr int64_t seconds_or(int64_t fallback_value) const {
return ToFractionOr<1000000>(fallback_value);
}
constexpr int64_t ms_or(int64_t fallback_value) const {
return ToFractionOr<1000>(fallback_value);
}
constexpr int64_t us_or(int64_t fallback_value) const {
return ToValueOr(fallback_value);
}
constexpr TimeDelta Abs() const {
return us() < 0 ? TimeDelta::Micros(-us()) : *this;
}
private:
friend class rtc_units_impl::UnitBase<TimeDelta>;
using RelativeUnit::RelativeUnit;
static constexpr bool one_sided = false;
};
std::string ToString(TimeDelta value);
inline std::string ToLogString(TimeDelta value) {
return ToString(value);
}
} // namespace webrtc
#endif // API_UNITS_TIME_DELTA_H_
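A few compile-time checks (a sketch, not part of this commit) of construction and conversion between the supported units:

#include "api/units/time_delta.h"

constexpr webrtc::TimeDelta kFrame = webrtc::TimeDelta::Millis(10);
static_assert(kFrame.us() == 10'000, "10 ms is 10000 us");
static_assert(webrtc::TimeDelta::Minutes(2) == webrtc::TimeDelta::Seconds(120),
              "Minutes() is sugar for Seconds() * 60");
static_assert(webrtc::TimeDelta::Micros(-250).Abs() ==
                  webrtc::TimeDelta::Micros(250),
              "Abs() flips negative deltas");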

@ -0,0 +1,36 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/units/timestamp.h"
#include <string>
#include "api/array_view.h"
#include "rtc_base/strings/string_builder.h"
namespace webrtc {
std::string ToString(Timestamp value) {
char buf[64];
rtc::SimpleStringBuilder sb(buf);
if (value.IsPlusInfinity()) {
sb << "+inf ms";
} else if (value.IsMinusInfinity()) {
sb << "-inf ms";
} else {
if (value.us() == 0 || (value.us() % 1000) != 0)
sb << value.us() << " us";
else if (value.ms() % 1000 != 0)
sb << value.ms() << " ms";
else
sb << value.seconds() << " s";
}
return sb.str();
}
} // namespace webrtc

@ -0,0 +1,128 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_UNITS_TIMESTAMP_H_
#define API_UNITS_TIMESTAMP_H_
#include <cstdint>
#include <string>
#include <type_traits>
#include "api/units/time_delta.h"
#include "rtc_base/checks.h"
#include "rtc_base/units/unit_base.h" // IWYU pragma: export
namespace webrtc {
// Timestamp represents the time that has passed since some unspecified epoch.
// The epoch is assumed to be before any represented timestamps; this means that
// negative values are not valid. The most notable feature is that the
// difference of two Timestamps results in a TimeDelta.
class Timestamp final : public rtc_units_impl::UnitBase<Timestamp> {
public:
template <typename T>
static constexpr Timestamp Seconds(T value) {
static_assert(std::is_arithmetic<T>::value, "");
return FromFraction(1'000'000, value);
}
template <typename T>
static constexpr Timestamp Millis(T value) {
static_assert(std::is_arithmetic<T>::value, "");
return FromFraction(1'000, value);
}
template <typename T>
static constexpr Timestamp Micros(T value) {
static_assert(std::is_arithmetic<T>::value, "");
return FromValue(value);
}
Timestamp() = delete;
template <typename T = int64_t>
constexpr T seconds() const {
return ToFraction<1000000, T>();
}
template <typename T = int64_t>
constexpr T ms() const {
return ToFraction<1000, T>();
}
template <typename T = int64_t>
constexpr T us() const {
return ToValue<T>();
}
constexpr int64_t seconds_or(int64_t fallback_value) const {
return ToFractionOr<1000000>(fallback_value);
}
constexpr int64_t ms_or(int64_t fallback_value) const {
return ToFractionOr<1000>(fallback_value);
}
constexpr int64_t us_or(int64_t fallback_value) const {
return ToValueOr(fallback_value);
}
constexpr Timestamp operator+(const TimeDelta delta) const {
if (IsPlusInfinity() || delta.IsPlusInfinity()) {
RTC_DCHECK(!IsMinusInfinity());
RTC_DCHECK(!delta.IsMinusInfinity());
return PlusInfinity();
} else if (IsMinusInfinity() || delta.IsMinusInfinity()) {
RTC_DCHECK(!IsPlusInfinity());
RTC_DCHECK(!delta.IsPlusInfinity());
return MinusInfinity();
}
return Timestamp::Micros(us() + delta.us());
}
constexpr Timestamp operator-(const TimeDelta delta) const {
if (IsPlusInfinity() || delta.IsMinusInfinity()) {
RTC_DCHECK(!IsMinusInfinity());
RTC_DCHECK(!delta.IsPlusInfinity());
return PlusInfinity();
} else if (IsMinusInfinity() || delta.IsPlusInfinity()) {
RTC_DCHECK(!IsPlusInfinity());
RTC_DCHECK(!delta.IsMinusInfinity());
return MinusInfinity();
}
return Timestamp::Micros(us() - delta.us());
}
constexpr TimeDelta operator-(const Timestamp other) const {
if (IsPlusInfinity() || other.IsMinusInfinity()) {
RTC_DCHECK(!IsMinusInfinity());
RTC_DCHECK(!other.IsPlusInfinity());
return TimeDelta::PlusInfinity();
} else if (IsMinusInfinity() || other.IsPlusInfinity()) {
RTC_DCHECK(!IsPlusInfinity());
RTC_DCHECK(!other.IsMinusInfinity());
return TimeDelta::MinusInfinity();
}
return TimeDelta::Micros(us() - other.us());
}
constexpr Timestamp& operator-=(const TimeDelta delta) {
*this = *this - delta;
return *this;
}
constexpr Timestamp& operator+=(const TimeDelta delta) {
*this = *this + delta;
return *this;
}
private:
friend class rtc_units_impl::UnitBase<Timestamp>;
using UnitBase::UnitBase;
static constexpr bool one_sided = true;
};
std::string ToString(Timestamp value);
inline std::string ToLogString(Timestamp value) {
return ToString(value);
}
} // namespace webrtc
#endif // API_UNITS_TIMESTAMP_H_
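A sketch (not part of this commit) of the arithmetic described above: Timestamp ± TimeDelta yields a Timestamp, while the difference of two Timestamps yields a TimeDelta.

#include "api/units/timestamp.h"

constexpr webrtc::Timestamp kStart = webrtc::Timestamp::Millis(1'000);
constexpr webrtc::Timestamp kEnd = kStart + webrtc::TimeDelta::Millis(250);
static_assert(kEnd.ms() == 1'250, "shifted forward by the delta");
static_assert(kEnd - kStart == webrtc::TimeDelta::Millis(250),
              "Timestamp - Timestamp is a TimeDelta");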

@ -0,0 +1,80 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/channel_buffer.h"
#include <cstdint>
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
namespace webrtc {
IFChannelBuffer::IFChannelBuffer(size_t num_frames,
size_t num_channels,
size_t num_bands)
: ivalid_(true),
ibuf_(num_frames, num_channels, num_bands),
fvalid_(true),
fbuf_(num_frames, num_channels, num_bands) {}
IFChannelBuffer::~IFChannelBuffer() = default;
ChannelBuffer<int16_t>* IFChannelBuffer::ibuf() {
RefreshI();
fvalid_ = false;
return &ibuf_;
}
ChannelBuffer<float>* IFChannelBuffer::fbuf() {
RefreshF();
ivalid_ = false;
return &fbuf_;
}
const ChannelBuffer<int16_t>* IFChannelBuffer::ibuf_const() const {
RefreshI();
return &ibuf_;
}
const ChannelBuffer<float>* IFChannelBuffer::fbuf_const() const {
RefreshF();
return &fbuf_;
}
void IFChannelBuffer::RefreshF() const {
if (!fvalid_) {
RTC_DCHECK(ivalid_);
fbuf_.set_num_channels(ibuf_.num_channels());
const int16_t* const* int_channels = ibuf_.channels();
float* const* float_channels = fbuf_.channels();
for (size_t i = 0; i < ibuf_.num_channels(); ++i) {
for (size_t j = 0; j < ibuf_.num_frames(); ++j) {
float_channels[i][j] = int_channels[i][j];
}
}
fvalid_ = true;
}
}
void IFChannelBuffer::RefreshI() const {
if (!ivalid_) {
RTC_DCHECK(fvalid_);
int16_t* const* int_channels = ibuf_.channels();
ibuf_.set_num_channels(fbuf_.num_channels());
const float* const* float_channels = fbuf_.channels();
for (size_t i = 0; i < fbuf_.num_channels(); ++i) {
FloatS16ToS16(float_channels[i], ibuf_.num_frames(), int_channels[i]);
}
ivalid_ = true;
}
}
} // namespace webrtc

@ -0,0 +1,253 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_CHANNEL_BUFFER_H_
#define COMMON_AUDIO_CHANNEL_BUFFER_H_
#include <string.h>
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/gtest_prod_util.h"
namespace webrtc {
// TODO: b/335805780 - Remove this method. Instead, use Deinterleave() from
// audio_util.h which requires size checked buffer views.
template <typename T>
void Deinterleave(const T* interleaved,
size_t samples_per_channel,
size_t num_channels,
T* const* deinterleaved) {
for (size_t i = 0; i < num_channels; ++i) {
T* channel = deinterleaved[i];
size_t interleaved_idx = i;
for (size_t j = 0; j < samples_per_channel; ++j) {
channel[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels;
}
}
}
// `Interleave()` variant for cases where the deinterleaved channels aren't
// represented by a `DeinterleavedView`.
// TODO: b/335805780 - Remove this method. Instead, use Interleave() from
// audio_util.h which requires size checked buffer views.
template <typename T>
void Interleave(const T* const* deinterleaved,
size_t samples_per_channel,
size_t num_channels,
InterleavedView<T>& interleaved) {
RTC_DCHECK_EQ(NumChannels(interleaved), num_channels);
RTC_DCHECK_EQ(SamplesPerChannel(interleaved), samples_per_channel);
for (size_t i = 0; i < num_channels; ++i) {
const T* channel = deinterleaved[i];
size_t interleaved_idx = i;
for (size_t j = 0; j < samples_per_channel; ++j) {
interleaved[interleaved_idx] = channel[j];
interleaved_idx += num_channels;
}
}
}
// Helper to encapsulate a contiguous data buffer, full or split into frequency
// bands, with access to pointer arrays of the deinterleaved channels and
// bands. The buffer is zero initialized at creation.
//
// The buffer structure is shown below for a 2-channel, 2-band case:
//
// `data_`:
// { [ --- b1ch1 --- ] [ --- b2ch1 --- ] [ --- b1ch2 --- ] [ --- b2ch2 --- ] }
//
// The pointer arrays for the same example are as follows:
//
// `channels_`:
// { [ b1ch1* ] [ b1ch2* ] [ b2ch1* ] [ b2ch2* ] }
//
// `bands_`:
// { [ b1ch1* ] [ b2ch1* ] [ b1ch2* ] [ b2ch2* ] }
template <typename T>
class ChannelBuffer {
public:
ChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1)
: data_(new T[num_frames * num_channels]()),
channels_(new T*[num_channels * num_bands]),
bands_(new T*[num_channels * num_bands]),
num_frames_(num_frames),
num_frames_per_band_(num_frames / num_bands),
num_allocated_channels_(num_channels),
num_channels_(num_channels),
num_bands_(num_bands),
bands_view_(num_allocated_channels_,
std::vector<rtc::ArrayView<T>>(num_bands_)),
channels_view_(
num_bands_,
std::vector<rtc::ArrayView<T>>(num_allocated_channels_)) {
// Temporarily cast away const_ness to allow populating the array views.
auto* bands_view =
const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(&bands_view_);
auto* channels_view =
const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(
&channels_view_);
for (size_t ch = 0; ch < num_allocated_channels_; ++ch) {
for (size_t band = 0; band < num_bands_; ++band) {
(*channels_view)[band][ch] = rtc::ArrayView<T>(
&data_[ch * num_frames_ + band * num_frames_per_band_],
num_frames_per_band_);
(*bands_view)[ch][band] = channels_view_[band][ch];
channels_[band * num_allocated_channels_ + ch] =
channels_view_[band][ch].data();
bands_[ch * num_bands_ + band] =
channels_[band * num_allocated_channels_ + ch];
}
}
}
// Returns a pointer array to the channels.
// If band is explicitly specified, the channels for a specific band are
// returned and the usage becomes: channels(band)[channel][sample].
// Where:
// 0 <= band < `num_bands_`
// 0 <= channel < `num_allocated_channels_`
// 0 <= sample < `num_frames_per_band_`
// If band is not explicitly specified, the full-band channels (or lower band
// channels) are returned and the usage becomes: channels()[channel][sample].
// Where:
// 0 <= channel < `num_allocated_channels_`
// 0 <= sample < `num_frames_`
const T* const* channels(size_t band = 0) const {
RTC_DCHECK_LT(band, num_bands_);
return &channels_[band * num_allocated_channels_];
}
T* const* channels(size_t band = 0) {
const ChannelBuffer<T>* t = this;
return const_cast<T* const*>(t->channels(band));
}
rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) {
return channels_view_[band];
}
rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) const {
return channels_view_[band];
}
// Returns a pointer array to the bands for a specific channel.
// Usage:
// bands(channel)[band][sample].
// Where:
// 0 <= channel < `num_channels_`
// 0 <= band < `num_bands_`
// 0 <= sample < `num_frames_per_band_`
const T* const* bands(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
RTC_DCHECK_GE(channel, 0);
return &bands_[channel * num_bands_];
}
T* const* bands(size_t channel) {
const ChannelBuffer<T>* t = this;
return const_cast<T* const*>(t->bands(channel));
}
rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) {
return bands_view_[channel];
}
rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) const {
return bands_view_[channel];
}
// Sets the `slice` pointers to the `start_frame` position for each channel.
// Returns `slice` for convenience.
const T* const* Slice(T** slice, size_t start_frame) const {
RTC_DCHECK_LT(start_frame, num_frames_);
for (size_t i = 0; i < num_channels_; ++i)
slice[i] = &channels_[i][start_frame];
return slice;
}
T** Slice(T** slice, size_t start_frame) {
const ChannelBuffer<T>* t = this;
return const_cast<T**>(t->Slice(slice, start_frame));
}
size_t num_frames() const { return num_frames_; }
size_t num_frames_per_band() const { return num_frames_per_band_; }
size_t num_channels() const { return num_channels_; }
size_t num_bands() const { return num_bands_; }
size_t size() const { return num_frames_ * num_allocated_channels_; }
void set_num_channels(size_t num_channels) {
RTC_DCHECK_LE(num_channels, num_allocated_channels_);
num_channels_ = num_channels;
}
void SetDataForTesting(const T* data, size_t size) {
RTC_CHECK_EQ(size, this->size());
memcpy(data_.get(), data, size * sizeof(*data));
}
private:
std::unique_ptr<T[]> data_;
std::unique_ptr<T*[]> channels_;
std::unique_ptr<T*[]> bands_;
const size_t num_frames_;
const size_t num_frames_per_band_;
// Number of channels the internal buffer holds.
const size_t num_allocated_channels_;
// Number of channels the user sees.
size_t num_channels_;
const size_t num_bands_;
const std::vector<std::vector<rtc::ArrayView<T>>> bands_view_;
const std::vector<std::vector<rtc::ArrayView<T>>> channels_view_;
};
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf_const() and fbuf_const()
// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
// fbuf() until the next call to any of the other functions.
class IFChannelBuffer {
public:
IFChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1);
~IFChannelBuffer();
ChannelBuffer<int16_t>* ibuf();
ChannelBuffer<float>* fbuf();
const ChannelBuffer<int16_t>* ibuf_const() const;
const ChannelBuffer<float>* fbuf_const() const;
size_t num_frames() const { return ibuf_.num_frames(); }
size_t num_frames_per_band() const { return ibuf_.num_frames_per_band(); }
size_t num_channels() const {
return ivalid_ ? ibuf_.num_channels() : fbuf_.num_channels();
}
void set_num_channels(size_t num_channels) {
ibuf_.set_num_channels(num_channels);
fbuf_.set_num_channels(num_channels);
}
size_t num_bands() const { return ibuf_.num_bands(); }
private:
void RefreshF() const;
void RefreshI() const;
mutable bool ivalid_;
mutable ChannelBuffer<int16_t> ibuf_;
mutable bool fvalid_;
mutable ChannelBuffer<float> fbuf_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_CHANNEL_BUFFER_H_
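A sketch (not part of this commit, with made-up sizes) of the indexing implied by the layout diagram above, plus the IFChannelBuffer re-sync behavior:

#include <cstdint>

#include "common_audio/channel_buffer.h"

void IllustrateChannelBuffer() {
  // 2 channels, 2 bands: 320 full-band frames, 160 frames per band.
  webrtc::ChannelBuffer<float> buf(/*num_frames=*/320, /*num_channels=*/2,
                                   /*num_bands=*/2);
  // Both expressions address the same 160-sample block (band 2 of channel 2).
  float* via_channels = buf.channels(/*band=*/1)[1];
  float* via_bands = buf.bands(/*channel=*/1)[1];
  RTC_DCHECK_EQ(via_channels, via_bands);

  // Writing through fbuf() invalidates the int16_t view; the next
  // ibuf_const() call re-syncs it via RefreshI().
  webrtc::IFChannelBuffer ifbuf(/*num_frames=*/160, /*num_channels=*/1);
  ifbuf.fbuf()->channels()[0][0] = 8192.f;           // FloatS16 range
  int16_t s = ifbuf.ibuf_const()->channels()[0][0];  // s == 8192
  (void)s;
}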

@ -0,0 +1,204 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
#define COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
#include "api/audio/audio_view.h"
#include "rtc_base/checks.h"
namespace webrtc {
typedef std::numeric_limits<int16_t> limits_int16;
// TODO(tommi, peah): Move these constants to their own header, e.g.
// `audio_constants.h`. Also consider if they should be in api/.
// Absolute highest acceptable sample rate supported for audio processing,
// capture and codecs. Note that for some components a lower limit applies,
// typically 48000, though in some cases it is lower.
constexpr int kMaxSampleRateHz = 384000;
// Number of samples per channel for 10ms of audio at the highest sample rate.
constexpr size_t kMaxSamplesPerChannel10ms = kMaxSampleRateHz / 100u;
// The conversion functions use the following naming convention:
// S16: int16_t [-32768, 32767]
// Float: float [-1.0, 1.0]
// FloatS16: float [-32768.0, 32768.0]
// Dbfs: float [-20.0*log10(32768), 0] = [-90.3, 0]
// The ratio conversion functions use this naming convention:
// Ratio: float (0, +inf)
// Db: float (-inf, +inf)
static inline float S16ToFloat(int16_t v) {
constexpr float kScaling = 1.f / 32768.f;
return v * kScaling;
}
static inline int16_t FloatS16ToS16(float v) {
v = std::min(v, 32767.f);
v = std::max(v, -32768.f);
return static_cast<int16_t>(v + std::copysign(0.5f, v));
}
static inline int16_t FloatToS16(float v) {
v *= 32768.f;
v = std::min(v, 32767.f);
v = std::max(v, -32768.f);
return static_cast<int16_t>(v + std::copysign(0.5f, v));
}
static inline float FloatToFloatS16(float v) {
v = std::min(v, 1.f);
v = std::max(v, -1.f);
return v * 32768.f;
}
static inline float FloatS16ToFloat(float v) {
v = std::min(v, 32768.f);
v = std::max(v, -32768.f);
constexpr float kScaling = 1.f / 32768.f;
return v * kScaling;
}
void FloatToS16(const float* src, size_t size, int16_t* dest);
void S16ToFloat(const int16_t* src, size_t size, float* dest);
void S16ToFloatS16(const int16_t* src, size_t size, float* dest);
void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
void FloatToFloatS16(const float* src, size_t size, float* dest);
void FloatS16ToFloat(const float* src, size_t size, float* dest);
inline float DbToRatio(float v) {
return std::pow(10.0f, v / 20.0f);
}
inline float DbfsToFloatS16(float v) {
static constexpr float kMaximumAbsFloatS16 = -limits_int16::min();
return DbToRatio(v) * kMaximumAbsFloatS16;
}
inline float FloatS16ToDbfs(float v) {
RTC_DCHECK_GE(v, 0);
// kMinDbfs is equal to -20.0 * log10(-limits_int16::min())
static constexpr float kMinDbfs = -90.30899869919436f;
if (v <= 1.0f) {
return kMinDbfs;
}
// Equal to 20 * log10(v / (-limits_int16::min()))
return 20.0f * std::log10(v) + kMinDbfs;
}
// Copy audio from `src` channels to `dest` channels unless `src` and `dest`
// point to the same address. `src` and `dest` must have the same number of
// channels, and there must be sufficient space allocated in `dest`.
// TODO: b/335805780 - Accept ArrayView.
template <typename T>
void CopyAudioIfNeeded(const T* const* src,
int num_frames,
int num_channels,
T* const* dest) {
for (int i = 0; i < num_channels; ++i) {
if (src[i] != dest[i]) {
std::copy(src[i], src[i] + num_frames, dest[i]);
}
}
}
// Deinterleave audio from `interleaved` to the channel buffers pointed to
// by `deinterleaved`. There must be sufficient space allocated in the
// `deinterleaved` buffers (`num_channel` buffers with `samples_per_channel`
// per buffer).
template <typename T>
void Deinterleave(const InterleavedView<const T>& interleaved,
const DeinterleavedView<T>& deinterleaved) {
RTC_DCHECK_EQ(NumChannels(interleaved), NumChannels(deinterleaved));
RTC_DCHECK_EQ(SamplesPerChannel(interleaved),
SamplesPerChannel(deinterleaved));
const auto num_channels = NumChannels(interleaved);
const auto samples_per_channel = SamplesPerChannel(interleaved);
for (size_t i = 0; i < num_channels; ++i) {
MonoView<T> channel = deinterleaved[i];
size_t interleaved_idx = i;
for (size_t j = 0; j < samples_per_channel; ++j) {
channel[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels;
}
}
}
// Interleave audio from the channel buffers pointed to by `deinterleaved` to
// `interleaved`. There must be sufficient space allocated in `interleaved`
// (`samples_per_channel` * `num_channels`).
template <typename T>
void Interleave(const DeinterleavedView<const T>& deinterleaved,
const InterleavedView<T>& interleaved) {
RTC_DCHECK_EQ(NumChannels(interleaved), NumChannels(deinterleaved));
RTC_DCHECK_EQ(SamplesPerChannel(interleaved),
SamplesPerChannel(deinterleaved));
for (size_t i = 0; i < deinterleaved.num_channels(); ++i) {
const auto channel = deinterleaved[i];
size_t interleaved_idx = i;
for (size_t j = 0; j < deinterleaved.samples_per_channel(); ++j) {
interleaved[interleaved_idx] = channel[j];
interleaved_idx += deinterleaved.num_channels();
}
}
}
// Downmixes an interleaved multichannel signal to a single channel by averaging
// all channels.
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
template <typename T, typename Intermediate>
void DownmixInterleavedToMonoImpl(const T* interleaved,
size_t num_frames,
int num_channels,
T* deinterleaved) {
RTC_DCHECK_GT(num_channels, 0);
RTC_DCHECK_GT(num_frames, 0);
const T* const end = interleaved + num_frames * num_channels;
while (interleaved < end) {
const T* const frame_end = interleaved + num_channels;
Intermediate value = *interleaved++;
while (interleaved < frame_end) {
value += *interleaved++;
}
*deinterleaved++ = value / num_channels;
}
}
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
template <typename T>
void DownmixInterleavedToMono(const T* interleaved,
size_t num_frames,
int num_channels,
T* deinterleaved);
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
template <>
void DownmixInterleavedToMono<int16_t>(const int16_t* interleaved,
size_t num_frames,
int num_channels,
int16_t* deinterleaved);
} // namespace webrtc
#endif // COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
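A sketch (not part of this commit) of the S16 / Float / FloatS16 naming convention documented above, round-tripping a half-scale sample:

#include <cstdint>

#include "common_audio/include/audio_util.h"

void ConvertHalfScale() {
  int16_t s16 = 16384;                         // S16, range [-32768, 32767]
  float f = webrtc::S16ToFloat(s16);           // 0.5f, Float range [-1, 1]
  float fs16 = webrtc::FloatToFloatS16(f);     // 16384.f, FloatS16 range
  int16_t back = webrtc::FloatS16ToS16(fs16);  // 16384 again
  (void)back;
}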

@ -0,0 +1,102 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/resampler/push_sinc_resampler.h"
#include <cstring>
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
namespace webrtc {
PushSincResampler::PushSincResampler(size_t source_frames,
size_t destination_frames)
: resampler_(new SincResampler(source_frames * 1.0 / destination_frames,
source_frames,
this)),
source_ptr_(nullptr),
source_ptr_int_(nullptr),
destination_frames_(destination_frames),
first_pass_(true),
source_available_(0) {}
PushSincResampler::~PushSincResampler() {}
size_t PushSincResampler::Resample(const int16_t* source,
size_t source_length,
int16_t* destination,
size_t destination_capacity) {
if (!float_buffer_.get())
float_buffer_.reset(new float[destination_frames_]);
source_ptr_int_ = source;
// Pass nullptr as the float source to have Run() read from the int16 source.
Resample(nullptr, source_length, float_buffer_.get(), destination_frames_);
FloatS16ToS16(float_buffer_.get(), destination_frames_, destination);
source_ptr_int_ = nullptr;
return destination_frames_;
}
size_t PushSincResampler::Resample(const float* source,
size_t source_length,
float* destination,
size_t destination_capacity) {
RTC_CHECK_EQ(source_length, resampler_->request_frames());
RTC_CHECK_GE(destination_capacity, destination_frames_);
// Cache the source pointer. Calling Resample() will immediately trigger
// the Run() callback whereupon we provide the cached value.
source_ptr_ = source;
source_available_ = source_length;
// On the first pass, we call Resample() twice. During the first call, we
// provide dummy input and discard the output. This is done to prime the
// SincResampler buffer with the correct delay (half the kernel size), thereby
// ensuring that all later Resample() calls will only result in one input
// request through Run().
//
// If this wasn't done, SincResampler would call Run() twice on the first
// pass, and we'd have to introduce an entire `source_frames` of delay, rather
// than the minimum half kernel.
//
// It works out that ChunkSize() is exactly the amount of output we need to
// request in order to prime the buffer with a single Run() request for
// `source_frames`.
if (first_pass_)
resampler_->Resample(resampler_->ChunkSize(), destination);
resampler_->Resample(destination_frames_, destination);
source_ptr_ = nullptr;
return destination_frames_;
}
void PushSincResampler::Run(size_t frames, float* destination) {
// Ensure we are only asked for the available samples. This would fail if
// Run() was triggered more than once per Resample() call.
RTC_CHECK_EQ(source_available_, frames);
if (first_pass_) {
// Provide dummy input on the first pass, the output of which will be
// discarded, as described in Resample().
std::memset(destination, 0, frames * sizeof(*destination));
first_pass_ = false;
return;
}
if (source_ptr_) {
std::memcpy(destination, source_ptr_, frames * sizeof(*destination));
} else {
for (size_t i = 0; i < frames; ++i)
destination[i] = static_cast<float>(source_ptr_int_[i]);
}
source_available_ -= frames;
}
} // namespace webrtc

@ -0,0 +1,88 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
#define COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "api/audio/audio_view.h"
#include "common_audio/resampler/sinc_resampler.h"
namespace webrtc {
// A thin wrapper over SincResampler to provide a push-based interface as
// required by WebRTC. SincResampler uses a pull-based interface, and will
// use SincResamplerCallback::Run() to request data upon a call to Resample().
// These Run() calls will happen on the same thread Resample() is called on.
class PushSincResampler : public SincResamplerCallback {
public:
// Provide the size of the source and destination blocks in samples. These
// must correspond to the same time duration (typically 10 ms) as the sample
// ratio is inferred from them.
PushSincResampler(size_t source_frames, size_t destination_frames);
~PushSincResampler() override;
PushSincResampler(const PushSincResampler&) = delete;
PushSincResampler& operator=(const PushSincResampler&) = delete;
// Perform the resampling. `source_frames` must always equal the
// `source_frames` provided at construction. `destination_capacity` must be
// at least as large as `destination_frames`. Returns the number of samples
// provided in destination (for convenience, since this will always be equal
// to `destination_frames`).
template <typename S, typename D>
size_t Resample(const MonoView<S>& source, const MonoView<D>& destination) {
return Resample(&source[0], SamplesPerChannel(source), &destination[0],
SamplesPerChannel(destination));
}
size_t Resample(const int16_t* source,
size_t source_frames,
int16_t* destination,
size_t destination_capacity);
size_t Resample(const float* source,
size_t source_frames,
float* destination,
size_t destination_capacity);
// Delay due to the filter kernel. Essentially, the time after which an input
// sample will appear in the resampled output.
static float AlgorithmicDelaySeconds(int source_rate_hz) {
return 1.f / source_rate_hz * SincResampler::kKernelSize / 2;
}
protected:
// Implements SincResamplerCallback.
void Run(size_t frames, float* destination) override;
private:
friend class PushSincResamplerTest;
SincResampler* get_resampler_for_testing() { return resampler_.get(); }
std::unique_ptr<SincResampler> resampler_;
std::unique_ptr<float[]> float_buffer_;
const float* source_ptr_;
const int16_t* source_ptr_int_;
const size_t destination_frames_;
// True on the first call to Resample(), to prime the SincResampler buffer.
bool first_pass_;
// Used to assert we are only requested for as much data as is available.
size_t source_available_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
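A usage sketch (not part of this commit): both frame counts describe the same 10 ms of audio, which is how the resampling ratio is inferred.

#include <vector>

#include "common_audio/resampler/push_sinc_resampler.h"

void Downsample48kTo16k() {
  constexpr size_t kSourceFrames = 480;       // 10 ms at 48 kHz
  constexpr size_t kDestinationFrames = 160;  // 10 ms at 16 kHz
  webrtc::PushSincResampler resampler(kSourceFrames, kDestinationFrames);
  std::vector<float> in(kSourceFrames);  // filled with captured audio
  std::vector<float> out(kDestinationFrames);
  // Each call consumes exactly kSourceFrames samples and produces exactly
  // kDestinationFrames samples.
  resampler.Resample(in.data(), in.size(), out.data(), out.size());
}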

@ -0,0 +1,366 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original:
// src/media/base/sinc_resampler.cc
// Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_
// and r4_ will move after the first load):
//
// |----------------|-----------------------------------------|----------------|
//
// request_frames_
// <--------------------------------------------------------->
// r0_ (during first load)
//
// kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 kKernelSize / 2
// <---------------> <---------------> <---------------> <--------------->
// r1_ r2_ r3_ r4_
//
// block_size_ == r4_ - r2_
// <--------------------------------------->
//
// request_frames_
// <------------------ ... ----------------->
// r0_ (during second load)
//
// On the second request r0_ slides to the right by kKernelSize / 2 and r3_, r4_
// and block_size_ are reinitialized via step (3) in the algorithm below.
//
// These new regions remain constant until a Flush() occurs. While complicated,
// this allows us to reduce jitter by always requesting the same amount from the
// provided callback.
//
// The algorithm:
//
// 1) Allocate input_buffer of size: request_frames_ + kKernelSize; this ensures
// there's enough room to read request_frames_ from the callback into region
// r0_ (which will move between the first and subsequent passes).
//
// 2) Let r1_, r2_ each represent half the kernel centered around r0_:
//
// r0_ = input_buffer_ + kKernelSize / 2
// r1_ = input_buffer_
// r2_ = r0_
//
// r0_ is always request_frames_ in size. r1_, r2_ are kKernelSize / 2 in
// size. r1_ must be zero initialized to avoid convolution with garbage (see
// step (5) for why).
//
// 3) Let r3_, r4_ each represent half the kernel right aligned with the end of
// r0_ and choose block_size_ as the distance in frames between r4_ and r2_:
//
// r3_ = r0_ + request_frames_ - kKernelSize
// r4_ = r0_ + request_frames_ - kKernelSize / 2
// block_size_ = r4_ - r2_ = request_frames_ - kKernelSize / 2
//
// 4) Consume request_frames_ frames into r0_.
//
// 5) Position kernel centered at start of r2_ and generate output frames until
// the kernel is centered at the start of r4_ or we've finished generating
// all the output frames.
//
// 6) Wrap left over data from the r3_ to r1_ and r4_ to r2_.
//
// 7) If we're on the second load, in order to avoid overwriting the frames we
// just wrapped from r4_ we need to slide r0_ to the right by the size of
// r4_, which is kKernelSize / 2:
//
// r0_ = r0_ + kKernelSize / 2 = input_buffer_ + kKernelSize
//
// r3_, r4_, and block_size_ then need to be reinitialized, so goto (3).
//
// 8) Else, if we're not on the second load, goto (4).
//
// Note: we're glossing over how the sub-sample handling works with
// `virtual_source_idx_`, etc.
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include "common_audio/resampler/sinc_resampler.h"
#include <math.h>
#include <stdint.h>
#include <string.h>
#include <limits>
#include "rtc_base/checks.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h" // kSSE2, WebRtc_G...
namespace webrtc {
namespace {
double SincScaleFactor(double io_ratio) {
// `sinc_scale_factor` is basically the normalized cutoff frequency of the
// low-pass filter.
double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0;
// The sinc function is an idealized brick-wall filter, but since we're
// windowing it the transition from pass to stop does not happen right away.
// So we should adjust the low pass filter cutoff slightly downward to avoid
// some aliasing at the very high-end.
// TODO(crogers): this value is empirical and to be more exact should vary
// depending on kKernelSize.
sinc_scale_factor *= 0.9;
return sinc_scale_factor;
}
} // namespace
const size_t SincResampler::kKernelSize;
// If we know the minimum architecture at compile time, avoid CPU detection.
void SincResampler::InitializeCPUSpecificFeatures() {
#if defined(WEBRTC_HAS_NEON)
convolve_proc_ = Convolve_NEON;
#elif defined(WEBRTC_ARCH_X86_FAMILY)
// Using AVX2 instead of SSE2 when AVX2/FMA3 supported.
if (GetCPUInfo(kAVX2) && GetCPUInfo(kFMA3))
convolve_proc_ = Convolve_AVX2;
else if (GetCPUInfo(kSSE2))
convolve_proc_ = Convolve_SSE;
else
convolve_proc_ = Convolve_C;
#else
// Unknown architecture.
convolve_proc_ = Convolve_C;
#endif
}
SincResampler::SincResampler(double io_sample_rate_ratio,
size_t request_frames,
SincResamplerCallback* read_cb)
: io_sample_rate_ratio_(io_sample_rate_ratio),
read_cb_(read_cb),
request_frames_(request_frames),
input_buffer_size_(request_frames_ + kKernelSize),
// Create input buffers with a 32-byte alignment for SIMD optimizations.
kernel_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
kernel_pre_sinc_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
kernel_window_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
input_buffer_(static_cast<float*>(
AlignedMalloc(sizeof(float) * input_buffer_size_, 32))),
convolve_proc_(nullptr),
r1_(input_buffer_.get()),
r2_(input_buffer_.get() + kKernelSize / 2) {
InitializeCPUSpecificFeatures();
RTC_DCHECK(convolve_proc_);
RTC_DCHECK_GT(request_frames_, 0);
Flush();
RTC_DCHECK_GT(block_size_, kKernelSize);
memset(kernel_storage_.get(), 0,
sizeof(*kernel_storage_.get()) * kKernelStorageSize);
memset(kernel_pre_sinc_storage_.get(), 0,
sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
memset(kernel_window_storage_.get(), 0,
sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);
InitializeKernel();
}
SincResampler::~SincResampler() {}
void SincResampler::UpdateRegions(bool second_load) {
// Setup various region pointers in the buffer (see diagram above). If we're
// on the second load we need to slide r0_ to the right by kKernelSize / 2.
r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2);
r3_ = r0_ + request_frames_ - kKernelSize;
r4_ = r0_ + request_frames_ - kKernelSize / 2;
block_size_ = r4_ - r2_;
// r1_ at the beginning of the buffer.
RTC_DCHECK_EQ(r1_, input_buffer_.get());
// r1_ left of r2_, r4_ left of r3_ and size correct.
RTC_DCHECK_EQ(r2_ - r1_, r4_ - r3_);
// r2_ left of r3.
RTC_DCHECK_LT(r2_, r3_);
}
void SincResampler::InitializeKernel() {
// Blackman window parameters.
static const double kAlpha = 0.16;
static const double kA0 = 0.5 * (1.0 - kAlpha);
static const double kA1 = 0.5;
static const double kA2 = 0.5 * kAlpha;
// Generates a set of windowed sinc() kernels.
// We generate a range of sub-sample offsets from 0.0 to 1.0.
const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
const float subsample_offset =
static_cast<float>(offset_idx) / kKernelOffsetCount;
for (size_t i = 0; i < kKernelSize; ++i) {
const size_t idx = i + offset_idx * kKernelSize;
const float pre_sinc = static_cast<float>(
M_PI * (static_cast<int>(i) - static_cast<int>(kKernelSize / 2) -
subsample_offset));
kernel_pre_sinc_storage_[idx] = pre_sinc;
// Compute Blackman window, matching the offset of the sinc().
const float x = (i - subsample_offset) / kKernelSize;
const float window = static_cast<float>(kA0 - kA1 * cos(2.0 * M_PI * x) +
kA2 * cos(4.0 * M_PI * x));
kernel_window_storage_[idx] = window;
// Compute the sinc with offset, then window the sinc() function and store
// at the correct offset.
kernel_storage_[idx] = static_cast<float>(
window * ((pre_sinc == 0)
? sinc_scale_factor
: (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
}
}
}
void SincResampler::SetRatio(double io_sample_rate_ratio) {
if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <
std::numeric_limits<double>::epsilon()) {
return;
}
io_sample_rate_ratio_ = io_sample_rate_ratio;
// Optimize reinitialization by reusing values which are independent of
// `sinc_scale_factor`. Provides a 3x speedup.
const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
for (size_t i = 0; i < kKernelSize; ++i) {
const size_t idx = i + offset_idx * kKernelSize;
const float window = kernel_window_storage_[idx];
const float pre_sinc = kernel_pre_sinc_storage_[idx];
kernel_storage_[idx] = static_cast<float>(
window * ((pre_sinc == 0)
? sinc_scale_factor
: (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
}
}
}
void SincResampler::Resample(size_t frames, float* destination) {
size_t remaining_frames = frames;
// Step (1) -- Prime the input buffer at the start of the input stream.
if (!buffer_primed_ && remaining_frames) {
read_cb_->Run(request_frames_, r0_);
buffer_primed_ = true;
}
// Step (2) -- Resample! const what we can outside of the loop for speed. It
// actually has an impact on ARM performance. See inner loop comment below.
const double current_io_ratio = io_sample_rate_ratio_;
const float* const kernel_ptr = kernel_storage_.get();
while (remaining_frames) {
// `i` may be negative if the last Resample() call ended on an iteration
// that put `virtual_source_idx_` over the limit.
//
// Note: The loop construct here can severely impact performance on ARM
// or when built with clang. See https://codereview.chromium.org/18566009/
for (int i = static_cast<int>(
ceil((block_size_ - virtual_source_idx_) / current_io_ratio));
i > 0; --i) {
RTC_DCHECK_LT(virtual_source_idx_, block_size_);
// `virtual_source_idx_` lies in between two kernel offsets so figure out
// what they are.
const int source_idx = static_cast<int>(virtual_source_idx_);
const double subsample_remainder = virtual_source_idx_ - source_idx;
const double virtual_offset_idx =
subsample_remainder * kKernelOffsetCount;
const int offset_idx = static_cast<int>(virtual_offset_idx);
// We'll compute "convolutions" for the two kernels which straddle
// `virtual_source_idx_`.
const float* const k1 = kernel_ptr + offset_idx * kKernelSize;
const float* const k2 = k1 + kKernelSize;
// Ensure `k1`, `k2` are 32-byte aligned for SIMD usage. Should always be
// true so long as kKernelSize is a multiple of 32.
RTC_DCHECK_EQ(0, reinterpret_cast<uintptr_t>(k1) % 32);
RTC_DCHECK_EQ(0, reinterpret_cast<uintptr_t>(k2) % 32);
// Initialize input pointer based on quantized `virtual_source_idx_`.
const float* const input_ptr = r1_ + source_idx;
// Figure out how much to weight each kernel's "convolution".
const double kernel_interpolation_factor =
virtual_offset_idx - offset_idx;
*destination++ =
convolve_proc_(input_ptr, k1, k2, kernel_interpolation_factor);
// Advance the virtual index.
virtual_source_idx_ += current_io_ratio;
if (!--remaining_frames)
return;
}
// Wrap back around to the start.
virtual_source_idx_ -= block_size_;
// Step (3) -- Copy r3_, r4_ to r1_, r2_.
// This wraps the last input frames back to the start of the buffer.
memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);
// Step (4) -- Reinitialize regions if necessary.
if (r0_ == r2_)
UpdateRegions(true);
// Step (5) -- Refresh the buffer with more input.
read_cb_->Run(request_frames_, r0_);
}
}
#undef CONVOLVE_FUNC
size_t SincResampler::ChunkSize() const {
return static_cast<size_t>(block_size_ / io_sample_rate_ratio_);
}
void SincResampler::Flush() {
virtual_source_idx_ = 0;
buffer_primed_ = false;
memset(input_buffer_.get(), 0,
sizeof(*input_buffer_.get()) * input_buffer_size_);
UpdateRegions(false);
}
float SincResampler::Convolve_C(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor) {
float sum1 = 0;
float sum2 = 0;
// Generate a single output sample. Unrolling this loop hurt performance in
// local testing.
size_t n = kKernelSize;
while (n--) {
sum1 += *input_ptr * *k1++;
sum2 += *input_ptr++ * *k2++;
}
// Linearly interpolate the two "convolutions".
return static_cast<float>((1.0 - kernel_interpolation_factor) * sum1 +
kernel_interpolation_factor * sum2);
}
} // namespace webrtc

View File

@ -0,0 +1,181 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original here:
// src/media/base/sinc_resampler.h
#ifndef COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
#define COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
#include <stddef.h>
#include <memory>
#include "rtc_base/gtest_prod_util.h"
#include "rtc_base/memory/aligned_malloc.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
// Callback class for providing more data into the resampler. Expects `frames`
// of data to be rendered into `destination`; zero padded if not enough frames
// are available to satisfy the request.
class SincResamplerCallback {
public:
virtual ~SincResamplerCallback() {}
virtual void Run(size_t frames, float* destination) = 0;
};
// SincResampler is a high-quality single-channel sample-rate converter.
class SincResampler {
public:
// The kernel size can be adjusted for quality (higher is better) at the
// expense of performance. Must be a multiple of 32.
// TODO(dalecurtis): Test performance to see if we can jack this up to 64+.
static const size_t kKernelSize = 32;
// Default request size. Affects how often and for how much SincResampler
// calls back for input. Must be greater than kKernelSize.
static const size_t kDefaultRequestSize = 512;
// The kernel offset count is used for interpolation and is the number of
// sub-sample kernel shifts. Can be adjusted for quality (higher is better)
// at the expense of allocating more memory.
static const size_t kKernelOffsetCount = 32;
static const size_t kKernelStorageSize =
kKernelSize * (kKernelOffsetCount + 1);
// Constructs a SincResampler with the specified `read_cb`, which is used to
// acquire audio data for resampling. `io_sample_rate_ratio` is the ratio
// of input / output sample rates. `request_frames` controls the size in
// frames of the buffer requested by each `read_cb` call. The value must be
// greater than kKernelSize. Specify kDefaultRequestSize if there are no
// request size constraints.
SincResampler(double io_sample_rate_ratio,
size_t request_frames,
SincResamplerCallback* read_cb);
virtual ~SincResampler();
SincResampler(const SincResampler&) = delete;
SincResampler& operator=(const SincResampler&) = delete;
// Resample `frames` of data from `read_cb_` into `destination`.
void Resample(size_t frames, float* destination);
// The maximum size in frames that guarantees Resample() will only make a
// single call to `read_cb_` for more data.
size_t ChunkSize() const;
size_t request_frames() const { return request_frames_; }
// Flush all buffered data and reset internal indices. Not thread safe, do
// not call while Resample() is in progress.
void Flush();
// Update `io_sample_rate_ratio_`. SetRatio() will cause a reconstruction of
// the kernels used for resampling. Not thread safe, do not call while
// Resample() is in progress.
//
// TODO(ajm): Use this in PushSincResampler rather than reconstructing
// SincResampler. We would also need a way to update `request_frames_`.
void SetRatio(double io_sample_rate_ratio);
float* get_kernel_for_testing() { return kernel_storage_.get(); }
private:
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve);
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark);
void InitializeKernel();
void UpdateRegions(bool second_load);
// Selects runtime specific CPU features like SSE. Must be called before
// using SincResampler.
// TODO(ajm): Currently managed by the class internally. See the note with
// `convolve_proc_` below.
void InitializeCPUSpecificFeatures();
// Compute convolution of `k1` and `k2` over `input_ptr`, resultant sums are
// linearly interpolated using `kernel_interpolation_factor`. On x86 and ARM
// the underlying implementation is chosen at run time.
static float Convolve_C(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#if defined(WEBRTC_ARCH_X86_FAMILY)
static float Convolve_SSE(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
static float Convolve_AVX2(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#elif defined(WEBRTC_HAS_NEON)
static float Convolve_NEON(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#endif
// The ratio of input / output sample rates.
double io_sample_rate_ratio_;
// An index on the source input buffer with sub-sample precision. It must be
// double precision to avoid drift.
double virtual_source_idx_;
// The buffer is primed once at the very beginning of processing.
bool buffer_primed_;
// Source of data for resampling.
SincResamplerCallback* read_cb_;
// The size (in samples) to request from each `read_cb_` execution.
const size_t request_frames_;
// The number of source frames processed per pass.
size_t block_size_;
// The size (in samples) of the internal buffer used by the resampler.
const size_t input_buffer_size_;
// Contains kKernelOffsetCount kernels back-to-back, each of size kKernelSize.
// The kernel offsets are sub-sample shifts of a windowed sinc shifted from
// 0.0 to 1.0 sample.
std::unique_ptr<float[], AlignedFreeDeleter> kernel_storage_;
std::unique_ptr<float[], AlignedFreeDeleter> kernel_pre_sinc_storage_;
std::unique_ptr<float[], AlignedFreeDeleter> kernel_window_storage_;
// Data from the source is copied into this buffer for each processing pass.
std::unique_ptr<float[], AlignedFreeDeleter> input_buffer_;
// Stores the runtime selection of which Convolve function to use.
// TODO(ajm): Move to using a global static which must only be initialized
// once by the user. We're not doing this initially, because we don't have
// e.g. a LazyInstance helper in webrtc.
typedef float (*ConvolveProc)(const float*,
const float*,
const float*,
double);
ConvolveProc convolve_proc_;
// Pointers to the various regions inside `input_buffer_`. See the diagram at
// the top of the .cc file for more information.
float* r0_;
float* const r1_;
float* const r2_;
float* r3_;
float* r4_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
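// A minimal usage sketch (illustrative only; `VectorSource`, the helper name
// and the sample rates are assumptions, not part of the upstream API):
// downsample a mono 48 kHz buffer to 16 kHz with a callback that zero-pads
// once the input is exhausted, matching the SincResamplerCallback contract
// documented above.
#include <vector>

class VectorSource : public webrtc::SincResamplerCallback {
 public:
  explicit VectorSource(const std::vector<float>& samples)
      : samples_(samples) {}
  void Run(size_t frames, float* destination) override {
    for (size_t i = 0; i < frames; ++i) {
      destination[i] = pos_ < samples_.size() ? samples_[pos_++] : 0.f;
    }
  }

 private:
  std::vector<float> samples_;
  size_t pos_ = 0;
};

inline void Downsample48To16(const std::vector<float>& input_48k,
                             std::vector<float>* output_16k) {
  VectorSource source(input_48k);
  // io_sample_rate_ratio is input / output, i.e. 3.0 here.
  webrtc::SincResampler resampler(48000.0 / 16000.0,
                                  webrtc::SincResampler::kDefaultRequestSize,
                                  &source);
  output_16k->resize(input_48k.size() / 3);
  resampler.Resample(output_16k->size(), output_16k->data());
}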

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/dot_product_with_scale.h"
#include "rtc_base/numerics/safe_conversions.h"
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
const int16_t* vector2,
size_t length,
int scaling) {
int64_t sum = 0;
size_t i = 0;
/* Unroll the loop to improve performance. */
for (i = 0; i + 3 < length; i += 4) {
sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
}
for (; i < length; i++) {
sum += (vector1[i] * vector2[i]) >> scaling;
}
return rtc::saturated_cast<int32_t>(sum);
}

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
// Calculates the dot product between two (int16_t) vectors.
//
// Input:
// - vector1 : Vector 1
// - vector2 : Vector 2
// - vector_length : Number of samples used in the dot product
// - scaling : The number of right bit shifts to apply on each term
// during calculation to avoid overflow, i.e., the
// output will be in Q(-`scaling`)
//
// Return value : The dot product in Q(-scaling)
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
const int16_t* vector2,
size_t length,
int scaling);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
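// A small worked example (values assumed for illustration): with scaling = 2
// every product term is right-shifted by 2 before accumulation, so the
// result is the dot product expressed in Q(-2).
static void DotProductWithScaleExample(void) {
  const int16_t v1[4] = {100, 200, 300, 400};
  const int16_t v2[4] = {10, 20, 30, 40};
  // Exact dot product: 1000 + 4000 + 9000 + 16000 = 30000.
  // Accumulated with scaling = 2: 250 + 1000 + 2250 + 4000 = 7500,
  // i.e. 30000 >> 2.
  int32_t result = WebRtcSpl_DotProductWithScale(v1, v2, 4, 2);
  (void)result;  // result == 7500
}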

File diff suppressed because it is too large


View File

@ -0,0 +1,155 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file includes the inline functions in
// the fix point signal processing library.
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#include <stdint.h>
#include "rtc_base/compile_assert_c.h"
extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
// Normalize n by rounding up to the nearest number that is a sequence of 0
// bits followed by a sequence of 1 bits. This number has the same number of
// leading zeros as the original n. There are exactly 33 such values.
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
n |= n >> 8;
n |= n >> 16;
// Multiply the modified n with a constant selected (by exhaustive search)
// such that each of the 33 possible values of n give a product whose 6 most
// significant bits are unique. Then look up the answer in the table.
return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26];
}
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
const int leading_zeros = n >> 32 == 0 ? 32 : 0;
return leading_zeros + WebRtcSpl_CountLeadingZeros32_NotBuiltin(
(uint32_t)(n >> (32 - leading_zeros)));
}
// Returns the number of leading zero bits in the argument.
static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
#ifdef __GNUC__
RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
return n == 0 ? 32 : __builtin_clz(n);
#else
return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
#endif
}
// Returns the number of leading zero bits in the argument.
static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
#ifdef __GNUC__
RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t)); // NOLINT
return n == 0 ? 64 : __builtin_clzll(n);
#else
return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
#endif
}
#ifdef WEBRTC_ARCH_ARM_V7
#include "common_audio/signal_processing/include/spl_inl_armv7.h"
#else
#if defined(MIPS32_LE)
#include "common_audio/signal_processing/include/spl_inl_mips.h"
#endif
#if !defined(MIPS_DSP_R1_LE)
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
int16_t out16 = (int16_t)value32;
if (value32 > 32767)
out16 = 32767;
else if (value32 < -32768)
out16 = -32768;
return out16;
}
static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
// Do the addition in unsigned numbers, since signed overflow is undefined
// behavior.
const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b);
// a + b can't overflow if a and b have different signs. If they have the
// same sign, a + b also has the same sign iff it didn't overflow.
if ((a < 0) == (b < 0) && (a < 0) != (sum < 0)) {
// The direction of the overflow is obvious from the sign of a + b.
return sum < 0 ? INT32_MAX : INT32_MIN;
}
return sum;
}
static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
// Do the subtraction in unsigned numbers, since signed overflow is undefined
// behavior.
const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b);
// a - b can't overflow if a and b have the same sign. If they have different
// signs, a - b has the same sign as a iff it didn't overflow.
if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) {
// The direction of the overflow is obvious from the sign of a - b.
return diff < 0 ? INT32_MAX : INT32_MIN;
}
return diff;
}
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
return WebRtcSpl_SatW32ToW16((int32_t)a + (int32_t)b);
}
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
return WebRtcSpl_SatW32ToW16((int32_t)var1 - (int32_t)var2);
}
#endif // #if !defined(MIPS_DSP_R1_LE)
#if !defined(MIPS32_LE)
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
return 32 - WebRtcSpl_CountLeadingZeros32(n);
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1;
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a);
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
const int32_t a32 = a;
return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a32 : a32) - 17;
}
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
return (a * b + c);
}
#endif // #if !defined(MIPS32_LE)
#endif // WEBRTC_ARCH_ARM_V7
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
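// Illustrative sketch (values assumed; relies on the generic C paths above):
// the saturating helpers clamp at the type limits instead of wrapping, which
// plain integer '+'/'-' would do (signed overflow is undefined behavior).
static void SaturationExamples(void) {
  int32_t a = WebRtcSpl_AddSatW32(INT32_MAX, 1);  // clamps to INT32_MAX
  int32_t b = WebRtcSpl_SubSatW32(INT32_MIN, 1);  // clamps to INT32_MIN
  int16_t c = WebRtcSpl_SatW32ToW16(40000);       // clamps to 32767
  int16_t n = WebRtcSpl_NormW32(0x1000);          // 18: 4096 << 18 < 2^31
  (void)a;
  (void)b;
  (void)c;
  (void)n;
}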

View File

@ -0,0 +1,548 @@
/*
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
* Copyright Takuya OOURA, 1996-2001
*
* You may use, copy, modify and distribute this code for any purpose (include
* commercial use) and without fee. Please refer to this package when you modify
* this code.
*
* Changes by the WebRTC authors:
* - Trivial type modifications.
* - Minimal code subset to do rdft of length 128.
* - Optimizations because of known length.
* - Removed the global variables by moving the code in to a class in order
* to make it thread safe.
*
* All changes are covered by the WebRTC license and IP grant:
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft_tables_common.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
namespace webrtc {
namespace {
#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
static void cft1st_128_C(float* a) {
const int n = 128;
int j, k1, k2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
// The processing of the first set of elements was simplified in C to avoid
// some operations (multiplication by zero or one, addition of two elements
// multiplied by the same weight, ...).
x0r = a[0] + a[2];
x0i = a[1] + a[3];
x1r = a[0] - a[2];
x1i = a[1] - a[3];
x2r = a[4] + a[6];
x2i = a[5] + a[7];
x3r = a[4] - a[6];
x3i = a[5] - a[7];
a[0] = x0r + x2r;
a[1] = x0i + x2i;
a[4] = x0r - x2r;
a[5] = x0i - x2i;
a[2] = x1r - x3i;
a[3] = x1i + x3r;
a[6] = x1r + x3i;
a[7] = x1i - x3r;
wk1r = rdft_w[2];
x0r = a[8] + a[10];
x0i = a[9] + a[11];
x1r = a[8] - a[10];
x1i = a[9] - a[11];
x2r = a[12] + a[14];
x2i = a[13] + a[15];
x3r = a[12] - a[14];
x3i = a[13] - a[15];
a[8] = x0r + x2r;
a[9] = x0i + x2i;
a[12] = x2i - x0i;
a[13] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[10] = wk1r * (x0r - x0i);
a[11] = wk1r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[14] = wk1r * (x0i - x0r);
a[15] = wk1r * (x0i + x0r);
k1 = 0;
for (j = 16; j < n; j += 16) {
k1 += 2;
k2 = 2 * k1;
wk2r = rdft_w[k1 + 0];
wk2i = rdft_w[k1 + 1];
wk1r = rdft_w[k2 + 0];
wk1i = rdft_w[k2 + 1];
wk3r = rdft_wk3ri_first[k1 + 0];
wk3i = rdft_wk3ri_first[k1 + 1];
x0r = a[j + 0] + a[j + 2];
x0i = a[j + 1] + a[j + 3];
x1r = a[j + 0] - a[j + 2];
x1i = a[j + 1] - a[j + 3];
x2r = a[j + 4] + a[j + 6];
x2i = a[j + 5] + a[j + 7];
x3r = a[j + 4] - a[j + 6];
x3i = a[j + 5] - a[j + 7];
a[j + 0] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 4] = wk2r * x0r - wk2i * x0i;
a[j + 5] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 2] = wk1r * x0r - wk1i * x0i;
a[j + 3] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 6] = wk3r * x0r - wk3i * x0i;
a[j + 7] = wk3r * x0i + wk3i * x0r;
wk1r = rdft_w[k2 + 2];
wk1i = rdft_w[k2 + 3];
wk3r = rdft_wk3ri_second[k1 + 0];
wk3i = rdft_wk3ri_second[k1 + 1];
x0r = a[j + 8] + a[j + 10];
x0i = a[j + 9] + a[j + 11];
x1r = a[j + 8] - a[j + 10];
x1i = a[j + 9] - a[j + 11];
x2r = a[j + 12] + a[j + 14];
x2i = a[j + 13] + a[j + 15];
x3r = a[j + 12] - a[j + 14];
x3i = a[j + 13] - a[j + 15];
a[j + 8] = x0r + x2r;
a[j + 9] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 12] = -wk2i * x0r - wk2r * x0i;
a[j + 13] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 10] = wk1r * x0r - wk1i * x0i;
a[j + 11] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 14] = wk3r * x0r - wk3i * x0i;
a[j + 15] = wk3r * x0i + wk3i * x0r;
}
}
static void cftmdl_128_C(float* a) {
const int l = 8;
const int n = 128;
const int m = 32;
int j0, j1, j2, j3, k, k1, k2, m2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
for (j0 = 0; j0 < l; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
a[j2 + 0] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1 + 0] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3 + 0] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
wk1r = rdft_w[2];
for (j0 = m; j0 < l + m; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
a[j2 + 0] = x2i - x0i;
a[j2 + 1] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * (x0r - x0i);
a[j1 + 1] = wk1r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[j3 + 0] = wk1r * (x0i - x0r);
a[j3 + 1] = wk1r * (x0i + x0r);
}
k1 = 0;
m2 = 2 * m;
for (k = m2; k < n; k += m2) {
k1 += 2;
k2 = 2 * k1;
wk2r = rdft_w[k1 + 0];
wk2i = rdft_w[k1 + 1];
wk1r = rdft_w[k2 + 0];
wk1i = rdft_w[k2 + 1];
wk3r = rdft_wk3ri_first[k1 + 0];
wk3i = rdft_wk3ri_first[k1 + 1];
for (j0 = k; j0 < l + k; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2 + 0] = wk2r * x0r - wk2i * x0i;
a[j2 + 1] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3 + 0] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
wk1r = rdft_w[k2 + 2];
wk1i = rdft_w[k2 + 3];
wk3r = rdft_wk3ri_second[k1 + 0];
wk3i = rdft_wk3ri_second[k1 + 1];
for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3 + 0] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
}
}
static void rftfsub_128_C(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
xi = a[j2 + 1] + a[k2 + 1];
yr = wkr * xr - wki * xi;
yi = wkr * xi + wki * xr;
a[j2 + 0] -= yr;
a[j2 + 1] -= yi;
a[k2 + 0] += yr;
a[k2 + 1] -= yi;
}
}
static void rftbsub_128_C(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
a[1] = -a[1];
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
xi = a[j2 + 1] + a[k2 + 1];
yr = wkr * xr + wki * xi;
yi = wkr * xi - wki * xr;
a[j2 + 0] = a[j2 + 0] - yr;
a[j2 + 1] = yi - a[j2 + 1];
a[k2 + 0] = yr + a[k2 + 0];
a[k2 + 1] = yi - a[k2 + 1];
}
a[65] = -a[65];
}
#endif
} // namespace
OouraFft::OouraFft(bool sse2_available) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
use_sse2_ = sse2_available;
#else
use_sse2_ = false;
#endif
}
OouraFft::OouraFft() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
use_sse2_ = (GetCPUInfo(kSSE2) != 0);
#else
use_sse2_ = false;
#endif
}
OouraFft::~OouraFft() = default;
void OouraFft::Fft(float* a) const {
float xi;
bitrv2_128(a);
cftfsub_128(a);
rftfsub_128(a);
xi = a[0] - a[1];
a[0] += a[1];
a[1] = xi;
}
void OouraFft::InverseFft(float* a) const {
a[1] = 0.5f * (a[0] - a[1]);
a[0] -= a[1];
rftbsub_128(a);
bitrv2_128(a);
cftbsub_128(a);
}
void OouraFft::cft1st_128(float* a) const {
#if defined(MIPS_FPU_LE)
cft1st_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
cft1st_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
if (use_sse2_) {
cft1st_128_SSE2(a);
} else {
cft1st_128_C(a);
}
#else
cft1st_128_C(a);
#endif
}
void OouraFft::cftmdl_128(float* a) const {
#if defined(MIPS_FPU_LE)
cftmdl_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
cftmdl_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
if (use_sse2_) {
cftmdl_128_SSE2(a);
} else {
cftmdl_128_C(a);
}
#else
cftmdl_128_C(a);
#endif
}
void OouraFft::rftfsub_128(float* a) const {
#if defined(MIPS_FPU_LE)
rftfsub_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
rftfsub_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
if (use_sse2_) {
rftfsub_128_SSE2(a);
} else {
rftfsub_128_C(a);
}
#else
rftfsub_128_C(a);
#endif
}
void OouraFft::rftbsub_128(float* a) const {
#if defined(MIPS_FPU_LE)
rftbsub_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
rftbsub_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
if (use_sse2_) {
rftbsub_128_SSE2(a);
} else {
rftbsub_128_C(a);
}
#else
rftbsub_128_C(a);
#endif
}
void OouraFft::cftbsub_128(float* a) const {
int j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
cft1st_128(a);
cftmdl_128(a);
l = 32;
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = -a[j + 1] - a[j1 + 1];
x1r = a[j] - a[j1];
x1i = -a[j + 1] + a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i - x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i + x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i - x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i + x3r;
}
}
void OouraFft::cftfsub_128(float* a) const {
int j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
cft1st_128(a);
cftmdl_128(a);
l = 32;
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
}
void OouraFft::bitrv2_128(float* a) const {
/*
The following things have been attempted but are no faster:
(a) Storing the swap indexes in a LUT (index calculations are done
for 'free' while waiting on memory/L1).
(b) Consolidate the load/store of two consecutive floats by a 64 bit
integer (execution is memory/L1 bound).
(c) Do a mix of floats and 64 bit integer to maximize register
utilization (execution is memory/L1 bound).
(d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
(e) Hard-coding of the offsets to completely eliminate index
calculations.
*/
unsigned int j, j1, k, k1;
float xr, xi, yr, yi;
const int ip[4] = {0, 64, 32, 96};
for (k = 0; k < 4; k++) {
for (j = 0; j < k; j++) {
j1 = 2 * j + ip[k];
k1 = 2 * k + ip[j];
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += 8;
k1 += 16;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += 8;
k1 -= 8;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += 8;
k1 += 16;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
}
j1 = 2 * k + 8 + ip[k];
k1 = j1 + 8;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
}
}
} // namespace webrtc

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
#include "rtc_base/system/arch.h"
namespace webrtc {
#if defined(WEBRTC_ARCH_X86_FAMILY)
void cft1st_128_SSE2(float* a);
void cftmdl_128_SSE2(float* a);
void rftfsub_128_SSE2(float* a);
void rftbsub_128_SSE2(float* a);
#endif
#if defined(MIPS_FPU_LE)
void cft1st_128_mips(float* a);
void cftmdl_128_mips(float* a);
void rftfsub_128_mips(float* a);
void rftbsub_128_mips(float* a);
#endif
#if defined(WEBRTC_HAS_NEON)
void cft1st_128_neon(float* a);
void cftmdl_128_neon(float* a);
void rftfsub_128_neon(float* a);
void rftbsub_128_neon(float* a);
#endif
class OouraFft {
public:
// Ctor allowing the availability of SSE2 support to be specified.
explicit OouraFft(bool sse2_available);
// Deprecated: This Ctor will soon be removed.
OouraFft();
~OouraFft();
void Fft(float* a) const;
void InverseFft(float* a) const;
private:
void cft1st_128(float* a) const;
void cftmdl_128(float* a) const;
void rftfsub_128(float* a) const;
void rftbsub_128(float* a) const;
void cftfsub_128(float* a) const;
void cftbsub_128(float* a) const;
void bitrv2_128(float* a) const;
bool use_sse2_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
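// A minimal round-trip sketch (assumed usage; the 1/64 rescale mirrors
// kScale = 1.0f / kFftLengthBy2 applied after the inverse transform in
// AdaptiveFirFilter::Constrain() further below):
#include <array>

inline void OouraRoundTripExample() {
  std::array<float, 128> x{};
  x[3] = 1.f;  // arbitrary test signal: a shifted impulse
  webrtc::OouraFft fft;      // deprecated ctor; runtime-detects SSE2
  fft.Fft(x.data());         // in-place forward rdft, packed real spectrum
  fft.InverseFft(x.data());  // in-place inverse
  for (float& v : x) {
    v *= 1.f / 64.f;  // undo the unnormalized transform pair
  }
  // x now holds (approximately) the original impulse again.
}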

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
namespace webrtc {
// These tables used to be computed at run-time. For example, refer to:
// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564
// to see the initialization code.
// Constants shared by all paths (C, SSE2, NEON).
const float rdft_w[64] = {
1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, 0.9238795638f,
0.3826834559f, 0.3826834559f, 0.9238795638f, 0.9807852507f, 0.1950903237f,
0.5555702448f, 0.8314695954f, 0.8314695954f, 0.5555702448f, 0.1950903237f,
0.9807852507f, 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, 0.9569403529f,
0.2902846634f, 0.4713967443f, 0.8819212914f, 0.7730104327f, 0.6343933344f,
0.0980171412f, 0.9951847196f, 0.7071067691f, 0.4993977249f, 0.4975923598f,
0.4945882559f, 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, 0.4157347977f,
0.4016037583f, 0.3865052164f, 0.3704755902f, 0.3535533845f, 0.3357794881f,
0.3171966672f, 0.2978496552f, 0.2777851224f, 0.2570513785f, 0.2356983721f,
0.2137775421f, 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};
// Constants used by the C and MIPS paths.
const float rdft_wk3ri_first[16] = {
1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
};
const float rdft_wk3ri_second[16] = {
-0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
-0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
-0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
-0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_

View File

@ -0,0 +1,77 @@
/*
* Written by Wilco Dijkstra, 1996. The following email exchange establishes the
* license.
*
* From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
* Date: Fri, Jun 24, 2011 at 3:20 AM
* Subject: Re: sqrt routine
* To: Kevin Ma <kma@google.com>
* Hi Kevin,
* Thanks for asking. Those routines are public domain (originally posted to
* comp.sys.arm a long time ago), so you can use them freely for any purpose.
* Cheers,
* Wilco
*
* ----- Original Message -----
* From: "Kevin Ma" <kma@google.com>
* To: <Wilco.Dijkstra@ntlworld.com>
* Sent: Thursday, June 23, 2011 11:44 PM
* Subject: Fwd: sqrt routine
* Hi Wilco,
* I saw your sqrt routine from several web sites, including
* http://www.finesse.demon.co.uk/steven/sqrt.html.
* Just wonder if there's any copyright information with your Successive
* approximation routines, or if I can freely use it for any purpose.
* Thanks.
* Kevin
*/
// Minor modifications in code style for WebRTC, 2012.
#include "common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
/*
* Algorithm:
* Successive approximation of the equation (root + delta) ^ 2 = N
* until delta < 1. If delta < 1 we have the integer part of SQRT (N).
* Use delta = 2^i for i = 15 .. 0.
*
* Output precision is 16 bits. Note for large input values (close to
* 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
* contains the MSB information (a non-sign value). Do with caution
* if you need to cast the output to int16_t type.
*
* If the input value is negative, it returns 0.
*/
#define WEBRTC_SPL_SQRT_ITER(N) \
try1 = root + (1 << (N)); \
if (value >= try1 << (N)) \
{ \
value -= try1 << (N); \
root |= 2 << (N); \
}
int32_t WebRtcSpl_SqrtFloor(int32_t value)
{
int32_t root = 0, try1;
WEBRTC_SPL_SQRT_ITER (15);
WEBRTC_SPL_SQRT_ITER (14);
WEBRTC_SPL_SQRT_ITER (13);
WEBRTC_SPL_SQRT_ITER (12);
WEBRTC_SPL_SQRT_ITER (11);
WEBRTC_SPL_SQRT_ITER (10);
WEBRTC_SPL_SQRT_ITER ( 9);
WEBRTC_SPL_SQRT_ITER ( 8);
WEBRTC_SPL_SQRT_ITER ( 7);
WEBRTC_SPL_SQRT_ITER ( 6);
WEBRTC_SPL_SQRT_ITER ( 5);
WEBRTC_SPL_SQRT_ITER ( 4);
WEBRTC_SPL_SQRT_ITER ( 3);
WEBRTC_SPL_SQRT_ITER ( 2);
WEBRTC_SPL_SQRT_ITER ( 1);
WEBRTC_SPL_SQRT_ITER ( 0);
return root >> 1;
}
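// Quick sanity checks (values assumed for illustration):
static void SqrtFloorExamples(void)
{
  int32_t a = WebRtcSpl_SqrtFloor(8);        // 2: rounds down
  int32_t b = WebRtcSpl_SqrtFloor(1000000);  // 1000: exact square
  int32_t c = WebRtcSpl_SqrtFloor(-4);       // 0: negative input
  (void)a;
  (void)b;
  (void)c;
}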

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdint.h>
//
// WebRtcSpl_SqrtFloor(...)
//
// Returns the square root of the input value `value`, rounded down to
// integer precision, i.e., sqrt(8) gives 2. If `value` is negative, 0 is
// returned.
//
// Algorithm:
//
// An iterative 4 cycle/bit routine
//
// Input:
// - value : Value to calculate sqrt of
//
// Return value : Result of the sqrt calculation
//
int32_t WebRtcSpl_SqrtFloor(int32_t value);

View File

@ -0,0 +1,744 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_HAS_NEON)
#include <arm_neon.h>
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <math.h>
#include <algorithm>
#include <functional>
#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace aec3 {
// Computes and stores the frequency response of the filter.
void ComputeFrequencyResponse(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
for (auto& H2_ch : *H2) {
H2_ch.fill(0.f);
}
const size_t num_render_channels = H[0].size();
RTC_DCHECK_EQ(H.size(), H2->capacity());
for (size_t p = 0; p < num_partitions; ++p) {
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
for (size_t ch = 0; ch < num_render_channels; ++ch) {
for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
float tmp =
H[p][ch].re[j] * H[p][ch].re[j] + H[p][ch].im[j] * H[p][ch].im[j];
(*H2)[p][j] = std::max((*H2)[p][j], tmp);
}
}
}
}
#if defined(WEBRTC_HAS_NEON)
// Computes and stores the frequency response of the filter.
void ComputeFrequencyResponse_Neon(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
for (auto& H2_ch : *H2) {
H2_ch.fill(0.f);
}
const size_t num_render_channels = H[0].size();
RTC_DCHECK_EQ(H.size(), H2->capacity());
for (size_t p = 0; p < num_partitions; ++p) {
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
auto& H2_p = (*H2)[p];
for (size_t ch = 0; ch < num_render_channels; ++ch) {
const FftData& H_p_ch = H[p][ch];
for (size_t j = 0; j < kFftLengthBy2; j += 4) {
const float32x4_t re = vld1q_f32(&H_p_ch.re[j]);
const float32x4_t im = vld1q_f32(&H_p_ch.im[j]);
float32x4_t H2_new = vmulq_f32(re, re);
H2_new = vmlaq_f32(H2_new, im, im);
float32x4_t H2_p_j = vld1q_f32(&H2_p[j]);
H2_p_j = vmaxq_f32(H2_p_j, H2_new);
vst1q_f32(&H2_p[j], H2_p_j);
}
float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
}
}
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Computes and stores the frequency response of the filter.
void ComputeFrequencyResponse_Sse2(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
for (auto& H2_ch : *H2) {
H2_ch.fill(0.f);
}
const size_t num_render_channels = H[0].size();
RTC_DCHECK_EQ(H.size(), H2->capacity());
// constexpr __mmmask8 kMaxMask = static_cast<__mmmask8>(256u);
for (size_t p = 0; p < num_partitions; ++p) {
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
auto& H2_p = (*H2)[p];
for (size_t ch = 0; ch < num_render_channels; ++ch) {
const FftData& H_p_ch = H[p][ch];
for (size_t j = 0; j < kFftLengthBy2; j += 4) {
const __m128 re = _mm_loadu_ps(&H_p_ch.re[j]);
const __m128 re2 = _mm_mul_ps(re, re);
const __m128 im = _mm_loadu_ps(&H_p_ch.im[j]);
const __m128 im2 = _mm_mul_ps(im, im);
const __m128 H2_new = _mm_add_ps(re2, im2);
__m128 H2_k_j = _mm_loadu_ps(&H2_p[j]);
H2_k_j = _mm_max_ps(H2_k_j, H2_new);
_mm_storeu_ps(&H2_p[j], H2_k_j);
}
float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
}
}
}
#endif
// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)).
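// Expanding the complex update per frequency bin:
//   conj(X) * G = (X_re - i*X_im) * (G_re + i*G_im)
//               = (X_re*G_re + X_im*G_im) + i*(X_re*G_im - X_im*G_re),
// which is exactly the re/im accumulation in the scalar loop below.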
void AdaptPartitions(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H) {
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
size_t index = render_buffer.Position();
const size_t num_render_channels = render_buffer_data[index].size();
for (size_t p = 0; p < num_partitions; ++p) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
const FftData& X_p_ch = render_buffer_data[index][ch];
FftData& H_p_ch = (*H)[p][ch];
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
H_p_ch.re[k] += X_p_ch.re[k] * G.re[k] + X_p_ch.im[k] * G.im[k];
H_p_ch.im[k] += X_p_ch.re[k] * G.im[k] - X_p_ch.im[k] * G.re[k];
}
}
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
}
}
#if defined(WEBRTC_HAS_NEON)
// Adapts the filter partitions. (Neon variant)
void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H) {
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
const size_t num_render_channels = render_buffer_data[0].size();
const size_t lim1 = std::min(
render_buffer_data.size() - render_buffer.Position(), num_partitions);
const size_t lim2 = num_partitions;
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
size_t X_partition = render_buffer.Position();
size_t limit = lim1;
size_t p = 0;
do {
for (; p < limit; ++p, ++X_partition) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
FftData& H_p_ch = (*H)[p][ch];
const FftData& X = render_buffer_data[X_partition][ch];
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
const float32x4_t G_re = vld1q_f32(&G.re[k]);
const float32x4_t G_im = vld1q_f32(&G.im[k]);
const float32x4_t X_re = vld1q_f32(&X.re[k]);
const float32x4_t X_im = vld1q_f32(&X.im[k]);
const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
const float32x4_t a = vmulq_f32(X_re, G_re);
const float32x4_t e = vmlaq_f32(a, X_im, G_im);
const float32x4_t c = vmulq_f32(X_re, G_im);
const float32x4_t f = vmlsq_f32(c, X_im, G_re);
const float32x4_t g = vaddq_f32(H_re, e);
const float32x4_t h = vaddq_f32(H_im, f);
vst1q_f32(&H_p_ch.re[k], g);
vst1q_f32(&H_p_ch.im[k], h);
}
}
}
X_partition = 0;
limit = lim2;
} while (p < lim2);
X_partition = render_buffer.Position();
limit = lim1;
p = 0;
do {
for (; p < limit; ++p, ++X_partition) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
FftData& H_p_ch = (*H)[p][ch];
const FftData& X = render_buffer_data[X_partition][ch];
H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
}
}
X_partition = 0;
limit = lim2;
} while (p < lim2);
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Adapts the filter partitions. (SSE2 variant)
void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H) {
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
const size_t num_render_channels = render_buffer_data[0].size();
const size_t lim1 = std::min(
render_buffer_data.size() - render_buffer.Position(), num_partitions);
const size_t lim2 = num_partitions;
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
size_t X_partition = render_buffer.Position();
size_t limit = lim1;
size_t p = 0;
do {
for (; p < limit; ++p, ++X_partition) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
FftData& H_p_ch = (*H)[p][ch];
const FftData& X = render_buffer_data[X_partition][ch];
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
const __m128 G_re = _mm_loadu_ps(&G.re[k]);
const __m128 G_im = _mm_loadu_ps(&G.im[k]);
const __m128 X_re = _mm_loadu_ps(&X.re[k]);
const __m128 X_im = _mm_loadu_ps(&X.im[k]);
const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
const __m128 a = _mm_mul_ps(X_re, G_re);
const __m128 b = _mm_mul_ps(X_im, G_im);
const __m128 c = _mm_mul_ps(X_re, G_im);
const __m128 d = _mm_mul_ps(X_im, G_re);
const __m128 e = _mm_add_ps(a, b);
const __m128 f = _mm_sub_ps(c, d);
const __m128 g = _mm_add_ps(H_re, e);
const __m128 h = _mm_add_ps(H_im, f);
_mm_storeu_ps(&H_p_ch.re[k], g);
_mm_storeu_ps(&H_p_ch.im[k], h);
}
}
}
X_partition = 0;
limit = lim2;
} while (p < lim2);
X_partition = render_buffer.Position();
limit = lim1;
p = 0;
do {
for (; p < limit; ++p, ++X_partition) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
FftData& H_p_ch = (*H)[p][ch];
const FftData& X = render_buffer_data[X_partition][ch];
H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
}
}
X_partition = 0;
limit = lim2;
} while (p < lim2);
}
#endif
// Produces the filter output.
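// Per frequency bin this accumulates the complex product S += X * H:
//   X * H = (X_re + i*X_im) * (H_re + i*H_im)
//         = (X_re*H_re - X_im*H_im) + i*(X_re*H_im + X_im*H_re).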
void ApplyFilter(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S) {
S->re.fill(0.f);
S->im.fill(0.f);
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
size_t index = render_buffer.Position();
const size_t num_render_channels = render_buffer_data[index].size();
for (size_t p = 0; p < num_partitions; ++p) {
RTC_DCHECK_EQ(num_render_channels, H[p].size());
for (size_t ch = 0; ch < num_render_channels; ++ch) {
const FftData& X_p_ch = render_buffer_data[index][ch];
const FftData& H_p_ch = H[p][ch];
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
S->re[k] += X_p_ch.re[k] * H_p_ch.re[k] - X_p_ch.im[k] * H_p_ch.im[k];
S->im[k] += X_p_ch.re[k] * H_p_ch.im[k] + X_p_ch.im[k] * H_p_ch.re[k];
}
}
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
}
}
#if defined(WEBRTC_HAS_NEON)
// Produces the filter output (Neon variant).
void ApplyFilter_Neon(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S) {
// const RenderBuffer& render_buffer,
// rtc::ArrayView<const FftData> H,
// FftData* S) {
RTC_DCHECK_GE(H.size(), H.size() - 1);
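// Note: with unsigned size_t arithmetic the check above only fails for an
// empty H (0 - 1 wraps to SIZE_MAX), i.e. it asserts that H is non-empty.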
S->Clear();
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
const size_t num_render_channels = render_buffer_data[0].size();
const size_t lim1 = std::min(
render_buffer_data.size() - render_buffer.Position(), num_partitions);
const size_t lim2 = num_partitions;
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
size_t X_partition = render_buffer.Position();
size_t p = 0;
size_t limit = lim1;
do {
for (; p < limit; ++p, ++X_partition) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
const FftData& H_p_ch = H[p][ch];
const FftData& X = render_buffer_data[X_partition][ch];
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
const float32x4_t X_re = vld1q_f32(&X.re[k]);
const float32x4_t X_im = vld1q_f32(&X.im[k]);
const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
const float32x4_t S_re = vld1q_f32(&S->re[k]);
const float32x4_t S_im = vld1q_f32(&S->im[k]);
const float32x4_t a = vmulq_f32(X_re, H_re);
const float32x4_t e = vmlsq_f32(a, X_im, H_im);
const float32x4_t c = vmulq_f32(X_re, H_im);
const float32x4_t f = vmlaq_f32(c, X_im, H_re);
const float32x4_t g = vaddq_f32(S_re, e);
const float32x4_t h = vaddq_f32(S_im, f);
vst1q_f32(&S->re[k], g);
vst1q_f32(&S->im[k], h);
}
}
}
limit = lim2;
X_partition = 0;
} while (p < lim2);
X_partition = render_buffer.Position();
p = 0;
limit = lim1;
do {
for (; p < limit; ++p, ++X_partition) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
const FftData& H_p_ch = H[p][ch];
const FftData& X = render_buffer_data[X_partition][ch];
S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
}
}
limit = lim2;
X_partition = 0;
} while (p < lim2);
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Produces the filter output (SSE2 variant).
void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S) {
// const RenderBuffer& render_buffer,
// rtc::ArrayView<const FftData> H,
// FftData* S) {
RTC_DCHECK_GE(H.size(), H.size() - 1);
S->re.fill(0.f);
S->im.fill(0.f);
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
const size_t num_render_channels = render_buffer_data[0].size();
const size_t lim1 = std::min(
render_buffer_data.size() - render_buffer.Position(), num_partitions);
const size_t lim2 = num_partitions;
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
size_t X_partition = render_buffer.Position();
size_t p = 0;
size_t limit = lim1;
do {
for (; p < limit; ++p, ++X_partition) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
const FftData& H_p_ch = H[p][ch];
const FftData& X = render_buffer_data[X_partition][ch];
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
const __m128 X_re = _mm_loadu_ps(&X.re[k]);
const __m128 X_im = _mm_loadu_ps(&X.im[k]);
const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
const __m128 S_re = _mm_loadu_ps(&S->re[k]);
const __m128 S_im = _mm_loadu_ps(&S->im[k]);
const __m128 a = _mm_mul_ps(X_re, H_re);
const __m128 b = _mm_mul_ps(X_im, H_im);
const __m128 c = _mm_mul_ps(X_re, H_im);
const __m128 d = _mm_mul_ps(X_im, H_re);
const __m128 e = _mm_sub_ps(a, b);
const __m128 f = _mm_add_ps(c, d);
const __m128 g = _mm_add_ps(S_re, e);
const __m128 h = _mm_add_ps(S_im, f);
_mm_storeu_ps(&S->re[k], g);
_mm_storeu_ps(&S->im[k], h);
}
}
}
limit = lim2;
X_partition = 0;
} while (p < lim2);
X_partition = render_buffer.Position();
p = 0;
limit = lim1;
do {
for (; p < limit; ++p, ++X_partition) {
for (size_t ch = 0; ch < num_render_channels; ++ch) {
const FftData& H_p_ch = H[p][ch];
const FftData& X = render_buffer_data[X_partition][ch];
S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
}
}
limit = lim2;
X_partition = 0;
} while (p < lim2);
}
#endif
} // namespace aec3
namespace {
// Ensures that the newly added filter partitions after a size increase are set
// to zero.
void ZeroFilter(size_t old_size,
size_t new_size,
std::vector<std::vector<FftData>>* H) {
RTC_DCHECK_GE(H->size(), old_size);
RTC_DCHECK_GE(H->size(), new_size);
for (size_t p = old_size; p < new_size; ++p) {
RTC_DCHECK_EQ((*H)[p].size(), (*H)[0].size());
for (size_t ch = 0; ch < (*H)[0].size(); ++ch) {
(*H)[p][ch].Clear();
}
}
}
} // namespace
AdaptiveFirFilter::AdaptiveFirFilter(size_t max_size_partitions,
size_t initial_size_partitions,
size_t size_change_duration_blocks,
size_t num_render_channels,
Aec3Optimization optimization,
ApmDataDumper* data_dumper)
: data_dumper_(data_dumper),
fft_(),
optimization_(optimization),
num_render_channels_(num_render_channels),
max_size_partitions_(max_size_partitions),
size_change_duration_blocks_(
static_cast<int>(size_change_duration_blocks)),
current_size_partitions_(initial_size_partitions),
target_size_partitions_(initial_size_partitions),
old_target_size_partitions_(initial_size_partitions),
H_(max_size_partitions_, std::vector<FftData>(num_render_channels_)) {
RTC_DCHECK(data_dumper_);
RTC_DCHECK_GE(max_size_partitions, initial_size_partitions);
RTC_DCHECK_LT(0, size_change_duration_blocks_);
one_by_size_change_duration_blocks_ = 1.f / size_change_duration_blocks_;
ZeroFilter(0, max_size_partitions_, &H_);
SetSizePartitions(current_size_partitions_, true);
}
AdaptiveFirFilter::~AdaptiveFirFilter() = default;
void AdaptiveFirFilter::HandleEchoPathChange() {
// TODO(peah): Check the value and purpose of the code below.
ZeroFilter(current_size_partitions_, max_size_partitions_, &H_);
}
void AdaptiveFirFilter::SetSizePartitions(size_t size, bool immediate_effect) {
RTC_DCHECK_EQ(max_size_partitions_, H_.capacity());
RTC_DCHECK_LE(size, max_size_partitions_);
target_size_partitions_ = std::min(max_size_partitions_, size);
if (immediate_effect) {
size_t old_size_partitions_ = current_size_partitions_;
current_size_partitions_ = old_target_size_partitions_ =
target_size_partitions_;
ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
partition_to_constrain_ =
std::min(partition_to_constrain_, current_size_partitions_ - 1);
size_change_counter_ = 0;
} else {
size_change_counter_ = size_change_duration_blocks_;
}
}
void AdaptiveFirFilter::UpdateSize() {
RTC_DCHECK_GE(size_change_duration_blocks_, size_change_counter_);
size_t old_size_partitions_ = current_size_partitions_;
if (size_change_counter_ > 0) {
--size_change_counter_;
auto average = [](float from, float to, float from_weight) {
return from * from_weight + to * (1.f - from_weight);
};
float change_factor =
size_change_counter_ * one_by_size_change_duration_blocks_;
current_size_partitions_ = average(old_target_size_partitions_,
target_size_partitions_, change_factor);
partition_to_constrain_ =
std::min(partition_to_constrain_, current_size_partitions_ - 1);
} else {
current_size_partitions_ = old_target_size_partitions_ =
target_size_partitions_;
}
ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
RTC_DCHECK_LE(0, size_change_counter_);
}
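// Example of the ramp above (sizes assumed for illustration): with
// size_change_duration_blocks_ = 4, old_target_size_partitions_ = 12 and
// target_size_partitions_ = 20, successive calls see the counter at 3, 2, 1
// and produce 14, 16 and 18 partitions (the float average truncates to
// size_t), before the final block lands exactly on 20.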
void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
FftData* S) const {
RTC_DCHECK(S);
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
aec3::ApplyFilter_Sse2(render_buffer, current_size_partitions_, H_, S);
break;
case Aec3Optimization::kAvx2:
aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S);
break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
aec3::ApplyFilter_Neon(render_buffer, current_size_partitions_, H_, S);
break;
#endif
default:
aec3::ApplyFilter(render_buffer, current_size_partitions_, H_, S);
}
}
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
const FftData& G) {
// Adapt the filter and update the filter size.
AdaptAndUpdateSize(render_buffer, G);
// Constrain the filter partitions in a cyclic manner.
Constrain();
}
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
const FftData& G,
std::vector<float>* impulse_response) {
// Adapt the filter and update the filter size.
AdaptAndUpdateSize(render_buffer, G);
// Constrain the filter partitions in a cyclic manner.
ConstrainAndUpdateImpulseResponse(impulse_response);
}
void AdaptiveFirFilter::ComputeFrequencyResponse(
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const {
RTC_DCHECK_GE(max_size_partitions_, H2->capacity());
H2->resize(current_size_partitions_);
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
aec3::ComputeFrequencyResponse_Sse2(current_size_partitions_, H_, H2);
break;
case Aec3Optimization::kAvx2:
aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2);
break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
aec3::ComputeFrequencyResponse_Neon(current_size_partitions_, H_, H2);
break;
#endif
default:
aec3::ComputeFrequencyResponse(current_size_partitions_, H_, H2);
}
}
void AdaptiveFirFilter::AdaptAndUpdateSize(const RenderBuffer& render_buffer,
const FftData& G) {
// Update the filter size if needed.
UpdateSize();
// Adapt the filter.
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
aec3::AdaptPartitions_Sse2(render_buffer, G, current_size_partitions_,
&H_);
break;
case Aec3Optimization::kAvx2:
aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_,
&H_);
break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
aec3::AdaptPartitions_Neon(render_buffer, G, current_size_partitions_,
&H_);
break;
#endif
default:
aec3::AdaptPartitions(render_buffer, G, current_size_partitions_, &H_);
}
}
// Constrains the partition of the frequency domain filter to be limited in
// time via setting the relevant time-domain coefficients to zero and updates
// the corresponding values in an externally stored impulse response estimate.
void AdaptiveFirFilter::ConstrainAndUpdateImpulseResponse(
std::vector<float>* impulse_response) {
RTC_DCHECK_EQ(GetTimeDomainLength(max_size_partitions_),
impulse_response->capacity());
  impulse_response->resize(GetTimeDomainLength(current_size_partitions_));
  std::array<float, kFftLength> h;
std::fill(
impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2,
impulse_response->begin() + (partition_to_constrain_ + 1) * kFftLengthBy2,
0.f);
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
fft_.Ifft(H_[partition_to_constrain_][ch], &h);
static constexpr float kScale = 1.0f / kFftLengthBy2;
std::for_each(h.begin(), h.begin() + kFftLengthBy2,
[](float& a) { a *= kScale; });
std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
if (ch == 0) {
std::copy(
h.begin(), h.begin() + kFftLengthBy2,
impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2);
} else {
for (size_t k = 0, j = partition_to_constrain_ * kFftLengthBy2;
k < kFftLengthBy2; ++k, ++j) {
if (fabsf((*impulse_response)[j]) < fabsf(h[k])) {
(*impulse_response)[j] = h[k];
}
}
}
fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
}
partition_to_constrain_ =
partition_to_constrain_ < (current_size_partitions_ - 1)
? partition_to_constrain_ + 1
: 0;
}
// Constrains a partition of the frequency domain filter to be limited in
// time by setting the relevant time-domain coefficients to zero.
void AdaptiveFirFilter::Constrain() {
std::array<float, kFftLength> h;
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
fft_.Ifft(H_[partition_to_constrain_][ch], &h);
static constexpr float kScale = 1.0f / kFftLengthBy2;
std::for_each(h.begin(), h.begin() + kFftLengthBy2,
[](float& a) { a *= kScale; });
std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
}
partition_to_constrain_ =
partition_to_constrain_ < (current_size_partitions_ - 1)
? partition_to_constrain_ + 1
: 0;
}
void AdaptiveFirFilter::ScaleFilter(float factor) {
for (auto& H_p : H_) {
for (auto& H_p_ch : H_p) {
for (auto& re : H_p_ch.re) {
re *= factor;
}
for (auto& im : H_p_ch.im) {
im *= factor;
}
}
}
}
// Set the filter coefficients.
void AdaptiveFirFilter::SetFilter(size_t num_partitions,
const std::vector<std::vector<FftData>>& H) {
const size_t min_num_partitions =
std::min(current_size_partitions_, num_partitions);
for (size_t p = 0; p < min_num_partitions; ++p) {
RTC_DCHECK_EQ(H_[p].size(), H[p].size());
RTC_DCHECK_EQ(num_render_channels_, H_[p].size());
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
std::copy(H[p][ch].re.begin(), H[p][ch].re.end(), H_[p][ch].re.begin());
std::copy(H[p][ch].im.begin(), H[p][ch].im.end(), H_[p][ch].im.begin());
}
}
}
} // namespace webrtc
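
A side note on UpdateSize() above: it walks current_size_partitions_ linearly from the old target to the new one over size_change_duration_blocks blocks. The standalone sketch below reproduces just that interpolation; it is illustrative only (not part of this commit) and all parameter values are made up.

#include <cstddef>
#include <cstdio>

int main() {
  const int size_change_duration_blocks = 10;
  const float one_by_duration = 1.f / size_change_duration_blocks;
  const size_t old_target = 12;  // Partition count before the size change.
  const size_t new_target = 20;  // Partition count after the size change.
  int counter = size_change_duration_blocks;

  // Same interpolation as in AdaptiveFirFilter::UpdateSize().
  auto average = [](float from, float to, float from_weight) {
    return from * from_weight + to * (1.f - from_weight);
  };

  while (counter > 0) {
    --counter;
    const float change_factor = counter * one_by_duration;
    const size_t current =
        static_cast<size_t>(average(old_target, new_target, change_factor));
    std::printf("counter=%d current_size_partitions=%zu\n", counter, current);
  }
  return 0;
}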

View File

@ -0,0 +1,192 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
#include <stddef.h>
#include <array>
#include <vector>
#include "absl/strings/string_view.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec3_fft.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace aec3 {
// Computes and stores the frequency response of the filter.
void ComputeFrequencyResponse(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
#if defined(WEBRTC_HAS_NEON)
void ComputeFrequencyResponse_Neon(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ComputeFrequencyResponse_Sse2(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
void ComputeFrequencyResponse_Avx2(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
#endif
// Adapts the filter partitions.
void AdaptPartitions(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H);
#if defined(WEBRTC_HAS_NEON)
void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H);
void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H);
#endif
// Produces the filter output.
void ApplyFilter(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S);
#if defined(WEBRTC_HAS_NEON)
void ApplyFilter_Neon(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S);
void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S);
#endif
} // namespace aec3
// Provides a frequency domain adaptive filter functionality.
class AdaptiveFirFilter {
public:
AdaptiveFirFilter(size_t max_size_partitions,
size_t initial_size_partitions,
size_t size_change_duration_blocks,
size_t num_render_channels,
Aec3Optimization optimization,
ApmDataDumper* data_dumper);
~AdaptiveFirFilter();
AdaptiveFirFilter(const AdaptiveFirFilter&) = delete;
AdaptiveFirFilter& operator=(const AdaptiveFirFilter&) = delete;
// Produces the output of the filter.
void Filter(const RenderBuffer& render_buffer, FftData* S) const;
// Adapts the filter and updates an externally stored impulse response
// estimate.
void Adapt(const RenderBuffer& render_buffer,
const FftData& G,
std::vector<float>* impulse_response);
// Adapts the filter.
void Adapt(const RenderBuffer& render_buffer, const FftData& G);
  // Receives reports that known echo path changes have occurred and adjusts
// the filter adaptation accordingly.
void HandleEchoPathChange();
// Returns the filter size.
size_t SizePartitions() const { return current_size_partitions_; }
// Sets the filter size.
void SetSizePartitions(size_t size, bool immediate_effect);
// Computes the frequency responses for the filter partitions.
void ComputeFrequencyResponse(
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const;
// Returns the maximum number of partitions for the filter.
size_t max_filter_size_partitions() const { return max_size_partitions_; }
void DumpFilter(absl::string_view name_frequency_domain) {
for (size_t p = 0; p < max_size_partitions_; ++p) {
data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].re);
data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].im);
}
}
// Scale the filter impulse response and spectrum by a factor.
void ScaleFilter(float factor);
// Set the filter coefficients.
void SetFilter(size_t num_partitions,
const std::vector<std::vector<FftData>>& H);
// Gets the filter coefficients.
const std::vector<std::vector<FftData>>& GetFilter() const { return H_; }
private:
// Adapts the filter and updates the filter size.
void AdaptAndUpdateSize(const RenderBuffer& render_buffer, const FftData& G);
// Constrain the filter partitions in a cyclic manner.
void Constrain();
// Constrains the filter in a cyclic manner and updates the corresponding
// values in the supplied impulse response.
void ConstrainAndUpdateImpulseResponse(std::vector<float>* impulse_response);
  // Gradually updates the current filter size towards the target size.
void UpdateSize();
ApmDataDumper* const data_dumper_;
const Aec3Fft fft_;
const Aec3Optimization optimization_;
const size_t num_render_channels_;
const size_t max_size_partitions_;
const int size_change_duration_blocks_;
float one_by_size_change_duration_blocks_;
size_t current_size_partitions_;
size_t target_size_partitions_;
size_t old_target_size_partitions_;
int size_change_counter_ = 0;
std::vector<std::vector<FftData>> H_;
size_t partition_to_constrain_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
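
For readers of the declarations above, the sketch below shows in plain C++ what a scalar ComputeFrequencyResponse boils down to: the per-bin squared magnitude |H[p][k]|^2 of each partition. ToyFftData is a hypothetical stand-in for FftData, and reducing channels with a per-bin maximum is one conservative choice; the implementations referenced above may differ in detail.

#include <algorithm>
#include <array>
#include <cstddef>
#include <vector>

struct ToyFftData {
  std::array<float, 65> re{};  // 65 == kFftLengthBy2Plus1 for a 128-point FFT.
  std::array<float, 65> im{};
};

void ToyComputeFrequencyResponse(
    size_t num_partitions,
    const std::vector<std::vector<ToyFftData>>& H,
    std::vector<std::array<float, 65>>* H2) {
  H2->resize(num_partitions);
  for (auto& H2_p : *H2) {
    H2_p.fill(0.f);
  }
  for (size_t p = 0; p < num_partitions; ++p) {
    for (const auto& H_p_ch : H[p]) {  // One entry per render channel.
      for (size_t k = 0; k < 65; ++k) {
        const float mag2 =
            H_p_ch.re[k] * H_p_ch.re[k] + H_p_ch.im[k] * H_p_ch.im[k];
        (*H2)[p][k] = std::max((*H2)[p][k], mag2);
      }
    }
  }
}

int main() {
  std::vector<std::vector<ToyFftData>> H(3, std::vector<ToyFftData>(2));
  H[0][0].re[0] = 3.f;
  H[0][1].re[0] = 4.f;  // Channel 1 has the larger magnitude in bin 0.
  std::vector<std::array<float, 65>> H2;
  ToyComputeFrequencyResponse(H.size(), H, &H2);
  return H2[0][0] == 16.f ? 0 : 1;  // max(9, 16) across channels.
}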

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
#include <algorithm>
#include <functional>
#if defined(WEBRTC_HAS_NEON)
#include <arm_neon.h>
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
namespace webrtc {
namespace aec3 {
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses.
void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl) {
std::fill(erl.begin(), erl.end(), 0.f);
for (auto& H2_j : H2) {
std::transform(H2_j.begin(), H2_j.end(), erl.begin(), erl.begin(),
std::plus<float>());
}
}
#if defined(WEBRTC_HAS_NEON)
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses.
void ErlComputer_NEON(
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl) {
std::fill(erl.begin(), erl.end(), 0.f);
for (auto& H2_j : H2) {
for (size_t k = 0; k < kFftLengthBy2; k += 4) {
const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]);
float32x4_t erl_k = vld1q_f32(&erl[k]);
erl_k = vaddq_f32(erl_k, H2_j_k);
vst1q_f32(&erl[k], erl_k);
}
erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
}
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses.
void ErlComputer_SSE2(
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl) {
std::fill(erl.begin(), erl.end(), 0.f);
for (auto& H2_j : H2) {
for (size_t k = 0; k < kFftLengthBy2; k += 4) {
const __m128 H2_j_k = _mm_loadu_ps(&H2_j[k]);
__m128 erl_k = _mm_loadu_ps(&erl[k]);
erl_k = _mm_add_ps(erl_k, H2_j_k);
_mm_storeu_ps(&erl[k], erl_k);
}
erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
}
}
#endif
} // namespace aec3
void ComputeErl(const Aec3Optimization& optimization,
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl) {
RTC_DCHECK_EQ(kFftLengthBy2Plus1, erl.size());
// Update the frequency response and echo return loss for the filter.
switch (optimization) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
aec3::ErlComputer_SSE2(H2, erl);
break;
case Aec3Optimization::kAvx2:
aec3::ErlComputer_AVX2(H2, erl);
break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
aec3::ErlComputer_NEON(H2, erl);
break;
#endif
default:
aec3::ErlComputer(H2, erl);
}
}
} // namespace webrtc
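
To make the accumulation above concrete, this small illustrative program (plain std::array in place of rtc::ArrayView, not part of this commit) sums two flat partition spectra the same way aec3::ErlComputer does and prints the resulting ERL estimate.

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <vector>

int main() {
  constexpr size_t kBins = 65;  // kFftLengthBy2Plus1 for a 128-point FFT.
  // Two partitions with flat frequency responses of 1.0 and 0.5.
  std::vector<std::array<float, kBins>> H2(2);
  H2[0].fill(1.0f);
  H2[1].fill(0.5f);

  // The ERL estimate is the per-bin sum of the partition frequency responses.
  std::array<float, kBins> erl;
  erl.fill(0.f);
  for (const auto& H2_j : H2) {
    std::transform(H2_j.begin(), H2_j.end(), erl.begin(), erl.begin(),
                   std::plus<float>());
  }
  std::printf("erl[0]=%.1f erl[64]=%.1f\n", erl[0], erl[64]);  // 1.5 1.5
  return 0;
}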

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
#include <stddef.h>
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace aec3 {
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses.
void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
#if defined(WEBRTC_HAS_NEON)
void ErlComputer_NEON(
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ErlComputer_SSE2(
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
void ErlComputer_AVX2(
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
#endif
} // namespace aec3
// Computes the echo return loss based on a frequency response.
void ComputeErl(const Aec3Optimization& optimization,
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/aec3_common.h"
#include <stdint.h>
#include "rtc_base/checks.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
namespace webrtc {
Aec3Optimization DetectOptimization() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
if (GetCPUInfo(kAVX2) != 0) {
return Aec3Optimization::kAvx2;
} else if (GetCPUInfo(kSSE2) != 0) {
return Aec3Optimization::kSse2;
}
#endif
#if defined(WEBRTC_HAS_NEON)
return Aec3Optimization::kNeon;
#else
return Aec3Optimization::kNone;
#endif
}
float FastApproxLog2f(const float in) {
RTC_DCHECK_GT(in, .0f);
// Read and interpret float as uint32_t and then cast to float.
// This is done to extract the exponent (bits 30 - 23).
// "Right shift" of the exponent is then performed by multiplying
// with the constant (1/2^23). Finally, we subtract a constant to
// remove the bias (https://en.wikipedia.org/wiki/Exponent_bias).
union {
float dummy;
uint32_t a;
} x = {in};
float out = x.a;
out *= 1.1920929e-7f; // 1/2^23
out -= 126.942695f; // Remove bias.
return out;
}
float Log2TodB(const float in_log2) {
  // 3.0102999566398121 is 10 * log10(2): converts a log2-domain power
  // quantity to dB.
  return 3.0102999566398121 * in_log2;
}
} // namespace webrtc
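
A quick standalone check of FastApproxLog2f() above (illustrative, not part of this commit): the bit trick is copied side by side with std::log2, and Log2TodB()'s constant 3.0102999566398121 is simply 10 * log10(2).

#include <cmath>
#include <cstdint>
#include <cstdio>

// Copy of the bit-level approximation, kept local for comparison.
static float FastApproxLog2fCopy(const float in) {
  union {
    float dummy;
    uint32_t a;
  } x = {in};
  float out = x.a;
  out *= 1.1920929e-7f;  // 1/2^23: shifts the exponent bits down.
  out -= 126.942695f;    // Removes the IEEE 754 exponent bias.
  return out;
}

int main() {
  for (float v : {0.25f, 1.0f, 3.0f, 1000.0f}) {
    std::printf("v=%8.2f fast=%9.5f exact=%9.5f\n", v, FastApproxLog2fCopy(v),
                std::log2(v));
  }
  // A power ratio of 100 in log2 units converted to dB: ~20 dB.
  std::printf("Log2TodB(log2(100)) = %.3f dB\n",
              3.0102999566398121 * std::log2(100.0));
  return 0;
}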

View File

@ -0,0 +1,114 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
#include <stddef.h>
namespace webrtc {
#ifdef _MSC_VER /* visual c++ */
#define ALIGN16_BEG __declspec(align(16))
#define ALIGN16_END
#else /* gcc or icc */
#define ALIGN16_BEG
#define ALIGN16_END __attribute__((aligned(16)))
#endif
enum class Aec3Optimization { kNone, kSse2, kAvx2, kNeon };
constexpr int kNumBlocksPerSecond = 250;
constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond;
constexpr int kMetricsComputationBlocks = 3;
constexpr int kMetricsCollectionBlocks =
kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
constexpr size_t kFftLengthBy2 = 64;
constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
constexpr size_t kFftLength = 2 * kFftLengthBy2;
constexpr size_t kFftLengthBy2Log2 = 6;
constexpr int kRenderTransferQueueSizeFrames = 100;
constexpr size_t kMaxNumBands = 3;
constexpr size_t kFrameSize = 160;
constexpr size_t kSubFrameLength = kFrameSize / 2;
constexpr size_t kBlockSize = kFftLengthBy2;
constexpr size_t kBlockSizeLog2 = kFftLengthBy2Log2;
constexpr size_t kExtendedBlockSize = 2 * kFftLengthBy2;
constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32;
constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks =
kMatchedFilterWindowSizeSubBlocks * 3 / 4;
// TODO(peah): Integrate this with how it is done inside audio_processing_impl.
constexpr size_t NumBandsForRate(int sample_rate_hz) {
return static_cast<size_t>(sample_rate_hz / 16000);
}
constexpr bool ValidFullBandRate(int sample_rate_hz) {
return sample_rate_hz == 16000 || sample_rate_hz == 32000 ||
sample_rate_hz == 48000;
}
constexpr int GetTimeDomainLength(int filter_length_blocks) {
return filter_length_blocks * kFftLengthBy2;
}
constexpr size_t GetDownSampledBufferSize(size_t down_sampling_factor,
size_t num_matched_filters) {
return kBlockSize / down_sampling_factor *
(kMatchedFilterAlignmentShiftSizeSubBlocks * num_matched_filters +
kMatchedFilterWindowSizeSubBlocks + 1);
}
constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor,
size_t num_matched_filters,
size_t filter_length_blocks) {
return GetDownSampledBufferSize(down_sampling_factor, num_matched_filters) /
(kBlockSize / down_sampling_factor) +
filter_length_blocks + 1;
}
// Detects what kind of optimizations to use for the code.
Aec3Optimization DetectOptimization();
// Computes the log2 of the input in a fast and approximate manner.
float FastApproxLog2f(float in);
// Returns dB from a power quantity expressed in log2.
float Log2TodB(float in_log2);
static_assert(1 << kBlockSizeLog2 == kBlockSize,
"Proper number of shifts for blocksize");
static_assert(1 << kFftLengthBy2Log2 == kFftLengthBy2,
"Proper number of shifts for the fft length");
static_assert(1 == NumBandsForRate(16000), "Number of bands for 16 kHz");
static_assert(2 == NumBandsForRate(32000), "Number of bands for 32 kHz");
static_assert(3 == NumBandsForRate(48000), "Number of bands for 48 kHz");
static_assert(ValidFullBandRate(16000),
"Test that 16 kHz is a valid sample rate");
static_assert(ValidFullBandRate(32000),
"Test that 32 kHz is a valid sample rate");
static_assert(ValidFullBandRate(48000),
"Test that 48 kHz is a valid sample rate");
static_assert(!ValidFullBandRate(8001),
"Test that 8001 Hz is not a valid sample rate");
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
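
To make the buffer-sizing arithmetic above concrete, the following self-contained sketch mirrors the two helpers and evaluates them for one illustrative parameter set: a downsampling factor of 4, five matched filters, and a 13-block filter. These parameter values are assumptions made for the example, not values taken from this commit.

#include <cstddef>

constexpr size_t kBlockSize = 64;
constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32;
constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks =
    kMatchedFilterWindowSizeSubBlocks * 3 / 4;  // 24

constexpr size_t GetDownSampledBufferSize(size_t down_sampling_factor,
                                          size_t num_matched_filters) {
  return kBlockSize / down_sampling_factor *
         (kMatchedFilterAlignmentShiftSizeSubBlocks * num_matched_filters +
          kMatchedFilterWindowSizeSubBlocks + 1);
}

constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor,
                                          size_t num_matched_filters,
                                          size_t filter_length_blocks) {
  return GetDownSampledBufferSize(down_sampling_factor, num_matched_filters) /
             (kBlockSize / down_sampling_factor) +
         filter_length_blocks + 1;
}

// 16 downsampled samples per block times (24 * 5 + 32 + 1) = 153 sub-blocks.
static_assert(GetDownSampledBufferSize(4, 5) == 2448,
              "2448 downsampled samples");
// 2448 / 16 = 153 blocks of downsampled history, plus 13 + 1 filter blocks.
static_assert(GetRenderDelayBufferSize(4, 5, 13) == 167,
              "167 blocks of render delay buffering");

int main() { return 0; }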

View File

@ -0,0 +1,144 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/aec3_fft.h"
#include <algorithm>
#include <functional>
#include <iterator>
#include "rtc_base/checks.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
namespace webrtc {
namespace {
const float kHanning64[kFftLengthBy2] = {
0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f,
0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f,
0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f,
0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
0.0222136f, 0.00991376f, 0.00248461f, 0.f};
// Square-root Hanning window from the Matlab command win = sqrt(hanning(128)).
const float kSqrtHanning128[kFftLength] = {
0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f,
0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f,
0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f,
0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f,
0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f,
0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f,
0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f,
0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f,
0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f,
0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f,
0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f,
0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f,
0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f,
0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f,
0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f,
0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f};
bool IsSse2Available() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
return GetCPUInfo(kSSE2) != 0;
#else
return false;
#endif
}
} // namespace
Aec3Fft::Aec3Fft() : ooura_fft_(IsSse2Available()) {}
// TODO(peah): Change x to be std::array once the rest of the code allows this.
void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
Window window,
FftData* X) const {
RTC_DCHECK(X);
RTC_DCHECK_EQ(kFftLengthBy2, x.size());
std::array<float, kFftLength> fft;
std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f);
switch (window) {
case Window::kRectangular:
std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
break;
case Window::kHanning:
std::transform(x.begin(), x.end(), std::begin(kHanning64),
fft.begin() + kFftLengthBy2,
[](float a, float b) { return a * b; });
break;
case Window::kSqrtHanning:
RTC_DCHECK_NOTREACHED();
break;
default:
RTC_DCHECK_NOTREACHED();
}
Fft(&fft, X);
}
void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x,
rtc::ArrayView<const float> x_old,
Window window,
FftData* X) const {
RTC_DCHECK(X);
RTC_DCHECK_EQ(kFftLengthBy2, x.size());
RTC_DCHECK_EQ(kFftLengthBy2, x_old.size());
std::array<float, kFftLength> fft;
switch (window) {
case Window::kRectangular:
std::copy(x_old.begin(), x_old.end(), fft.begin());
std::copy(x.begin(), x.end(), fft.begin() + x_old.size());
break;
case Window::kHanning:
RTC_DCHECK_NOTREACHED();
break;
case Window::kSqrtHanning:
std::transform(x_old.begin(), x_old.end(), std::begin(kSqrtHanning128),
fft.begin(), std::multiplies<float>());
std::transform(x.begin(), x.end(),
std::begin(kSqrtHanning128) + x_old.size(),
fft.begin() + x_old.size(), std::multiplies<float>());
break;
default:
RTC_DCHECK_NOTREACHED();
}
Fft(&fft, X);
}
} // namespace webrtc
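
The kSqrtHanning128 table above numerically matches sin(pi * i / 128) (entry 1, 0.02454122852291, equals sin(pi / 128)), so the two window halves satisfy w[i]^2 + w[i + 64]^2 = sin^2 + cos^2 = 1. That identity is what lets the square-root-Hanning analysis window pair with a matching synthesis window for perfect reconstruction. The standalone check below verifies the identity; it is an illustration, not part of this tree.

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const double kPi = 3.14159265358979323846;
  double max_err = 0.0;
  for (int i = 0; i < 64; ++i) {
    const double w0 = std::sin(kPi * i / 128.0);         // First half.
    const double w1 = std::sin(kPi * (i + 64) / 128.0);  // Second half.
    max_err = std::max(max_err, std::fabs(w0 * w0 + w1 * w1 - 1.0));
  }
  std::printf("max |w[i]^2 + w[i+64]^2 - 1| = %g\n", max_err);  // ~1e-16
  return 0;
}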

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
#include <array>
#include "api/array_view.h"
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Wrapper class that provides 128 point real valued FFT functionality with the
// FftData type.
class Aec3Fft {
public:
enum class Window { kRectangular, kHanning, kSqrtHanning };
Aec3Fft();
Aec3Fft(const Aec3Fft&) = delete;
Aec3Fft& operator=(const Aec3Fft&) = delete;
// Computes the FFT. Note that both the input and output are modified.
void Fft(std::array<float, kFftLength>* x, FftData* X) const {
RTC_DCHECK(x);
RTC_DCHECK(X);
ooura_fft_.Fft(x->data());
X->CopyFromPackedArray(*x);
}
// Computes the inverse Fft.
void Ifft(const FftData& X, std::array<float, kFftLength>* x) const {
RTC_DCHECK(x);
X.CopyToPackedArray(x);
ooura_fft_.InverseFft(x->data());
}
  // Windows the input using the specified window, and then adds padding of
  // kFftLengthBy2 initial zeros before computing the Fft.
void ZeroPaddedFft(rtc::ArrayView<const float> x,
Window window,
FftData* X) const;
  // Concatenates the kFftLengthBy2-long x_old and x before computing the Fft
  // of the result.
void PaddedFft(rtc::ArrayView<const float> x,
rtc::ArrayView<const float> x_old,
FftData* X) const {
PaddedFft(x, x_old, Window::kRectangular, X);
}
// Padded Fft using a time-domain window.
void PaddedFft(rtc::ArrayView<const float> x,
rtc::ArrayView<const float> x_old,
Window window,
FftData* X) const;
private:
const OouraFft ooura_fft_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
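
A hedged usage sketch for this class, assuming it is compiled inside this source tree so that the includes resolve. Judging by the kScale constant applied after Ifft() in AdaptiveFirFilter::Constrain(), the forward/inverse pair returns data scaled by kFftLengthBy2, so one explicit scaling recovers the input; treat that factor as an assumption to verify rather than a documented contract.

#include <array>

#include "modules/audio_processing/aec3/aec3_fft.h"

namespace webrtc {

// Returns an approximation of x obtained by going to the frequency domain
// and back.
std::array<float, kFftLength> RoundTrip(
    const std::array<float, kFftLength>& x) {
  const Aec3Fft fft;
  FftData X;
  std::array<float, kFftLength> scratch = x;  // Fft() modifies its input.
  fft.Fft(&scratch, &X);
  std::array<float, kFftLength> out;
  fft.Ifft(X, &out);
  constexpr float kScale = 1.0f / kFftLengthBy2;
  for (auto& v : out) {
    v *= kScale;  // Undo the scaling of the Ooura inverse transform.
  }
  return out;
}

}  // namespace webrtc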

View File

@ -0,0 +1,481 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/aec_state.h"
#include <math.h>
#include <algorithm>
#include <numeric>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
bool DeactivateInitialStateResetAtEchoPathChange() {
return field_trial::IsEnabled(
"WebRTC-Aec3DeactivateInitialStateResetKillSwitch");
}
bool FullResetAtEchoPathChange() {
return !field_trial::IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch");
}
bool SubtractorAnalyzerResetAtEchoPathChange() {
return !field_trial::IsEnabled(
"WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch");
}
void ComputeAvgRenderReverb(
const SpectrumBuffer& spectrum_buffer,
int delay_blocks,
float reverb_decay,
ReverbModel* reverb_model,
rtc::ArrayView<float, kFftLengthBy2Plus1> reverb_power_spectrum) {
RTC_DCHECK(reverb_model);
const size_t num_render_channels = spectrum_buffer.buffer[0].size();
int idx_at_delay =
spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks);
int idx_past = spectrum_buffer.IncIndex(idx_at_delay);
std::array<float, kFftLengthBy2Plus1> X2_data;
rtc::ArrayView<const float> X2;
if (num_render_channels > 1) {
auto average_channels =
[](size_t num_render_channels,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
spectrum_band_0,
rtc::ArrayView<float, kFftLengthBy2Plus1> render_power) {
std::fill(render_power.begin(), render_power.end(), 0.f);
for (size_t ch = 0; ch < num_render_channels; ++ch) {
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
render_power[k] += spectrum_band_0[ch][k];
}
}
const float normalizer = 1.f / num_render_channels;
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
render_power[k] *= normalizer;
}
};
average_channels(num_render_channels, spectrum_buffer.buffer[idx_past],
X2_data);
reverb_model->UpdateReverbNoFreqShaping(
X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);
average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
X2_data);
X2 = X2_data;
} else {
reverb_model->UpdateReverbNoFreqShaping(
spectrum_buffer.buffer[idx_past][/*channel=*/0],
/*power_spectrum_scaling=*/1.0f, reverb_decay);
X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
}
rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
reverb_model->reverb();
for (size_t k = 0; k < X2.size(); ++k) {
reverb_power_spectrum[k] = X2[k] + reverb_power[k];
}
}
} // namespace
std::atomic<int> AecState::instance_count_(0);
void AecState::GetResidualEchoScaling(
rtc::ArrayView<float> residual_scaling) const {
bool filter_has_had_time_to_converge;
if (config_.filter.conservative_initial_phase) {
filter_has_had_time_to_converge =
strong_not_saturated_render_blocks_ >= 1.5f * kNumBlocksPerSecond;
} else {
filter_has_had_time_to_converge =
strong_not_saturated_render_blocks_ >= 0.8f * kNumBlocksPerSecond;
}
echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
residual_scaling);
}
AecState::AecState(const EchoCanceller3Config& config,
size_t num_capture_channels)
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
config_(config),
num_capture_channels_(num_capture_channels),
deactivate_initial_state_reset_at_echo_path_change_(
DeactivateInitialStateResetAtEchoPathChange()),
full_reset_at_echo_path_change_(FullResetAtEchoPathChange()),
subtractor_analyzer_reset_at_echo_path_change_(
SubtractorAnalyzerResetAtEchoPathChange()),
initial_state_(config_),
delay_state_(config_, num_capture_channels_),
transparent_state_(TransparentMode::Create(config_)),
filter_quality_state_(config_, num_capture_channels_),
erl_estimator_(2 * kNumBlocksPerSecond),
erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels_),
filter_analyzer_(config_, num_capture_channels_),
echo_audibility_(
config_.echo_audibility.use_stationarity_properties_at_init),
reverb_model_estimator_(config_, num_capture_channels_),
subtractor_output_analyzer_(num_capture_channels_) {}
AecState::~AecState() = default;
void AecState::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) {
const auto full_reset = [&]() {
filter_analyzer_.Reset();
capture_signal_saturation_ = false;
strong_not_saturated_render_blocks_ = 0;
blocks_with_active_render_ = 0;
if (!deactivate_initial_state_reset_at_echo_path_change_) {
initial_state_.Reset();
}
if (transparent_state_) {
transparent_state_->Reset();
}
erle_estimator_.Reset(true);
erl_estimator_.Reset();
filter_quality_state_.Reset();
};
// TODO(peah): Refine the reset scheme according to the type of gain and
// delay adjustment.
if (full_reset_at_echo_path_change_ &&
echo_path_variability.delay_change !=
EchoPathVariability::DelayAdjustment::kNone) {
full_reset();
} else if (echo_path_variability.gain_change) {
erle_estimator_.Reset(false);
}
if (subtractor_analyzer_reset_at_echo_path_change_) {
subtractor_output_analyzer_.HandleEchoPathChange();
}
}
void AecState::Update(
const absl::optional<DelayEstimate>& external_delay,
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
adaptive_filter_frequency_responses,
rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
rtc::ArrayView<const SubtractorOutput> subtractor_output) {
RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
RTC_DCHECK_EQ(num_capture_channels_,
adaptive_filter_frequency_responses.size());
RTC_DCHECK_EQ(num_capture_channels_,
adaptive_filter_impulse_responses.size());
// Analyze the filter outputs and filters.
bool any_filter_converged;
bool any_coarse_filter_converged;
bool all_filters_diverged;
subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
&any_coarse_filter_converged,
&all_filters_diverged);
bool any_filter_consistent;
float max_echo_path_gain;
filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
&any_filter_consistent, &max_echo_path_gain);
// Estimate the direct path delay of the filter.
if (config_.filter.use_linear_filter) {
delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
strong_not_saturated_render_blocks_);
}
const Block& aligned_render_block =
render_buffer.GetBlock(-delay_state_.MinDirectPathFilterDelay());
// Update render counters.
bool active_render = false;
for (int ch = 0; ch < aligned_render_block.NumChannels(); ++ch) {
const float render_energy =
std::inner_product(aligned_render_block.begin(/*block=*/0, ch),
aligned_render_block.end(/*block=*/0, ch),
aligned_render_block.begin(/*block=*/0, ch), 0.f);
if (render_energy > (config_.render_levels.active_render_limit *
config_.render_levels.active_render_limit) *
kFftLengthBy2) {
active_render = true;
break;
}
}
blocks_with_active_render_ += active_render ? 1 : 0;
strong_not_saturated_render_blocks_ +=
active_render && !SaturatedCapture() ? 1 : 0;
std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;
ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
delay_state_.MinDirectPathFilterDelay(),
ReverbDecay(/*mild=*/false), &avg_render_reverb_,
avg_render_spectrum_with_reverb);
if (config_.echo_audibility.use_stationarity_properties) {
// Update the echo audibility evaluator.
echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
delay_state_.MinDirectPathFilterDelay(),
delay_state_.ExternalDelayReported());
}
// Update the ERL and ERLE measures.
if (initial_state_.TransitionTriggered()) {
erle_estimator_.Reset(false);
}
erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
avg_render_spectrum_with_reverb, Y2, E2_refined,
subtractor_output_analyzer_.ConvergedFilters());
erl_estimator_.Update(
subtractor_output_analyzer_.ConvergedFilters(),
render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);
// Detect and flag echo saturation.
if (config_.ep_strength.echo_can_saturate) {
saturation_detector_.Update(aligned_render_block, SaturatedCapture(),
UsableLinearEstimate(), subtractor_output,
max_echo_path_gain);
} else {
RTC_DCHECK(!saturation_detector_.SaturatedEcho());
}
// Update the decision on whether to use the initial state parameter set.
initial_state_.Update(active_render, SaturatedCapture());
// Detect whether the transparent mode should be activated.
if (transparent_state_) {
transparent_state_->Update(
delay_state_.MinDirectPathFilterDelay(), any_filter_consistent,
any_filter_converged, any_coarse_filter_converged, all_filters_diverged,
active_render, SaturatedCapture());
}
// Analyze the quality of the filter.
filter_quality_state_.Update(active_render, TransparentModeActive(),
SaturatedCapture(), external_delay,
any_filter_converged);
// Update the reverb estimate.
const bool stationary_block =
config_.echo_audibility.use_stationarity_properties &&
echo_audibility_.IsBlockStationary();
reverb_model_estimator_.Update(
filter_analyzer_.GetAdjustedFilters(),
adaptive_filter_frequency_responses,
erle_estimator_.GetInstLinearQualityEstimates(),
delay_state_.DirectPathFilterDelays(),
filter_quality_state_.UsableLinearFilterOutputs(), stationary_block);
erle_estimator_.Dump(data_dumper_);
reverb_model_estimator_.Dump(data_dumper_.get());
data_dumper_->DumpRaw("aec3_active_render", active_render);
data_dumper_->DumpRaw("aec3_erl", Erl());
data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]);
data_dumper_->DumpRaw("aec3_erle_onset_compensated",
Erle(/*onset_compensated=*/true)[0]);
data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
data_dumper_->DumpRaw("aec3_filter_delay",
filter_analyzer_.MinFilterDelayBlocks());
data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent);
data_dumper_->DumpRaw("aec3_initial_state",
initial_state_.InitialStateActive());
data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
data_dumper_->DumpRaw("aec3_any_coarse_filter_converged",
any_coarse_filter_converged);
data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
data_dumper_->DumpRaw("aec3_external_delay_avaliable",
external_delay ? 1 : 0);
data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
GetReverbFrequencyResponse());
data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2);
data_dumper_->DumpRaw("aec3_subtractor_e2_coarse",
subtractor_output[0].e2_coarse);
data_dumper_->DumpRaw("aec3_subtractor_e2_refined",
subtractor_output[0].e2_refined);
}
AecState::InitialState::InitialState(const EchoCanceller3Config& config)
: conservative_initial_phase_(config.filter.conservative_initial_phase),
initial_state_seconds_(config.filter.initial_state_seconds) {
Reset();
}
void AecState::InitialState::Reset() {
initial_state_ = true;
strong_not_saturated_render_blocks_ = 0;
}
void AecState::InitialState::Update(bool active_render,
                                    bool saturated_capture) {
strong_not_saturated_render_blocks_ +=
active_render && !saturated_capture ? 1 : 0;
// Flag whether the initial state is still active.
bool prev_initial_state = initial_state_;
if (conservative_initial_phase_) {
initial_state_ =
strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
} else {
initial_state_ = strong_not_saturated_render_blocks_ <
initial_state_seconds_ * kNumBlocksPerSecond;
}
// Flag whether the transition from the initial state has started.
transition_triggered_ = !initial_state_ && prev_initial_state;
}
AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
size_t num_capture_channels)
: delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
min_filter_delay_(delay_headroom_blocks_) {}
void AecState::FilterDelay::Update(
rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
const absl::optional<DelayEstimate>& external_delay,
size_t blocks_with_proper_filter_adaptation) {
// Update the delay based on the external delay.
if (external_delay &&
(!external_delay_ || external_delay_->delay != external_delay->delay)) {
external_delay_ = external_delay;
external_delay_reported_ = true;
}
// Override the estimated delay if it is not certain that the filter has had
// time to converge.
const bool delay_estimator_may_not_have_converged =
blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
if (delay_estimator_may_not_have_converged && external_delay_) {
const int delay_guess = delay_headroom_blocks_;
std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
delay_guess);
} else {
RTC_DCHECK_EQ(filter_delays_blocks_.size(),
analyzer_filter_delay_estimates_blocks.size());
std::copy(analyzer_filter_delay_estimates_blocks.begin(),
analyzer_filter_delay_estimates_blocks.end(),
filter_delays_blocks_.begin());
}
min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
filter_delays_blocks_.end());
}
AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
const EchoCanceller3Config& config,
size_t num_capture_channels)
: use_linear_filter_(config.filter.use_linear_filter),
usable_linear_filter_estimates_(num_capture_channels, false) {}
void AecState::FilteringQualityAnalyzer::Reset() {
std::fill(usable_linear_filter_estimates_.begin(),
usable_linear_filter_estimates_.end(), false);
overall_usable_linear_estimates_ = false;
filter_update_blocks_since_reset_ = 0;
}
void AecState::FilteringQualityAnalyzer::Update(
bool active_render,
bool transparent_mode,
bool saturated_capture,
const absl::optional<DelayEstimate>& external_delay,
bool any_filter_converged) {
// Update blocks counter.
const bool filter_update = active_render && !saturated_capture;
filter_update_blocks_since_reset_ += filter_update ? 1 : 0;
filter_update_blocks_since_start_ += filter_update ? 1 : 0;
// Store convergence flag when observed.
convergence_seen_ = convergence_seen_ || any_filter_converged;
// Verify requirements for achieving a decent filter. The requirements for
// filter adaptation at call startup are more restrictive than after an
// in-call reset.
const bool sufficient_data_to_converge_at_startup =
filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
const bool sufficient_data_to_converge_at_reset =
sufficient_data_to_converge_at_startup &&
filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;
// The linear filter can only be used if it has had time to converge.
overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup &&
sufficient_data_to_converge_at_reset;
  // The linear filter can only be used if an external delay or convergence has
  // been identified.
overall_usable_linear_estimates_ =
overall_usable_linear_estimates_ && (external_delay || convergence_seen_);
  // If transparent mode is on, deactivate use of the linear filter.
overall_usable_linear_estimates_ =
overall_usable_linear_estimates_ && !transparent_mode;
if (use_linear_filter_) {
std::fill(usable_linear_filter_estimates_.begin(),
usable_linear_filter_estimates_.end(),
overall_usable_linear_estimates_);
}
}
void AecState::SaturationDetector::Update(
const Block& x,
bool saturated_capture,
bool usable_linear_estimate,
rtc::ArrayView<const SubtractorOutput> subtractor_output,
float echo_path_gain) {
saturated_echo_ = false;
if (!saturated_capture) {
return;
}
if (usable_linear_estimate) {
constexpr float kSaturationThreshold = 20000.f;
for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
saturated_echo_ =
saturated_echo_ ||
(subtractor_output[ch].s_refined_max_abs > kSaturationThreshold ||
subtractor_output[ch].s_coarse_max_abs > kSaturationThreshold);
}
} else {
float max_sample = 0.f;
for (int ch = 0; ch < x.NumChannels(); ++ch) {
rtc::ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch);
for (float sample : x_ch) {
max_sample = std::max(max_sample, fabsf(sample));
}
}
const float kMargin = 10.f;
float peak_echo_amplitude = max_sample * echo_path_gain * kMargin;
saturated_echo_ = saturated_echo_ || peak_echo_amplitude > 32000;
}
}
} // namespace webrtc
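
The active-render test in AecState::Update() above is effectively an RMS check: a block counts as active when its energy exceeds active_render_limit^2 times the block length. A standalone numeric sketch with an illustrative limit (the real threshold comes from config_.render_levels):

#include <array>
#include <cstddef>
#include <cstdio>
#include <numeric>

int main() {
  constexpr size_t kBlockSize = 64;
  const float active_render_limit = 100.f;  // Illustrative value.

  std::array<float, kBlockSize> block;
  block.fill(120.f);  // Constant amplitude 120 -> RMS 120.

  const float render_energy =
      std::inner_product(block.begin(), block.end(), block.begin(), 0.f);
  const float threshold =
      active_render_limit * active_render_limit * kBlockSize;
  const bool active_render = render_energy > threshold;
  std::printf("energy=%.0f threshold=%.0f active=%d\n", render_energy,
              threshold, active_render);  // 921600 > 640000 -> active.
  return 0;
}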

View File

@ -0,0 +1,300 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
#define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
#include <stddef.h>
#include <array>
#include <atomic>
#include <memory>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/echo_audibility.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/erl_estimator.h"
#include "modules/audio_processing/aec3/erle_estimator.h"
#include "modules/audio_processing/aec3/filter_analyzer.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/reverb_model_estimator.h"
#include "modules/audio_processing/aec3/subtractor_output.h"
#include "modules/audio_processing/aec3/subtractor_output_analyzer.h"
#include "modules/audio_processing/aec3/transparent_mode.h"
namespace webrtc {
class ApmDataDumper;
// Handles the state and the conditions for the echo removal functionality.
class AecState {
public:
AecState(const EchoCanceller3Config& config, size_t num_capture_channels);
~AecState();
// Returns whether the echo subtractor can be used to determine the residual
// echo.
bool UsableLinearEstimate() const {
return filter_quality_state_.LinearFilterUsable() &&
config_.filter.use_linear_filter;
}
// Returns whether the echo subtractor output should be used as output.
bool UseLinearFilterOutput() const {
return filter_quality_state_.LinearFilterUsable() &&
config_.filter.use_linear_filter;
}
// Returns whether the render signal is currently active.
bool ActiveRender() const { return blocks_with_active_render_ > 200; }
// Returns the appropriate scaling of the residual echo to match the
// audibility.
void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const;
// Returns whether the stationary properties of the signals are used in the
// aec.
bool UseStationarityProperties() const {
return config_.echo_audibility.use_stationarity_properties;
}
// Returns the ERLE.
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
bool onset_compensated) const {
return erle_estimator_.Erle(onset_compensated);
}
// Returns the non-capped ERLE.
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
const {
return erle_estimator_.ErleUnbounded();
}
// Returns the fullband ERLE estimate in log2 units.
float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }
// Returns the ERL.
const std::array<float, kFftLengthBy2Plus1>& Erl() const {
return erl_estimator_.Erl();
}
// Returns the time-domain ERL.
float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }
// Returns the delay estimate based on the linear filter.
int MinDirectPathFilterDelay() const {
return delay_state_.MinDirectPathFilterDelay();
}
// Returns whether the capture signal is saturated.
bool SaturatedCapture() const { return capture_signal_saturation_; }
// Returns whether the echo signal is saturated.
bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); }
// Updates the capture signal saturation.
void UpdateCaptureSaturation(bool capture_signal_saturation) {
capture_signal_saturation_ = capture_signal_saturation;
}
  // Returns whether the transparent mode is active.
bool TransparentModeActive() const {
return transparent_state_ && transparent_state_->Active();
}
// Takes appropriate action at an echo path change.
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
// Returns the decay factor for the echo reverberation. The parameter `mild`
// indicates which exponential decay to return. The default one or a milder
// one that can be used during nearend regions.
float ReverbDecay(bool mild) const {
return reverb_model_estimator_.ReverbDecay(mild);
}
// Return the frequency response of the reverberant echo.
rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
return reverb_model_estimator_.GetReverbFrequencyResponse();
}
  // Returns whether the transition out of the initial state has been
  // triggered.
bool TransitionTriggered() const {
return initial_state_.TransitionTriggered();
}
// Updates the aec state.
// TODO(bugs.webrtc.org/10913): Compute multi-channel ERL.
void Update(
const absl::optional<DelayEstimate>& external_delay,
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
adaptive_filter_frequency_responses,
rtc::ArrayView<const std::vector<float>>
adaptive_filter_impulse_responses,
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
rtc::ArrayView<const SubtractorOutput> subtractor_output);
// Returns filter length in blocks.
int FilterLengthBlocks() const {
// All filters have the same length, so arbitrarily return channel 0 length.
return filter_analyzer_.FilterLengthBlocks();
}
private:
static std::atomic<int> instance_count_;
std::unique_ptr<ApmDataDumper> data_dumper_;
const EchoCanceller3Config config_;
const size_t num_capture_channels_;
const bool deactivate_initial_state_reset_at_echo_path_change_;
const bool full_reset_at_echo_path_change_;
const bool subtractor_analyzer_reset_at_echo_path_change_;
  // Class for controlling the transition from the initial state, which in turn
// controls when the filter parameters for the initial state should be used.
class InitialState {
public:
explicit InitialState(const EchoCanceller3Config& config);
// Resets the state to again begin in the initial state.
void Reset();
// Updates the state based on new data.
void Update(bool active_render, bool saturated_capture);
// Returns whether the initial state is active or not.
bool InitialStateActive() const { return initial_state_; }
    // Returns whether the transition from the initial state has been started.
bool TransitionTriggered() const { return transition_triggered_; }
private:
const bool conservative_initial_phase_;
const float initial_state_seconds_;
bool transition_triggered_ = false;
bool initial_state_ = true;
size_t strong_not_saturated_render_blocks_ = 0;
} initial_state_;
// Class for choosing the direct-path delay relative to the beginning of the
// filter, as well as any other data related to the delay used within
// AecState.
class FilterDelay {
public:
FilterDelay(const EchoCanceller3Config& config,
size_t num_capture_channels);
// Returns whether an external delay has been reported to the AecState (from
// the delay estimator).
bool ExternalDelayReported() const { return external_delay_reported_; }
// Returns the delay in blocks relative to the beginning of the filter that
// corresponds to the direct path of the echo.
rtc::ArrayView<const int> DirectPathFilterDelays() const {
return filter_delays_blocks_;
}
    // Returns the minimum delay among the direct path delays relative to the
    // beginning of the filter.
int MinDirectPathFilterDelay() const { return min_filter_delay_; }
// Updates the delay estimates based on new data.
void Update(
rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
const absl::optional<DelayEstimate>& external_delay,
size_t blocks_with_proper_filter_adaptation);
private:
const int delay_headroom_blocks_;
bool external_delay_reported_ = false;
std::vector<int> filter_delays_blocks_;
int min_filter_delay_;
absl::optional<DelayEstimate> external_delay_;
} delay_state_;
// Classifier for toggling transparent mode when there is no echo.
std::unique_ptr<TransparentMode> transparent_state_;
  // Class for analyzing how well the linear filter performs, and can be
  // expected to perform, on the current signals. This is used to select the
  // echo suppression functionality as well as the input to the echo
  // suppressor.
class FilteringQualityAnalyzer {
public:
FilteringQualityAnalyzer(const EchoCanceller3Config& config,
size_t num_capture_channels);
// Returns whether the linear filter can be used for the echo
// canceller output.
bool LinearFilterUsable() const { return overall_usable_linear_estimates_; }
// Returns whether an individual filter output can be used for the echo
// canceller output.
const std::vector<bool>& UsableLinearFilterOutputs() const {
return usable_linear_filter_estimates_;
}
// Resets the state of the analyzer.
void Reset();
// Updates the analysis based on new data.
void Update(bool active_render,
bool transparent_mode,
bool saturated_capture,
const absl::optional<DelayEstimate>& external_delay,
bool any_filter_converged);
private:
const bool use_linear_filter_;
bool overall_usable_linear_estimates_ = false;
size_t filter_update_blocks_since_reset_ = 0;
size_t filter_update_blocks_since_start_ = 0;
bool convergence_seen_ = false;
std::vector<bool> usable_linear_filter_estimates_;
} filter_quality_state_;
// Class for detecting whether the echo is to be considered to be
// saturated.
class SaturationDetector {
public:
// Returns whether the echo is to be considered saturated.
bool SaturatedEcho() const { return saturated_echo_; }
// Updates the detection decision based on new data.
void Update(const Block& x,
bool saturated_capture,
bool usable_linear_estimate,
rtc::ArrayView<const SubtractorOutput> subtractor_output,
float echo_path_gain);
private:
bool saturated_echo_ = false;
} saturation_detector_;
ErlEstimator erl_estimator_;
ErleEstimator erle_estimator_;
size_t strong_not_saturated_render_blocks_ = 0;
size_t blocks_with_active_render_ = 0;
bool capture_signal_saturation_ = false;
FilterAnalyzer filter_analyzer_;
EchoAudibility echo_audibility_;
ReverbModelEstimator reverb_model_estimator_;
ReverbModel avg_render_reverb_;
SubtractorOutputAnalyzer subtractor_output_analyzer_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
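
Background for the Erle()/Erl() accessors above: ERLE (echo return loss enhancement) is conventionally the ratio of echo power to residual echo power after the subtractor, per frequency bin. The toy computation below is illustrative only; the clamping bounds are assumptions for the example, not the configuration AecState actually uses.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdio>

int main() {
  constexpr size_t kBins = 65;
  std::array<float, kBins> Y2;  // Echo power spectrum at the capture.
  std::array<float, kBins> E2;  // Residual power after the linear filter.
  Y2.fill(1000.f);
  E2.fill(10.f);

  std::array<float, kBins> erle;
  for (size_t k = 0; k < kBins; ++k) {
    constexpr float kMinErle = 1.f;   // Assumed bounds, chosen only to show
    constexpr float kMaxErle = 64.f;  // that estimates are typically clamped.
    erle[k] = std::min(kMaxErle, std::max(kMinErle, Y2[k] / E2[k]));
  }
  // The raw ratio is 100 (20 dB); the cap keeps it at 64 (~18 dB).
  std::printf("erle[0]=%.1f (~%.1f dB)\n", erle[0],
              10.0 * std::log10(erle[0]));
  return 0;
}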

View File

@ -0,0 +1,163 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/alignment_mixer.h"
#include <algorithm>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
AlignmentMixer::MixingVariant ChooseMixingVariant(bool downmix,
bool adaptive_selection,
int num_channels) {
RTC_DCHECK(!(adaptive_selection && downmix));
RTC_DCHECK_LT(0, num_channels);
if (num_channels == 1) {
return AlignmentMixer::MixingVariant::kFixed;
}
if (downmix) {
return AlignmentMixer::MixingVariant::kDownmix;
}
if (adaptive_selection) {
return AlignmentMixer::MixingVariant::kAdaptive;
}
return AlignmentMixer::MixingVariant::kFixed;
}
} // namespace
AlignmentMixer::AlignmentMixer(
size_t num_channels,
const EchoCanceller3Config::Delay::AlignmentMixing& config)
: AlignmentMixer(num_channels,
config.downmix,
config.adaptive_selection,
config.activity_power_threshold,
config.prefer_first_two_channels) {}
AlignmentMixer::AlignmentMixer(size_t num_channels,
bool downmix,
bool adaptive_selection,
float activity_power_threshold,
bool prefer_first_two_channels)
: num_channels_(num_channels),
one_by_num_channels_(1.f / num_channels_),
excitation_energy_threshold_(kBlockSize * activity_power_threshold),
prefer_first_two_channels_(prefer_first_two_channels),
selection_variant_(
ChooseMixingVariant(downmix, adaptive_selection, num_channels_)) {
if (selection_variant_ == MixingVariant::kAdaptive) {
std::fill(strong_block_counters_.begin(), strong_block_counters_.end(), 0);
cumulative_energies_.resize(num_channels_);
std::fill(cumulative_energies_.begin(), cumulative_energies_.end(), 0.f);
}
}
void AlignmentMixer::ProduceOutput(const Block& x,
rtc::ArrayView<float, kBlockSize> y) {
RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
if (selection_variant_ == MixingVariant::kDownmix) {
Downmix(x, y);
return;
}
int ch = selection_variant_ == MixingVariant::kFixed ? 0 : SelectChannel(x);
RTC_DCHECK_GT(x.NumChannels(), ch);
std::copy(x.begin(/*band=*/0, ch), x.end(/*band=*/0, ch), y.begin());
}
void AlignmentMixer::Downmix(const Block& x,
rtc::ArrayView<float, kBlockSize> y) const {
RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
RTC_DCHECK_GE(num_channels_, 2);
std::memcpy(&y[0], x.View(/*band=*/0, /*channel=*/0).data(),
kBlockSize * sizeof(y[0]));
for (size_t ch = 1; ch < num_channels_; ++ch) {
const auto x_ch = x.View(/*band=*/0, ch);
for (size_t i = 0; i < kBlockSize; ++i) {
y[i] += x_ch[i];
}
}
for (size_t i = 0; i < kBlockSize; ++i) {
y[i] *= one_by_num_channels_;
}
}
int AlignmentMixer::SelectChannel(const Block& x) {
RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
RTC_DCHECK_GE(num_channels_, 2);
RTC_DCHECK_EQ(cumulative_energies_.size(), num_channels_);
constexpr size_t kBlocksToChooseLeftOrRight =
static_cast<size_t>(0.5f * kNumBlocksPerSecond);
const bool good_signal_in_left_or_right =
prefer_first_two_channels_ &&
(strong_block_counters_[0] > kBlocksToChooseLeftOrRight ||
strong_block_counters_[1] > kBlocksToChooseLeftOrRight);
const int num_ch_to_analyze =
good_signal_in_left_or_right ? 2 : num_channels_;
constexpr int kNumBlocksBeforeEnergySmoothing = 60 * kNumBlocksPerSecond;
++block_counter_;
for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
float x2_sum = 0.f;
rtc::ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch);
for (size_t i = 0; i < kBlockSize; ++i) {
x2_sum += x_ch[i] * x_ch[i];
}
if (ch < 2 && x2_sum > excitation_energy_threshold_) {
++strong_block_counters_[ch];
}
if (block_counter_ <= kNumBlocksBeforeEnergySmoothing) {
cumulative_energies_[ch] += x2_sum;
} else {
constexpr float kSmoothing = 1.f / (10 * kNumBlocksPerSecond);
cumulative_energies_[ch] +=
kSmoothing * (x2_sum - cumulative_energies_[ch]);
}
}
// Normalize the energies so that the energy computations can from now on be
// based on smoothing.
if (block_counter_ == kNumBlocksBeforeEnergySmoothing) {
constexpr float kOneByNumBlocksBeforeEnergySmoothing =
1.f / kNumBlocksBeforeEnergySmoothing;
for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
cumulative_energies_[ch] *= kOneByNumBlocksBeforeEnergySmoothing;
}
}
int strongest_ch = 0;
for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
if (cumulative_energies_[ch] > cumulative_energies_[strongest_ch]) {
strongest_ch = ch;
}
}
if ((good_signal_in_left_or_right && selected_channel_ > 1) ||
cumulative_energies_[strongest_ch] >
2.f * cumulative_energies_[selected_channel_]) {
selected_channel_ = strongest_ch;
}
return selected_channel_;
}
} // namespace webrtc
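The final selection step above applies a factor-2 hysteresis before switching channels. Isolated as a sketch:

// The switching rule from SelectChannel(), isolated: a competing channel must
// carry more than twice the energy of the currently selected one before the
// selection moves, which prevents toggling between channels of similar level.
bool ShouldSwitchChannel(float strongest_energy, float selected_energy) {
  return strongest_energy > 2.f * selected_energy;
}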

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
#include <array>
#include <vector>
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/block.h"
namespace webrtc {
// Performs channel conversion to mono for the purpose of providing a decent
// mono input for the delay estimation. This is achieved by analyzing all
// incoming channels and producing a single-channel output.
class AlignmentMixer {
public:
AlignmentMixer(size_t num_channels,
const EchoCanceller3Config::Delay::AlignmentMixing& config);
AlignmentMixer(size_t num_channels,
bool downmix,
bool adaptive_selection,
float excitation_limit,
bool prefer_first_two_channels);
void ProduceOutput(const Block& x, rtc::ArrayView<float, kBlockSize> y);
enum class MixingVariant { kDownmix, kAdaptive, kFixed };
private:
const size_t num_channels_;
const float one_by_num_channels_;
const float excitation_energy_threshold_;
const bool prefer_first_two_channels_;
const MixingVariant selection_variant_;
std::array<size_t, 2> strong_block_counters_;
std::vector<float> cumulative_energies_;
int selected_channel_ = 0;
size_t block_counter_ = 0;
void Downmix(const Block& x, rtc::ArrayView<float, kBlockSize> y) const;
int SelectChannel(const Block& x);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
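For reference, a self-contained sketch of the kDownmix path, assuming kBlockSize = 64 as in aec3_common.h and using plain vectors in place of the Block type:

#include <array>
#include <cstddef>
#include <vector>

constexpr size_t kSketchBlockSize = 64;  // kBlockSize in aec3_common.h.

// Averages all channels of one block into a mono output, mirroring what
// AlignmentMixer::Downmix() does on the first band. Assumes x is non-empty.
void DownmixToMono(const std::vector<std::array<float, kSketchBlockSize>>& x,
                   std::array<float, kSketchBlockSize>& y) {
  y.fill(0.f);
  for (const auto& x_ch : x) {
    for (size_t i = 0; i < kSketchBlockSize; ++i) {
      y[i] += x_ch[i];
    }
  }
  const float one_by_num_channels = 1.f / x.size();
  for (float& v : y) {
    v *= one_by_num_channels;
  }
}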

View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
#include <algorithm>
#include <limits>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
bool TimeToReportMetrics(int frames_since_last_report) {
constexpr int kNumFramesPerSecond = 100;
constexpr int kReportingIntervalFrames = 10 * kNumFramesPerSecond;
return frames_since_last_report == kReportingIntervalFrames;
}
} // namespace
ApiCallJitterMetrics::Jitter::Jitter()
: max_(0), min_(std::numeric_limits<int>::max()) {}
void ApiCallJitterMetrics::Jitter::Update(int num_api_calls_in_a_row) {
min_ = std::min(min_, num_api_calls_in_a_row);
max_ = std::max(max_, num_api_calls_in_a_row);
}
void ApiCallJitterMetrics::Jitter::Reset() {
min_ = std::numeric_limits<int>::max();
max_ = 0;
}
void ApiCallJitterMetrics::Reset() {
render_jitter_.Reset();
capture_jitter_.Reset();
num_api_calls_in_a_row_ = 0;
frames_since_last_report_ = 0;
last_call_was_render_ = false;
proper_call_observed_ = false;
}
void ApiCallJitterMetrics::ReportRenderCall() {
if (!last_call_was_render_) {
// If the previous call was a capture and a proper call has been observed
// (containing both render and capture data), store the last number of
// capture calls in the metrics.
if (proper_call_observed_) {
capture_jitter_.Update(num_api_calls_in_a_row_);
}
// Reset the call counter to start counting render calls.
num_api_calls_in_a_row_ = 0;
}
++num_api_calls_in_a_row_;
last_call_was_render_ = true;
}
void ApiCallJitterMetrics::ReportCaptureCall() {
if (last_call_was_render_) {
// If the previous call was a render and a proper call has been observed
// (containing both render and capture data), store the last number of
// render calls in the metrics.
if (proper_call_observed_) {
render_jitter_.Update(num_api_calls_in_a_row_);
}
// Reset the call counter to start counting capture calls.
num_api_calls_in_a_row_ = 0;
// If this statement is reached, at least one render and one capture call
// have been observed.
proper_call_observed_ = true;
}
++num_api_calls_in_a_row_;
last_call_was_render_ = false;
// Only report and update jitter metrics when a proper call, containing
// both render and capture data, has been observed.
if (proper_call_observed_ &&
TimeToReportMetrics(++frames_since_last_report_)) {
// Report jitter, where the basic unit is frames.
constexpr int kMaxJitterToReport = 50;
// Report max and min jitter for render and capture, in units of 20 ms.
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.MaxRenderJitter",
std::min(kMaxJitterToReport, render_jitter().max()), 1,
kMaxJitterToReport, kMaxJitterToReport);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.MinRenderJitter",
std::min(kMaxJitterToReport, render_jitter().min()), 1,
kMaxJitterToReport, kMaxJitterToReport);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.MaxCaptureJitter",
std::min(kMaxJitterToReport, capture_jitter().max()), 1,
kMaxJitterToReport, kMaxJitterToReport);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.MinCaptureJitter",
std::min(kMaxJitterToReport, capture_jitter().min()), 1,
kMaxJitterToReport, kMaxJitterToReport);
frames_since_last_report_ = 0;
Reset();
}
}
bool ApiCallJitterMetrics::WillReportMetricsAtNextCapture() const {
return TimeToReportMetrics(frames_since_last_report_ + 1);
}
} // namespace webrtc

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
#define MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
namespace webrtc {
// Stores data for reporting metrics on the API call jitter.
class ApiCallJitterMetrics {
public:
class Jitter {
public:
Jitter();
void Update(int num_api_calls_in_a_row);
void Reset();
int min() const { return min_; }
int max() const { return max_; }
private:
int max_;
int min_;
};
ApiCallJitterMetrics() { Reset(); }
// Update metrics for render API call.
void ReportRenderCall();
// Update and periodically report metrics for capture API call.
void ReportCaptureCall();
// Methods used only for testing.
const Jitter& render_jitter() const { return render_jitter_; }
const Jitter& capture_jitter() const { return capture_jitter_; }
bool WillReportMetricsAtNextCapture() const;
private:
void Reset();
Jitter render_jitter_;
Jitter capture_jitter_;
int num_api_calls_in_a_row_ = 0;
int frames_since_last_report_ = 0;
bool last_call_was_render_ = false;
bool proper_call_observed_ = false;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
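A toy driver (not part of WebRTC) showing how the metrics are fed: one report per API call, so perfectly interleaved render/capture calls yield a jitter of 1, while bursts of same-type calls raise the maximum.

#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"

void FeedJitterMetricsExample() {
  webrtc::ApiCallJitterMetrics metrics;
  // Perfect interleaving: each render call is followed by one capture call.
  for (int frame = 0; frame < 100; ++frame) {
    metrics.ReportRenderCall();
    metrics.ReportCaptureCall();
  }
  // render_jitter().max() and capture_jitter().max() are both 1 here; two
  // render calls in a row would push the render maximum to 2.
}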

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
#include <algorithm>
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
namespace webrtc {
// Contains one or more channels of 4 milliseconds of audio data.
// The audio is split in one or more frequency bands, each with a sampling
// rate of 16 kHz.
class Block {
public:
Block(int num_bands, int num_channels, float default_value = 0.0f)
: num_bands_(num_bands),
num_channels_(num_channels),
data_(num_bands * num_channels * kBlockSize, default_value) {}
// Returns the number of bands.
int NumBands() const { return num_bands_; }
// Returns the number of channels.
int NumChannels() const { return num_channels_; }
// Modifies the number of channels and sets all samples to zero.
void SetNumChannels(int num_channels) {
num_channels_ = num_channels;
data_.resize(num_bands_ * num_channels_ * kBlockSize);
std::fill(data_.begin(), data_.end(), 0.0f);
}
// Iterators for accessing the data.
auto begin(int band, int channel) {
return data_.begin() + GetIndex(band, channel);
}
auto begin(int band, int channel) const {
return data_.begin() + GetIndex(band, channel);
}
auto end(int band, int channel) { return begin(band, channel) + kBlockSize; }
auto end(int band, int channel) const {
return begin(band, channel) + kBlockSize;
}
// Access data via ArrayView.
rtc::ArrayView<float, kBlockSize> View(int band, int channel) {
return rtc::ArrayView<float, kBlockSize>(&data_[GetIndex(band, channel)],
kBlockSize);
}
rtc::ArrayView<const float, kBlockSize> View(int band, int channel) const {
return rtc::ArrayView<const float, kBlockSize>(
&data_[GetIndex(band, channel)], kBlockSize);
}
// Lets two Blocks swap audio data.
void Swap(Block& b) {
std::swap(num_bands_, b.num_bands_);
std::swap(num_channels_, b.num_channels_);
data_.swap(b.data_);
}
private:
// Returns the index of the first sample of the requested |band| and
// |channel|.
int GetIndex(int band, int channel) const {
return (band * num_channels_ + channel) * kBlockSize;
}
int num_bands_;
int num_channels_;
std::vector<float> data_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
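The private GetIndex() implies a band-major flat layout. A small illustrative check, assuming kBlockSize = 64:

// Data is stored band-major, then channel, then sample, exactly as in
// Block::GetIndex().
constexpr int kSketchBlockSize = 64;

constexpr int FlatIndex(int band, int channel, int num_channels) {
  return (band * num_channels + channel) * kSketchBlockSize;
}

// With 3 bands and 2 channels, band 1 / channel 0 starts right after the
// 2 * 64 samples of band 0:
static_assert(FlatIndex(1, 0, 2) == 128, "band-major layout");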

View File

@ -0,0 +1,23 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_buffer.h"
#include <algorithm>
namespace webrtc {
BlockBuffer::BlockBuffer(size_t size, size_t num_bands, size_t num_channels)
: size(static_cast<int>(size)),
buffer(size, Block(num_bands, num_channels)) {}
BlockBuffer::~BlockBuffer() = default;
} // namespace webrtc

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "modules/audio_processing/aec3/block.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Struct for bundling a circular buffer of Block objects together with the
// read and write indices.
struct BlockBuffer {
BlockBuffer(size_t size, size_t num_bands, size_t num_channels);
~BlockBuffer();
int IncIndex(int index) const {
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
return index < size - 1 ? index + 1 : 0;
}
int DecIndex(int index) const {
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
return index > 0 ? index - 1 : size - 1;
}
int OffsetIndex(int index, int offset) const {
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
RTC_DCHECK_GE(size, offset);
return (size + index + offset) % size;
}
void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
void IncWriteIndex() { write = IncIndex(write); }
void DecWriteIndex() { write = DecIndex(write); }
void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
void IncReadIndex() { read = IncIndex(read); }
void DecReadIndex() { read = DecIndex(read); }
const int size;
std::vector<Block> buffer;
int write = 0;
int read = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
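One detail worth noting in OffsetIndex(): adding `size` before the modulo keeps the result non-negative for backward offsets. A standalone check:

#include <cassert>

// Mirrors BlockBuffer::OffsetIndex(); the formula is valid for offsets in
// [-size, size].
int OffsetIndexSketch(int index, int offset, int size) {
  return (size + index + offset) % size;
}

void OffsetIndexExample() {
  assert(OffsetIndexSketch(0, -1, 8) == 7);  // Wraps backwards.
  assert(OffsetIndexSketch(7, 1, 8) == 0);   // Wraps forwards.
}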

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_delay_buffer.h"
#include "api/array_view.h"
#include "rtc_base/checks.h"
namespace webrtc {
BlockDelayBuffer::BlockDelayBuffer(size_t num_channels,
size_t num_bands,
size_t frame_length,
size_t delay_samples)
: frame_length_(frame_length),
delay_(delay_samples),
buf_(num_channels,
std::vector<std::vector<float>>(num_bands,
std::vector<float>(delay_, 0.f))) {}
BlockDelayBuffer::~BlockDelayBuffer() = default;
void BlockDelayBuffer::DelaySignal(AudioBuffer* frame) {
RTC_DCHECK_EQ(buf_.size(), frame->num_channels());
if (delay_ == 0) {
return;
}
const size_t num_bands = buf_[0].size();
const size_t num_channels = buf_.size();
const size_t i_start = last_insert_;
size_t i = 0;
for (size_t ch = 0; ch < num_channels; ++ch) {
RTC_DCHECK_EQ(buf_[ch].size(), frame->num_bands());
RTC_DCHECK_EQ(buf_[ch].size(), num_bands);
rtc::ArrayView<float* const> frame_ch(frame->split_bands(ch), num_bands);
const size_t delay = delay_;
for (size_t band = 0; band < num_bands; ++band) {
RTC_DCHECK_EQ(delay_, buf_[ch][band].size());
i = i_start;
// Offloading these pointers and class variables to local variables allows
// the compiler to optimize the below loop when compiling with
// '-fno-strict-aliasing'.
float* buf_ch_band = buf_[ch][band].data();
float* frame_ch_band = frame_ch[band];
for (size_t k = 0, frame_length = frame_length_; k < frame_length; ++k) {
const float tmp = buf_ch_band[i];
buf_ch_band[i] = frame_ch_band[k];
frame_ch_band[k] = tmp;
i = i < delay - 1 ? i + 1 : 0;
}
}
}
last_insert_ = i;
}
} // namespace webrtc
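The inner loop is a swap-based circular delay line: each incoming sample is exchanged for the oldest buffered one, so no separate output copy is needed. A minimal single-channel sketch:

#include <utility>
#include <vector>

// Delays `frame` in place by buf.size() samples; `insert_pos` carries the
// circular write position between calls, like last_insert_ above.
void DelayInPlace(std::vector<float>& frame,
                  std::vector<float>& buf,
                  size_t& insert_pos) {
  for (float& sample : frame) {
    std::swap(sample, buf[insert_pos]);  // Output oldest, store newest.
    insert_pos = insert_pos + 1 < buf.size() ? insert_pos + 1 : 0;
  }
}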

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "modules/audio_processing/audio_buffer.h"
namespace webrtc {
// Class for applying a fixed delay to the samples in a signal partitioned
// using the AudioBuffer band-splitting scheme.
class BlockDelayBuffer {
public:
BlockDelayBuffer(size_t num_channels,
size_t num_bands,
size_t frame_length,
size_t delay_samples);
~BlockDelayBuffer();
// Delays the samples by the specified delay.
void DelaySignal(AudioBuffer* frame);
private:
const size_t frame_length_;
const size_t delay_;
std::vector<std::vector<std::vector<float>>> buf_;
size_t last_insert_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_framer.h"
#include <algorithm>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
namespace webrtc {
BlockFramer::BlockFramer(size_t num_bands, size_t num_channels)
: num_bands_(num_bands),
num_channels_(num_channels),
buffer_(num_bands_,
std::vector<std::vector<float>>(
num_channels,
std::vector<float>(kBlockSize, 0.f))) {
RTC_DCHECK_LT(0, num_bands);
RTC_DCHECK_LT(0, num_channels);
}
BlockFramer::~BlockFramer() = default;
// All the constants are chosen so that the buffer is either empty or has enough
// samples for InsertBlockAndExtractSubFrame to produce a frame. In order to
// achieve this, the InsertBlockAndExtractSubFrame and InsertBlock methods need
// to be called in the correct order.
void BlockFramer::InsertBlock(const Block& block) {
RTC_DCHECK_EQ(num_bands_, block.NumBands());
RTC_DCHECK_EQ(num_channels_, block.NumChannels());
for (size_t band = 0; band < num_bands_; ++band) {
for (size_t channel = 0; channel < num_channels_; ++channel) {
RTC_DCHECK_EQ(0, buffer_[band][channel].size());
buffer_[band][channel].insert(buffer_[band][channel].begin(),
block.begin(band, channel),
block.end(band, channel));
}
}
}
void BlockFramer::InsertBlockAndExtractSubFrame(
const Block& block,
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame) {
RTC_DCHECK(sub_frame);
RTC_DCHECK_EQ(num_bands_, block.NumBands());
RTC_DCHECK_EQ(num_channels_, block.NumChannels());
RTC_DCHECK_EQ(num_bands_, sub_frame->size());
for (size_t band = 0; band < num_bands_; ++band) {
RTC_DCHECK_EQ(num_channels_, (*sub_frame)[0].size());
for (size_t channel = 0; channel < num_channels_; ++channel) {
RTC_DCHECK_LE(kSubFrameLength,
buffer_[band][channel].size() + kBlockSize);
RTC_DCHECK_GE(kBlockSize, buffer_[band][channel].size());
RTC_DCHECK_EQ(kSubFrameLength, (*sub_frame)[band][channel].size());
const int samples_to_frame =
kSubFrameLength - buffer_[band][channel].size();
std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(),
(*sub_frame)[band][channel].begin());
std::copy(
block.begin(band, channel),
block.begin(band, channel) + samples_to_frame,
(*sub_frame)[band][channel].begin() + buffer_[band][channel].size());
buffer_[band][channel].clear();
buffer_[band][channel].insert(
buffer_[band][channel].begin(),
block.begin(band, channel) + samples_to_frame,
block.end(band, channel));
}
}
}
} // namespace webrtc

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/block.h"
namespace webrtc {
// Class for producing frames consisting of 2 subframes of 80 samples each
// from 64 sample blocks. The class is designed to work together with the
// FrameBlocker class which performs the reverse conversion. Used together
// with that, this class produces output frames at the same rate as frames
// are received by the FrameBlocker class. Note that the internal buffers
// will overrun if any other packet insertion rate is used.
class BlockFramer {
public:
BlockFramer(size_t num_bands, size_t num_channels);
~BlockFramer();
BlockFramer(const BlockFramer&) = delete;
BlockFramer& operator=(const BlockFramer&) = delete;
// Adds a 64 sample block into the data that will form the next output frame.
void InsertBlock(const Block& block);
// Adds a 64 sample block and extracts an 80 sample subframe.
void InsertBlockAndExtractSubFrame(
const Block& block,
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame);
private:
const size_t num_bands_;
const size_t num_channels_;
std::vector<std::vector<std::vector<float>>> buffer_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
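The overrun warning follows from simple arithmetic: five 64-sample blocks carry exactly four 80-sample subframes. A toy bookkeeping check of that cadence:

#include <cassert>

// Tracks buffered samples through one 5-block cycle (5 * 64 == 4 * 80).
void BlockFramerCadenceExample() {
  int buffered = 0;
  buffered += 64;  // InsertBlock(): refill, no subframe extracted.
  for (int i = 0; i < 4; ++i) {
    buffered += 64 - 80;  // InsertBlockAndExtractSubFrame(): net -16.
  }
  assert(buffered == 0);  // Back to empty; other call patterns over/underrun.
}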

View File

@ -0,0 +1,290 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_processor.h"
#include <stddef.h>
#include <atomic>
#include <memory>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/block_processor_metrics.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/echo_remover.h"
#include "modules/audio_processing/aec3/render_delay_buffer.h"
#include "modules/audio_processing/aec3/render_delay_controller.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
namespace {
enum class BlockProcessorApiCall { kCapture, kRender };
class BlockProcessorImpl final : public BlockProcessor {
public:
BlockProcessorImpl(const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels,
std::unique_ptr<RenderDelayBuffer> render_buffer,
std::unique_ptr<RenderDelayController> delay_controller,
std::unique_ptr<EchoRemover> echo_remover);
BlockProcessorImpl() = delete;
~BlockProcessorImpl() override;
void ProcessCapture(bool echo_path_gain_change,
bool capture_signal_saturation,
Block* linear_output,
Block* capture_block) override;
void BufferRender(const Block& block) override;
void UpdateEchoLeakageStatus(bool leakage_detected) override;
void GetMetrics(EchoControl::Metrics* metrics) const override;
void SetAudioBufferDelay(int delay_ms) override;
void SetCaptureOutputUsage(bool capture_output_used) override;
private:
static std::atomic<int> instance_count_;
std::unique_ptr<ApmDataDumper> data_dumper_;
const EchoCanceller3Config config_;
bool capture_properly_started_ = false;
bool render_properly_started_ = false;
const size_t sample_rate_hz_;
std::unique_ptr<RenderDelayBuffer> render_buffer_;
std::unique_ptr<RenderDelayController> delay_controller_;
std::unique_ptr<EchoRemover> echo_remover_;
BlockProcessorMetrics metrics_;
RenderDelayBuffer::BufferingEvent render_event_;
size_t capture_call_counter_ = 0;
absl::optional<DelayEstimate> estimated_delay_;
};
std::atomic<int> BlockProcessorImpl::instance_count_(0);
BlockProcessorImpl::BlockProcessorImpl(
const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels,
std::unique_ptr<RenderDelayBuffer> render_buffer,
std::unique_ptr<RenderDelayController> delay_controller,
std::unique_ptr<EchoRemover> echo_remover)
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
config_(config),
sample_rate_hz_(sample_rate_hz),
render_buffer_(std::move(render_buffer)),
delay_controller_(std::move(delay_controller)),
echo_remover_(std::move(echo_remover)),
render_event_(RenderDelayBuffer::BufferingEvent::kNone) {
RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
}
BlockProcessorImpl::~BlockProcessorImpl() = default;
void BlockProcessorImpl::ProcessCapture(bool echo_path_gain_change,
bool capture_signal_saturation,
Block* linear_output,
Block* capture_block) {
RTC_DCHECK(capture_block);
RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->NumBands());
capture_call_counter_++;
data_dumper_->DumpRaw("aec3_processblock_call_order",
static_cast<int>(BlockProcessorApiCall::kCapture));
data_dumper_->DumpWav("aec3_processblock_capture_input",
capture_block->View(/*band=*/0, /*channel=*/0), 16000,
1);
if (render_properly_started_) {
if (!capture_properly_started_) {
capture_properly_started_ = true;
render_buffer_->Reset();
if (delay_controller_)
delay_controller_->Reset(true);
}
} else {
// If no render data has yet arrived, do not process the capture signal.
render_buffer_->HandleSkippedCaptureProcessing();
return;
}
EchoPathVariability echo_path_variability(
echo_path_gain_change, EchoPathVariability::DelayAdjustment::kNone,
false);
if (render_event_ == RenderDelayBuffer::BufferingEvent::kRenderOverrun &&
render_properly_started_) {
echo_path_variability.delay_change =
EchoPathVariability::DelayAdjustment::kBufferFlush;
if (delay_controller_)
delay_controller_->Reset(true);
RTC_LOG(LS_WARNING) << "Reset due to render buffer overrun at block "
<< capture_call_counter_;
}
render_event_ = RenderDelayBuffer::BufferingEvent::kNone;
// Update the render buffers with any newly arrived render blocks and prepare
// the render buffers for reading the render data corresponding to the current
// capture block.
RenderDelayBuffer::BufferingEvent buffer_event =
render_buffer_->PrepareCaptureProcessing();
// Reset the delay controller at render buffer underrun.
if (buffer_event == RenderDelayBuffer::BufferingEvent::kRenderUnderrun) {
if (delay_controller_)
delay_controller_->Reset(false);
}
data_dumper_->DumpWav("aec3_processblock_capture_input2",
capture_block->View(/*band=*/0, /*channel=*/0), 16000,
1);
bool has_delay_estimator = !config_.delay.use_external_delay_estimator;
if (has_delay_estimator) {
RTC_DCHECK(delay_controller_);
// Compute and apply the render delay required to achieve proper signal
// alignment.
estimated_delay_ = delay_controller_->GetDelay(
render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(),
*capture_block);
if (estimated_delay_) {
bool delay_change =
render_buffer_->AlignFromDelay(estimated_delay_->delay);
if (delay_change) {
rtc::LoggingSeverity log_level =
config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
: rtc::LS_INFO;
RTC_LOG_V(log_level) << "Delay changed to " << estimated_delay_->delay
<< " at block " << capture_call_counter_;
echo_path_variability.delay_change =
EchoPathVariability::DelayAdjustment::kNewDetectedDelay;
}
}
echo_path_variability.clock_drift = delay_controller_->HasClockdrift();
} else {
render_buffer_->AlignFromExternalDelay();
}
// Remove the echo from the capture signal.
if (has_delay_estimator || render_buffer_->HasReceivedBufferDelay()) {
echo_remover_->ProcessCapture(
echo_path_variability, capture_signal_saturation, estimated_delay_,
render_buffer_->GetRenderBuffer(), linear_output, capture_block);
}
// Update the metrics.
metrics_.UpdateCapture(false);
}
void BlockProcessorImpl::BufferRender(const Block& block) {
RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.NumBands());
data_dumper_->DumpRaw("aec3_processblock_call_order",
static_cast<int>(BlockProcessorApiCall::kRender));
data_dumper_->DumpWav("aec3_processblock_render_input",
block.View(/*band=*/0, /*channel=*/0), 16000, 1);
render_event_ = render_buffer_->Insert(block);
metrics_.UpdateRender(render_event_ !=
RenderDelayBuffer::BufferingEvent::kNone);
render_properly_started_ = true;
if (delay_controller_)
delay_controller_->LogRenderCall();
}
void BlockProcessorImpl::UpdateEchoLeakageStatus(bool leakage_detected) {
echo_remover_->UpdateEchoLeakageStatus(leakage_detected);
}
void BlockProcessorImpl::GetMetrics(EchoControl::Metrics* metrics) const {
echo_remover_->GetMetrics(metrics);
constexpr int block_size_ms = 4;
absl::optional<size_t> delay = render_buffer_->Delay();
metrics->delay_ms = delay ? static_cast<int>(*delay) * block_size_ms : 0;
}
void BlockProcessorImpl::SetAudioBufferDelay(int delay_ms) {
render_buffer_->SetAudioBufferDelay(delay_ms);
}
void BlockProcessorImpl::SetCaptureOutputUsage(bool capture_output_used) {
echo_remover_->SetCaptureOutputUsage(capture_output_used);
}
} // namespace
BlockProcessor* BlockProcessor::Create(const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels) {
std::unique_ptr<RenderDelayBuffer> render_buffer(
RenderDelayBuffer::Create(config, sample_rate_hz, num_render_channels));
std::unique_ptr<RenderDelayController> delay_controller;
if (!config.delay.use_external_delay_estimator) {
delay_controller.reset(RenderDelayController::Create(config, sample_rate_hz,
num_capture_channels));
}
std::unique_ptr<EchoRemover> echo_remover(EchoRemover::Create(
config, sample_rate_hz, num_render_channels, num_capture_channels));
return Create(config, sample_rate_hz, num_render_channels,
num_capture_channels, std::move(render_buffer),
std::move(delay_controller), std::move(echo_remover));
}
BlockProcessor* BlockProcessor::Create(
const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels,
std::unique_ptr<RenderDelayBuffer> render_buffer) {
std::unique_ptr<RenderDelayController> delay_controller;
if (!config.delay.use_external_delay_estimator) {
delay_controller.reset(RenderDelayController::Create(config, sample_rate_hz,
num_capture_channels));
}
std::unique_ptr<EchoRemover> echo_remover(EchoRemover::Create(
config, sample_rate_hz, num_render_channels, num_capture_channels));
return Create(config, sample_rate_hz, num_render_channels,
num_capture_channels, std::move(render_buffer),
std::move(delay_controller), std::move(echo_remover));
}
BlockProcessor* BlockProcessor::Create(
const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels,
std::unique_ptr<RenderDelayBuffer> render_buffer,
std::unique_ptr<RenderDelayController> delay_controller,
std::unique_ptr<EchoRemover> echo_remover) {
return new BlockProcessorImpl(config, sample_rate_hz, num_render_channels,
num_capture_channels, std::move(render_buffer),
std::move(delay_controller),
std::move(echo_remover));
}
} // namespace webrtc

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
#include <stddef.h>
#include <memory>
#include <vector>
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/block.h"
#include "modules/audio_processing/aec3/echo_remover.h"
#include "modules/audio_processing/aec3/render_delay_buffer.h"
#include "modules/audio_processing/aec3/render_delay_controller.h"
namespace webrtc {
// Class for performing echo cancellation on 64 sample blocks of audio data.
class BlockProcessor {
public:
static BlockProcessor* Create(const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels);
// Only used for testing purposes.
static BlockProcessor* Create(
const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels,
std::unique_ptr<RenderDelayBuffer> render_buffer);
static BlockProcessor* Create(
const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels,
std::unique_ptr<RenderDelayBuffer> render_buffer,
std::unique_ptr<RenderDelayController> delay_controller,
std::unique_ptr<EchoRemover> echo_remover);
virtual ~BlockProcessor() = default;
// Get current metrics.
virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;
// Provides an optional external estimate of the audio buffer delay.
virtual void SetAudioBufferDelay(int delay_ms) = 0;
// Processes a block of capture data.
virtual void ProcessCapture(bool echo_path_gain_change,
bool capture_signal_saturation,
Block* linear_output,
Block* capture_block) = 0;
// Buffers a block of render data supplied by a FrameBlocker object.
virtual void BufferRender(const Block& render_block) = 0;
// Reports whether echo leakage has been detected in the echo canceller
// output.
virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;
// Specifies whether the capture output will be used. The purpose of this is
// to allow the block processor to deactivate some of the processing when the
// resulting output is anyway not used, for instance when the endpoint is
// muted.
virtual void SetCaptureOutputUsage(bool capture_output_used) = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
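A hypothetical driver sketch of the intended call order; `config`, `render_block` and `capture_block` are assumed to be prepared elsewhere, and passing nullptr for the linear output is a sketch-level simplification:

#include <memory>

#include "modules/audio_processing/aec3/block_processor.h"

void BlockProcessorUsageSketch(const webrtc::EchoCanceller3Config& config,
                               const webrtc::Block& render_block,
                               webrtc::Block& capture_block) {
  std::unique_ptr<webrtc::BlockProcessor> processor(
      webrtc::BlockProcessor::Create(config, /*sample_rate_hz=*/16000,
                                     /*num_render_channels=*/1,
                                     /*num_capture_channels=*/1));
  // Render data must be buffered before the matching capture block is
  // processed; otherwise capture processing is skipped.
  processor->BufferRender(render_block);
  processor->ProcessCapture(/*echo_path_gain_change=*/false,
                            /*capture_signal_saturation=*/false,
                            /*linear_output=*/nullptr, &capture_block);
}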

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_processor_metrics.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
enum class RenderUnderrunCategory {
kNone,
kFew,
kSeveral,
kMany,
kConstant,
kNumCategories
};
enum class RenderOverrunCategory {
kNone,
kFew,
kSeveral,
kMany,
kConstant,
kNumCategories
};
} // namespace
void BlockProcessorMetrics::UpdateCapture(bool underrun) {
++capture_block_counter_;
if (underrun) {
++render_buffer_underruns_;
}
if (capture_block_counter_ == kMetricsReportingIntervalBlocks) {
metrics_reported_ = true;
RenderUnderrunCategory underrun_category;
if (render_buffer_underruns_ == 0) {
underrun_category = RenderUnderrunCategory::kNone;
} else if (render_buffer_underruns_ > (capture_block_counter_ >> 1)) {
underrun_category = RenderUnderrunCategory::kConstant;
} else if (render_buffer_underruns_ > 100) {
underrun_category = RenderUnderrunCategory::kMany;
} else if (render_buffer_underruns_ > 10) {
underrun_category = RenderUnderrunCategory::kSeveral;
} else {
underrun_category = RenderUnderrunCategory::kFew;
}
RTC_HISTOGRAM_ENUMERATION(
"WebRTC.Audio.EchoCanceller.RenderUnderruns",
static_cast<int>(underrun_category),
static_cast<int>(RenderUnderrunCategory::kNumCategories));
RenderOverrunCategory overrun_category;
if (render_buffer_overruns_ == 0) {
overrun_category = RenderOverrunCategory::kNone;
} else if (render_buffer_overruns_ > (buffer_render_calls_ >> 1)) {
overrun_category = RenderOverrunCategory::kConstant;
} else if (render_buffer_overruns_ > 100) {
overrun_category = RenderOverrunCategory::kMany;
} else if (render_buffer_overruns_ > 10) {
overrun_category = RenderOverrunCategory::kSeveral;
} else {
overrun_category = RenderOverrunCategory::kFew;
}
RTC_HISTOGRAM_ENUMERATION(
"WebRTC.Audio.EchoCanceller.RenderOverruns",
static_cast<int>(overrun_category),
static_cast<int>(RenderOverrunCategory::kNumCategories));
ResetMetrics();
capture_block_counter_ = 0;
} else {
metrics_reported_ = false;
}
}
void BlockProcessorMetrics::UpdateRender(bool overrun) {
++buffer_render_calls_;
if (overrun) {
++render_buffer_overruns_;
}
}
void BlockProcessorMetrics::ResetMetrics() {
render_buffer_underruns_ = 0;
render_buffer_overruns_ = 0;
buffer_render_calls_ = 0;
}
} // namespace webrtc

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
namespace webrtc {
// Handles the reporting of metrics for the block_processor.
class BlockProcessorMetrics {
public:
BlockProcessorMetrics() = default;
BlockProcessorMetrics(const BlockProcessorMetrics&) = delete;
BlockProcessorMetrics& operator=(const BlockProcessorMetrics&) = delete;
// Updates the metric with new capture data.
void UpdateCapture(bool underrun);
// Updates the metric with new render data.
void UpdateRender(bool overrun);
// Returns true if the metrics have just been reported, otherwise false.
bool MetricsReported() { return metrics_reported_; }
private:
// Resets the metrics.
void ResetMetrics();
int capture_block_counter_ = 0;
bool metrics_reported_ = false;
int render_buffer_underruns_ = 0;
int render_buffer_overruns_ = 0;
int buffer_render_calls_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/clockdrift_detector.h"
namespace webrtc {
ClockdriftDetector::ClockdriftDetector()
: level_(Level::kNone), stability_counter_(0) {
delay_history_.fill(0);
}
ClockdriftDetector::~ClockdriftDetector() = default;
void ClockdriftDetector::Update(int delay_estimate) {
if (delay_estimate == delay_history_[0]) {
// Reset clockdrift level if delay estimate is stable for 7500 blocks (30
// seconds).
if (++stability_counter_ > 7500)
level_ = Level::kNone;
return;
}
stability_counter_ = 0;
const int d1 = delay_history_[0] - delay_estimate;
const int d2 = delay_history_[1] - delay_estimate;
const int d3 = delay_history_[2] - delay_estimate;
// Patterns recognized as positive clockdrift:
// [x-3], x-2, x-1, x.
// [x-3], x-1, x-2, x.
const bool probable_drift_up =
(d1 == -1 && d2 == -2) || (d1 == -2 && d2 == -1);
const bool drift_up = probable_drift_up && d3 == -3;
// Patterns recognized as negative clockdrift:
// [x+3], x+2, x+1, x.
// [x+3], x+1, x+2, x.
const bool probable_drift_down = (d1 == 1 && d2 == 2) || (d1 == 2 && d2 == 1);
const bool drift_down = probable_drift_down && d3 == 3;
// Set clockdrift level.
if (drift_up || drift_down) {
level_ = Level::kVerified;
} else if ((probable_drift_up || probable_drift_down) &&
level_ == Level::kNone) {
level_ = Level::kProbable;
}
// Shift delay history one step.
delay_history_[2] = delay_history_[1];
delay_history_[1] = delay_history_[0];
delay_history_[0] = delay_estimate;
}
} // namespace webrtc

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
#include <stddef.h>
#include <array>
namespace webrtc {
class ApmDataDumper;
struct DownsampledRenderBuffer;
struct EchoCanceller3Config;
// Detects clockdrift by analyzing the estimated delay.
class ClockdriftDetector {
public:
enum class Level { kNone, kProbable, kVerified, kNumCategories };
ClockdriftDetector();
~ClockdriftDetector();
void Update(int delay_estimate);
Level ClockdriftLevel() const { return level_; }
private:
std::array<int, 3> delay_history_;
Level level_;
size_t stability_counter_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
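A toy check (not a WebRTC test) of the pattern matching above: a delay estimate stepping up by one per update reproduces the x-3, x-2, x-1, x pattern and drives the level to kVerified.

#include "modules/audio_processing/aec3/clockdrift_detector.h"

void ClockdriftExample() {
  webrtc::ClockdriftDetector detector;
  const int delays[] = {10, 11, 12, 13};
  for (int delay : delays) {
    detector.Update(delay);
  }
  // After the fourth update d1 == -1, d2 == -2, d3 == -3, so the level is
  // ClockdriftDetector::Level::kVerified.
}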

View File

@ -0,0 +1,103 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/coarse_filter_update_gain.h"
#include <algorithm>
#include <functional>
#include "rtc_base/checks.h"
namespace webrtc {
CoarseFilterUpdateGain::CoarseFilterUpdateGain(
const EchoCanceller3Config::Filter::CoarseConfiguration& config,
size_t config_change_duration_blocks)
: config_change_duration_blocks_(
static_cast<int>(config_change_duration_blocks)) {
SetConfig(config, true);
RTC_DCHECK_LT(0, config_change_duration_blocks_);
one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_;
}
void CoarseFilterUpdateGain::HandleEchoPathChange() {
poor_signal_excitation_counter_ = 0;
call_counter_ = 0;
}
void CoarseFilterUpdateGain::Compute(
const std::array<float, kFftLengthBy2Plus1>& render_power,
const RenderSignalAnalyzer& render_signal_analyzer,
const FftData& E_coarse,
size_t size_partitions,
bool saturated_capture_signal,
FftData* G) {
RTC_DCHECK(G);
++call_counter_;
UpdateCurrentConfig();
if (render_signal_analyzer.PoorSignalExcitation()) {
poor_signal_excitation_counter_ = 0;
}
// Do not update the filter if the render is not sufficiently excited.
if (++poor_signal_excitation_counter_ < size_partitions ||
saturated_capture_signal || call_counter_ <= size_partitions) {
G->re.fill(0.f);
G->im.fill(0.f);
return;
}
// Compute mu.
std::array<float, kFftLengthBy2Plus1> mu;
const auto& X2 = render_power;
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
if (X2[k] > current_config_.noise_gate) {
mu[k] = current_config_.rate / X2[k];
} else {
mu[k] = 0.f;
}
}
// Avoid updating the filter close to narrow bands in the render signals.
render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);
// G = mu * E_coarse.
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
G->re[k] = mu[k] * E_coarse.re[k];
G->im[k] = mu[k] * E_coarse.im[k];
}
}
void CoarseFilterUpdateGain::UpdateCurrentConfig() {
RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_);
if (config_change_counter_ > 0) {
if (--config_change_counter_ > 0) {
auto average = [](float from, float to, float from_weight) {
return from * from_weight + to * (1.f - from_weight);
};
float change_factor =
config_change_counter_ * one_by_config_change_duration_blocks_;
current_config_.rate =
average(old_target_config_.rate, target_config_.rate, change_factor);
current_config_.noise_gate =
average(old_target_config_.noise_gate, target_config_.noise_gate,
change_factor);
} else {
current_config_ = old_target_config_ = target_config_;
}
}
RTC_DCHECK_LE(0, config_change_counter_);
}
} // namespace webrtc
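The per-bin step size computed above is an NLMS-style rule: the step is inversely proportional to the render power, and quiet bins are frozen. Isolated as a sketch:

// mu[k] as computed in Compute(): rate / X2 above the noise gate, else 0.
float StepSizeSketch(float x2, float rate, float noise_gate) {
  return x2 > noise_gate ? rate / x2 : 0.f;
}
// The gain then follows as G_re[k] = mu[k] * E_re[k] (likewise for G_im), so
// strongly excited render bins take proportionally smaller steps.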

View File

@ -0,0 +1,74 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
#define MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
#include <stddef.h>
#include <array>
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
namespace webrtc {
// Provides functionality for computing the fixed gain for the coarse filter.
class CoarseFilterUpdateGain {
public:
explicit CoarseFilterUpdateGain(
const EchoCanceller3Config::Filter::CoarseConfiguration& config,
size_t config_change_duration_blocks);
// Takes action in the case of a known echo path change.
void HandleEchoPathChange();
// Computes the gain.
void Compute(const std::array<float, kFftLengthBy2Plus1>& render_power,
const RenderSignalAnalyzer& render_signal_analyzer,
const FftData& E_coarse,
size_t size_partitions,
bool saturated_capture_signal,
FftData* G);
// Sets a new config.
void SetConfig(
const EchoCanceller3Config::Filter::CoarseConfiguration& config,
bool immediate_effect) {
if (immediate_effect) {
old_target_config_ = current_config_ = target_config_ = config;
config_change_counter_ = 0;
} else {
old_target_config_ = current_config_;
target_config_ = config;
config_change_counter_ = config_change_duration_blocks_;
}
}
private:
EchoCanceller3Config::Filter::CoarseConfiguration current_config_;
EchoCanceller3Config::Filter::CoarseConfiguration target_config_;
EchoCanceller3Config::Filter::CoarseConfiguration old_target_config_;
const int config_change_duration_blocks_;
float one_by_config_change_duration_blocks_;
// TODO(peah): Check whether this counter should instead be initialized to a
// large value.
size_t poor_signal_excitation_counter_ = 0;
size_t call_counter_ = 0;
int config_change_counter_ = 0;
void UpdateCurrentConfig();
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_

View File

@ -0,0 +1,186 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/comfort_noise_generator.h"
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>
#include <memory>
#include <numeric>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_processing/aec3/vector_math.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Computes the noise floor value that matches a WGN input of noise_floor_dbfs.
float GetNoiseFloorFactor(float noise_floor_dbfs) {
// kdBfsNormalization = 20.f*log10(32768.f).
constexpr float kdBfsNormalization = 90.30899869919436f;
return 64.f * powf(10.f, (kdBfsNormalization + noise_floor_dbfs) * 0.1f);
}
// Table of sqrt(2) * sin(2*pi*i/32).
constexpr float kSqrt2Sin[32] = {
+0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
+1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
+1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
+0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
-1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
-1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
-0.5411961f, -0.2758994f};
void GenerateComfortNoise(Aec3Optimization optimization,
const std::array<float, kFftLengthBy2Plus1>& N2,
uint32_t* seed,
FftData* lower_band_noise,
FftData* upper_band_noise) {
FftData* N_low = lower_band_noise;
FftData* N_high = upper_band_noise;
// Compute square root spectrum.
std::array<float, kFftLengthBy2Plus1> N;
std::copy(N2.begin(), N2.end(), N.begin());
aec3::VectorMath(optimization).Sqrt(N);
// Compute the noise level for the upper bands.
constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
const float high_band_noise_level =
std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
kOneByNumBands;
// The analysis and synthesis windowing causes loss of power when
// cross-fading the noise where frames are completely uncorrelated
// (generated with random phase), hence the factor sqrt(2).
// This is not the case for the speech signal where the input is overlapping
// (strong correlation).
N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
N_high->re[kFftLengthBy2] = 0.f;
for (size_t k = 1; k < kFftLengthBy2; k++) {
constexpr int kIndexMask = 32 - 1;
// Generate a random 31-bit integer.
seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
// Convert to a 5-bit index.
int i = seed[0] >> 26;
// x = sqrt(2) * sin(a).
const float x = kSqrt2Sin[i];
// y = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2).
const float y = kSqrt2Sin[(i + 8) & kIndexMask];
// Form low-frequency noise via spectral shaping.
N_low->re[k] = N[k] * x;
N_low->im[k] = N[k] * y;
// Form the high-frequency noise via simple levelling.
N_high->re[k] = high_band_noise_level * x;
N_high->im[k] = high_band_noise_level * y;
}
}
} // namespace
ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
Aec3Optimization optimization,
size_t num_capture_channels)
: optimization_(optimization),
seed_(42),
num_capture_channels_(num_capture_channels),
noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
N2_initial_(
std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
num_capture_channels_)),
Y2_smoothed_(num_capture_channels_),
N2_(num_capture_channels_) {
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
(*N2_initial_)[ch].fill(0.f);
Y2_smoothed_[ch].fill(0.f);
N2_[ch].fill(1.0e6f);
}
}
ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;
void ComfortNoiseGenerator::Compute(
bool saturated_capture,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
capture_spectrum,
rtc::ArrayView<FftData> lower_band_noise,
rtc::ArrayView<FftData> upper_band_noise) {
const auto& Y2 = capture_spectrum;
if (!saturated_capture) {
// Smooth Y2.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
Y2[ch].begin(), Y2_smoothed_[ch].begin(),
[](float a, float b) { return a + 0.1f * (b - a); });
}
if (N2_counter_ > 50) {
// Update N2 from Y2_smoothed.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
N2_[ch].begin(), [](float a, float b) {
return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
: a * 1.0002f;
});
}
}
if (N2_initial_) {
if (++N2_counter_ == 1000) {
N2_initial_.reset();
} else {
// Compute the N2_initial from N2.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(N2_[ch].begin(), N2_[ch].end(),
(*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(),
[](float a, float b) {
return a > b ? b + 0.001f * (a - b) : a;
});
}
}
}
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
for (auto& n : N2_[ch]) {
n = std::max(n, noise_floor_);
}
if (N2_initial_) {
for (auto& n : (*N2_initial_)[ch]) {
n = std::max(n, noise_floor_);
}
}
}
}
// Choose N2 estimate to use.
const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
&upper_band_noise[ch]);
}
}
} // namespace webrtc
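The random-phase machinery in GenerateComfortNoise(), in isolation: a 31-bit linear congruential generator supplies a 5-bit index into the sqrt(2)*sin table, and offsetting the index by 8 (a quarter of the 32-entry period) yields the matching cosine, giving a unit-power quadrature pair.

#include <cstdint>

// One draw of the quadrature pair used by GenerateComfortNoise(); kSqrt2Sin
// is the 32-entry table defined above.
void NextRandomPhase(uint32_t* seed, float* x, float* y) {
  *seed = (*seed * 69069 + 1) & 0x7fffffff;  // 31-bit LCG step.
  const int i = *seed >> 26;                 // Top 5 bits: index 0..31.
  *x = kSqrt2Sin[i];                         // sqrt(2) * sin(a).
  *y = kSqrt2Sin[(i + 8) & 31];              // sqrt(2) * cos(a).
}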

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
#include <stdint.h>
#include <array>
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec_state.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace aec3 {
#if defined(WEBRTC_ARCH_X86_FAMILY)
void EstimateComfortNoise_SSE2(const std::array<float, kFftLengthBy2Plus1>& N2,
uint32_t* seed,
FftData* lower_band_noise,
FftData* upper_band_noise);
#endif
void EstimateComfortNoise(const std::array<float, kFftLengthBy2Plus1>& N2,
uint32_t* seed,
FftData* lower_band_noise,
FftData* upper_band_noise);
} // namespace aec3
// Generates the comfort noise.
class ComfortNoiseGenerator {
public:
ComfortNoiseGenerator(const EchoCanceller3Config& config,
Aec3Optimization optimization,
size_t num_capture_channels);
ComfortNoiseGenerator() = delete;
~ComfortNoiseGenerator();
ComfortNoiseGenerator(const ComfortNoiseGenerator&) = delete;
// Computes the comfort noise.
void Compute(bool saturated_capture,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
capture_spectrum,
rtc::ArrayView<FftData> lower_band_noise,
rtc::ArrayView<FftData> upper_band_noise);
// Returns the estimate of the background noise spectrum.
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> NoiseSpectrum()
const {
return N2_;
}
private:
const Aec3Optimization optimization_;
uint32_t seed_;
const size_t num_capture_channels_;
const float noise_floor_;
std::unique_ptr<std::vector<std::array<float, kFftLengthBy2Plus1>>>
N2_initial_;
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_smoothed_;
std::vector<std::array<float, kFftLengthBy2Plus1>> N2_;
int N2_counter_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/config_selector.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Validates that the mono and the multichannel configs have compatible fields.
bool CompatibleConfigs(const EchoCanceller3Config& mono_config,
const EchoCanceller3Config& multichannel_config) {
if (mono_config.delay.fixed_capture_delay_samples !=
multichannel_config.delay.fixed_capture_delay_samples) {
return false;
}
if (mono_config.filter.export_linear_aec_output !=
multichannel_config.filter.export_linear_aec_output) {
return false;
}
if (mono_config.filter.high_pass_filter_echo_reference !=
multichannel_config.filter.high_pass_filter_echo_reference) {
return false;
}
if (mono_config.multi_channel.detect_stereo_content !=
multichannel_config.multi_channel.detect_stereo_content) {
return false;
}
if (mono_config.multi_channel.stereo_detection_timeout_threshold_seconds !=
multichannel_config.multi_channel
.stereo_detection_timeout_threshold_seconds) {
return false;
}
return true;
}
} // namespace
ConfigSelector::ConfigSelector(
const EchoCanceller3Config& config,
const absl::optional<EchoCanceller3Config>& multichannel_config,
int num_render_input_channels)
: config_(config), multichannel_config_(multichannel_config) {
if (multichannel_config_.has_value()) {
RTC_DCHECK(CompatibleConfigs(config_, *multichannel_config_));
}
Update(!config_.multi_channel.detect_stereo_content &&
num_render_input_channels > 1);
RTC_DCHECK(active_config_);
}
void ConfigSelector::Update(bool multichannel_content) {
if (multichannel_content && multichannel_config_.has_value()) {
active_config_ = &(*multichannel_config_);
} else {
active_config_ = &config_;
}
}
} // namespace webrtc

41 VocieProcess/modules/audio_processing/aec3/config_selector.h Normal file
View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
#include "absl/types/optional.h"
#include "api/audio/echo_canceller3_config.h"
namespace webrtc {
// Selects the config to use.
class ConfigSelector {
public:
ConfigSelector(
const EchoCanceller3Config& config,
const absl::optional<EchoCanceller3Config>& multichannel_config,
int num_render_input_channels);
// Updates the config selection based on the detection of multichannel
// content.
void Update(bool multichannel_content);
const EchoCanceller3Config& active_config() const { return *active_config_; }
private:
const EchoCanceller3Config config_;
const absl::optional<EchoCanceller3Config> multichannel_config_;
const EchoCanceller3Config* active_config_ = nullptr;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
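A small sketch of how the selector behaves (hypothetical driver code with default-constructed, field-compatible configs):

EchoCanceller3Config mono_cfg;
EchoCanceller3Config multi_cfg = mono_cfg;  // Must stay field-compatible.
ConfigSelector selector(mono_cfg, multi_cfg,
                        /*num_render_input_channels=*/2);
// With detect_stereo_content true (the default), the constructor starts on
// the mono config and defers to runtime detection via Update().
selector.Update(/*multichannel_content=*/true);   // multi_cfg becomes active.
selector.Update(/*multichannel_content=*/false);  // mono_cfg becomes active.
const EchoCanceller3Config& active = selector.active_config();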

91 VocieProcess/modules/audio_processing/aec3/decimator.cc Normal file
View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/decimator.h"
#include <array>
#include <vector>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// signal.butter(2, 3400/8000.0, 'lowpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetLowPassFilterDS2() {
return std::vector<CascadedBiQuadFilter::BiQuadParam>{
{{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f},
{{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f},
{{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}};
}
// signal.ellip(6, 1, 40, 1800/8000, btype='lowpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetLowPassFilterDS4() {
return std::vector<CascadedBiQuadFilter::BiQuadParam>{
{{-0.08873842f, 0.99605496f}, {0.75916227f, 0.23841065f}, 0.26250696827f},
{{0.62273832f, 0.78243018f}, {0.74892112f, 0.5410152f}, 0.26250696827f},
{{0.71107693f, 0.70311421f}, {0.74895534f, 0.63924616f}, 0.26250696827f}};
}
// signal.cheby1(1, 6, [1000/8000, 2000/8000], btype='bandpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetBandPassFilterDS8() {
return std::vector<CascadedBiQuadFilter::BiQuadParam>{
{{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
{{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
{{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
{{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
{{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}};
}
// signal.butter(2, 1000/8000.0, 'highpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetHighPassFilter() {
return std::vector<CascadedBiQuadFilter::BiQuadParam>{
{{1.f, 0.f}, {0.72712179f, 0.21296904f}, 0.7570763753338849f}};
}
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetPassThroughFilter() {
return std::vector<CascadedBiQuadFilter::BiQuadParam>{};
}
} // namespace
Decimator::Decimator(size_t down_sampling_factor)
: down_sampling_factor_(down_sampling_factor),
anti_aliasing_filter_(down_sampling_factor_ == 4
? GetLowPassFilterDS4()
: (down_sampling_factor_ == 8
? GetBandPassFilterDS8()
: GetLowPassFilterDS2())),
noise_reduction_filter_(down_sampling_factor_ == 8
? GetPassThroughFilter()
: GetHighPassFilter()) {
RTC_DCHECK(down_sampling_factor_ == 2 || down_sampling_factor_ == 4 ||
down_sampling_factor_ == 8);
}
void Decimator::Decimate(rtc::ArrayView<const float> in,
rtc::ArrayView<float> out) {
RTC_DCHECK_EQ(kBlockSize, in.size());
RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size());
std::array<float, kBlockSize> x;
// Limit the frequency content of the signal to avoid aliasing.
anti_aliasing_filter_.Process(in, x);
// Reduce the impact of near-end noise.
noise_reduction_filter_.Process(x);
// Downsample the signal.
for (size_t j = 0, k = 0; j < out.size(); ++j, k += down_sampling_factor_) {
RTC_DCHECK_GT(kBlockSize, k);
out[j] = x[k];
}
}
} // namespace webrtc
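Reading note for the filter tables above: each BiQuadParam appears to encode one complex zero, one complex pole and a gain k, with the conjugate pair implied. Under that assumption a single section realizes

H(z) = k * (1 - z0 z^-1)(1 - conj(z0) z^-1) / ((1 - p0 z^-1)(1 - conj(p0) z^-1)),

and the cascade is the product of its sections. The scipy one-liners in the comments record how the coefficients were designed.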

41 VocieProcess/modules/audio_processing/aec3/decimator.h Normal file
View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
namespace webrtc {
// Provides functionality for decimating a signal.
class Decimator {
public:
explicit Decimator(size_t down_sampling_factor);
Decimator(const Decimator&) = delete;
Decimator& operator=(const Decimator&) = delete;
// Downsamples the signal.
void Decimate(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);
private:
const size_t down_sampling_factor_;
CascadedBiQuadFilter anti_aliasing_filter_;
CascadedBiQuadFilter noise_reduction_filter_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
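A minimal sketch of driving the decimator (hypothetical caller; assumes kBlockSize == 64 as defined in aec3_common.h):

constexpr size_t kDownSamplingFactor = 4;
Decimator decimator(kDownSamplingFactor);
std::array<float, kBlockSize> in{};  // One render block of input samples.
std::array<float, kBlockSize / kDownSamplingFactor> out;  // 16 samples out.
decimator.Decimate(in, out);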

33 VocieProcess/modules/audio_processing/aec3/delay_estimate.h Normal file
View File

@ -0,0 +1,33 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
#define MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
#include <stddef.h>
namespace webrtc {
// Stores a delay estimate together with bookkeeping on its quality and age.
struct DelayEstimate {
enum class Quality { kCoarse, kRefined };
DelayEstimate(Quality quality, size_t delay)
: quality(quality), delay(delay) {}
Quality quality;
size_t delay;
size_t blocks_since_last_change = 0;
size_t blocks_since_last_update = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_

75 VocieProcess/modules/audio_processing/aec3/dominant_nearend_detector.cc Normal file
View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/dominant_nearend_detector.h"
#include <numeric>
namespace webrtc {
DominantNearendDetector::DominantNearendDetector(
const EchoCanceller3Config::Suppressor::DominantNearendDetection& config,
size_t num_capture_channels)
: enr_threshold_(config.enr_threshold),
enr_exit_threshold_(config.enr_exit_threshold),
snr_threshold_(config.snr_threshold),
hold_duration_(config.hold_duration),
trigger_threshold_(config.trigger_threshold),
use_during_initial_phase_(config.use_during_initial_phase),
num_capture_channels_(num_capture_channels),
trigger_counters_(num_capture_channels_),
hold_counters_(num_capture_channels_) {}
void DominantNearendDetector::Update(
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
nearend_spectrum,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
residual_echo_spectrum,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
comfort_noise_spectrum,
bool initial_state) {
nearend_state_ = false;
auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
RTC_DCHECK_LE(16, spectrum.size());
return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
};
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
const float ne_sum = low_frequency_energy(nearend_spectrum[ch]);
const float echo_sum = low_frequency_energy(residual_echo_spectrum[ch]);
const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]);
// Detect strong active nearend if the nearend is sufficiently stronger than
// the echo and the nearend noise.
if ((!initial_state || use_during_initial_phase_) &&
echo_sum < enr_threshold_ * ne_sum &&
ne_sum > snr_threshold_ * noise_sum) {
if (++trigger_counters_[ch] >= trigger_threshold_) {
// After a period of strong active nearend activity, flag nearend mode.
hold_counters_[ch] = hold_duration_;
trigger_counters_[ch] = trigger_threshold_;
}
} else {
// Forget previously detected strong active nearend activity.
trigger_counters_[ch] = std::max(0, trigger_counters_[ch] - 1);
}
// Exit nearend-state early at strong echo.
if (echo_sum > enr_exit_threshold_ * ne_sum &&
echo_sum > snr_threshold_ * noise_sum) {
hold_counters_[ch] = 0;
}
// Remain in any nearend mode for a certain duration.
hold_counters_[ch] = std::max(0, hold_counters_[ch] - 1);
nearend_state_ = nearend_state_ || hold_counters_[ch] > 0;
}
}
} // namespace webrtc
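To make the threshold logic above concrete with illustrative numbers: suppose for one channel ne_sum = 100, echo_sum = 20 and noise_sum = 2, with hypothetical thresholds enr_threshold_ = 0.25 and snr_threshold_ = 30. Then echo_sum < enr_threshold_ * ne_sum (20 < 25) and ne_sum > snr_threshold_ * noise_sum (100 > 60) both hold, so trigger_counters_[ch] increments; once it reaches trigger_threshold_, hold_counters_[ch] is reloaded with hold_duration_ and the nearend state is reported until the hold expires.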

56 VocieProcess/modules/audio_processing/aec3/dominant_nearend_detector.h Normal file
View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
#include <vector>
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/nearend_detector.h"
namespace webrtc {
// Class for selecting whether the suppressor is in the nearend or echo state.
class DominantNearendDetector : public NearendDetector {
public:
DominantNearendDetector(
const EchoCanceller3Config::Suppressor::DominantNearendDetection& config,
size_t num_capture_channels);
// Returns whether the current state is the nearend state.
bool IsNearendState() const override { return nearend_state_; }
// Updates the state selection based on latest spectral estimates.
void Update(rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
nearend_spectrum,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
residual_echo_spectrum,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
comfort_noise_spectrum,
bool initial_state) override;
private:
const float enr_threshold_;
const float enr_exit_threshold_;
const float snr_threshold_;
const int hold_duration_;
const int trigger_threshold_;
const bool use_during_initial_phase_;
const size_t num_capture_channels_;
bool nearend_state_ = false;
std::vector<int> trigger_counters_;
std::vector<int> hold_counters_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_

25 VocieProcess/modules/audio_processing/aec3/downsampled_render_buffer.cc Normal file
View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include <algorithm>
namespace webrtc {
DownsampledRenderBuffer::DownsampledRenderBuffer(size_t downsampled_buffer_size)
: size(static_cast<int>(downsampled_buffer_size)),
buffer(downsampled_buffer_size, 0.f) {
std::fill(buffer.begin(), buffer.end(), 0.f);
}
DownsampledRenderBuffer::~DownsampledRenderBuffer() = default;
} // namespace webrtc

58 VocieProcess/modules/audio_processing/aec3/downsampled_render_buffer.h Normal file
View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "rtc_base/checks.h"
namespace webrtc {
// Holds the circular buffer of the downsampled render data.
struct DownsampledRenderBuffer {
explicit DownsampledRenderBuffer(size_t downsampled_buffer_size);
~DownsampledRenderBuffer();
int IncIndex(int index) const {
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
return index < size - 1 ? index + 1 : 0;
}
int DecIndex(int index) const {
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
return index > 0 ? index - 1 : size - 1;
}
int OffsetIndex(int index, int offset) const {
RTC_DCHECK_GE(buffer.size(), offset);
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
return (size + index + offset) % size;
}
void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
void IncWriteIndex() { write = IncIndex(write); }
void DecWriteIndex() { write = DecIndex(write); }
void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
void IncReadIndex() { read = IncIndex(read); }
void DecReadIndex() { read = DecIndex(read); }
const int size;
std::vector<float> buffer;
int write = 0;
int read = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
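A short sketch of the circular-index helpers above (hypothetical values, with a small buffer for readability):

DownsampledRenderBuffer buf(/*downsampled_buffer_size=*/8);
buf.UpdateWriteIndex(3);          // write: 0 -> (8 + 0 + 3) % 8 == 3
buf.DecWriteIndex();              // write: 3 -> 2
buf.read = 7;
buf.IncReadIndex();               // read wraps: 7 -> 0
int idx = buf.OffsetIndex(6, 4);  // (8 + 6 + 4) % 8 == 2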

119 VocieProcess/modules/audio_processing/aec3/echo_audibility.cc Normal file
View File

@ -0,0 +1,119 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_audibility.h"
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/block_buffer.h"
#include "modules/audio_processing/aec3/spectrum_buffer.h"
#include "modules/audio_processing/aec3/stationarity_estimator.h"
namespace webrtc {
EchoAudibility::EchoAudibility(bool use_render_stationarity_at_init)
: use_render_stationarity_at_init_(use_render_stationarity_at_init) {
Reset();
}
EchoAudibility::~EchoAudibility() = default;
void EchoAudibility::Update(const RenderBuffer& render_buffer,
rtc::ArrayView<const float> average_reverb,
int delay_blocks,
bool external_delay_seen) {
UpdateRenderNoiseEstimator(render_buffer.GetSpectrumBuffer(),
render_buffer.GetBlockBuffer(),
external_delay_seen);
if (external_delay_seen || use_render_stationarity_at_init_) {
UpdateRenderStationarityFlags(render_buffer, average_reverb, delay_blocks);
}
}
void EchoAudibility::Reset() {
render_stationarity_.Reset();
non_zero_render_seen_ = false;
render_spectrum_write_prev_ = absl::nullopt;
}
void EchoAudibility::UpdateRenderStationarityFlags(
const RenderBuffer& render_buffer,
rtc::ArrayView<const float> average_reverb,
int min_channel_delay_blocks) {
const SpectrumBuffer& spectrum_buffer = render_buffer.GetSpectrumBuffer();
int idx_at_delay = spectrum_buffer.OffsetIndex(spectrum_buffer.read,
min_channel_delay_blocks);
int num_lookahead = render_buffer.Headroom() - min_channel_delay_blocks + 1;
num_lookahead = std::max(0, num_lookahead);
render_stationarity_.UpdateStationarityFlags(spectrum_buffer, average_reverb,
idx_at_delay, num_lookahead);
}
void EchoAudibility::UpdateRenderNoiseEstimator(
const SpectrumBuffer& spectrum_buffer,
const BlockBuffer& block_buffer,
bool external_delay_seen) {
if (!render_spectrum_write_prev_) {
render_spectrum_write_prev_ = spectrum_buffer.write;
render_block_write_prev_ = block_buffer.write;
return;
}
int render_spectrum_write_current = spectrum_buffer.write;
if (!non_zero_render_seen_ && !external_delay_seen) {
non_zero_render_seen_ = !IsRenderTooLow(block_buffer);
}
if (non_zero_render_seen_) {
for (int idx = render_spectrum_write_prev_.value();
idx != render_spectrum_write_current;
idx = spectrum_buffer.DecIndex(idx)) {
render_stationarity_.UpdateNoiseEstimator(spectrum_buffer.buffer[idx]);
}
}
render_spectrum_write_prev_ = render_spectrum_write_current;
}
bool EchoAudibility::IsRenderTooLow(const BlockBuffer& block_buffer) {
const int num_render_channels =
static_cast<int>(block_buffer.buffer[0].NumChannels());
bool too_low = false;
const int render_block_write_current = block_buffer.write;
if (render_block_write_current == render_block_write_prev_) {
too_low = true;
} else {
for (int idx = render_block_write_prev_; idx != render_block_write_current;
idx = block_buffer.IncIndex(idx)) {
float max_abs_over_channels = 0.f;
for (int ch = 0; ch < num_render_channels; ++ch) {
rtc::ArrayView<const float, kBlockSize> block =
block_buffer.buffer[idx].View(/*band=*/0, /*channel=*/ch);
auto r = std::minmax_element(block.cbegin(), block.cend());
float max_abs_channel =
std::max(std::fabs(*r.first), std::fabs(*r.second));
max_abs_over_channels =
std::max(max_abs_over_channels, max_abs_channel);
}
if (max_abs_over_channels < 10.f) {
too_low = true;  // Flag all blocks as too low if one of them is too low.
break;
}
}
}
render_block_write_prev_ = render_block_write_current;
return too_low;
}
} // namespace webrtc

85 VocieProcess/modules/audio_processing/aec3/echo_audibility.h Normal file
View File

@ -0,0 +1,85 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
#include <stddef.h>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/block_buffer.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/spectrum_buffer.h"
#include "modules/audio_processing/aec3/stationarity_estimator.h"
namespace webrtc {
class EchoAudibility {
public:
explicit EchoAudibility(bool use_render_stationarity_at_init);
~EchoAudibility();
EchoAudibility(const EchoAudibility&) = delete;
EchoAudibility& operator=(const EchoAudibility&) = delete;
// Feed new render data to the echo audibility estimator.
void Update(const RenderBuffer& render_buffer,
rtc::ArrayView<const float> average_reverb,
int min_channel_delay_blocks,
bool external_delay_seen);
// Get the residual echo scaling.
void GetResidualEchoScaling(bool filter_has_had_time_to_converge,
rtc::ArrayView<float> residual_scaling) const {
for (size_t band = 0; band < residual_scaling.size(); ++band) {
if (render_stationarity_.IsBandStationary(band) &&
(filter_has_had_time_to_converge ||
use_render_stationarity_at_init_)) {
residual_scaling[band] = 0.f;
} else {
residual_scaling[band] = 1.0f;
}
}
}
// Returns true if the current render block is estimated as stationary.
bool IsBlockStationary() const {
return render_stationarity_.IsBlockStationary();
}
private:
// Reset the EchoAudibility class.
void Reset();
// Updates the render stationarity flags for the current frame.
void UpdateRenderStationarityFlags(const RenderBuffer& render_buffer,
rtc::ArrayView<const float> average_reverb,
int delay_blocks);
// Updates the noise estimator with the new render data since the previous
// call to this method.
void UpdateRenderNoiseEstimator(const SpectrumBuffer& spectrum_buffer,
const BlockBuffer& block_buffer,
bool external_delay_seen);
// Returns true if the render signal contains only values close to zero.
bool IsRenderTooLow(const BlockBuffer& block_buffer);
absl::optional<int> render_spectrum_write_prev_;
int render_block_write_prev_;
bool non_zero_render_seen_;
const bool use_render_stationarity_at_init_;
StationarityEstimator render_stationarity_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
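A sketch of how the scaling getter above is typically consumed (hypothetical caller code; echo_audibility is an instance of the class above):

std::array<float, kFftLengthBy2Plus1> residual_scaling;
echo_audibility.GetResidualEchoScaling(
    /*filter_has_had_time_to_converge=*/true, residual_scaling);
// Bands judged stationary receive scaling 0.f (no audible residual echo is
// expected there); all other bands keep scaling 1.f.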

991 VocieProcess/modules/audio_processing/aec3/echo_canceller3.cc Normal file
View File

@ -0,0 +1,991 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_canceller3.h"
#include <algorithm>
#include <utility>
#include "absl/strings/string_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/high_pass_filter.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/experiments/field_trial_parser.h"
#include "rtc_base/logging.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
enum class EchoCanceller3ApiCall { kCapture, kRender };
bool DetectSaturation(rtc::ArrayView<const float> y) {
for (size_t k = 0; k < y.size(); ++k) {
if (y[k] >= 32700.0f || y[k] <= -32700.0f) {
return true;
}
}
return false;
}
// Retrieves a value from a field trial and writes it to `value_to_update` if
// the trial is available. If no value is present, or the retrieved value is
// beyond the specified limits, `value_to_update` is left unchanged.
void RetrieveFieldTrialValue(absl::string_view trial_name,
float min,
float max,
float* value_to_update) {
const std::string field_trial_str = field_trial::FindFullName(trial_name);
FieldTrialParameter<double> field_trial_param(/*key=*/"", *value_to_update);
ParseFieldTrial({&field_trial_param}, field_trial_str);
float field_trial_value = static_cast<float>(field_trial_param.Get());
if (field_trial_value >= min && field_trial_value <= max &&
field_trial_value != *value_to_update) {
RTC_LOG(LS_INFO) << "Key " << trial_name
<< " changing AEC3 parameter value from "
<< *value_to_update << " to " << field_trial_value;
*value_to_update = field_trial_value;
}
}
void RetrieveFieldTrialValue(absl::string_view trial_name,
int min,
int max,
int* value_to_update) {
const std::string field_trial_str = field_trial::FindFullName(trial_name);
FieldTrialParameter<int> field_trial_param(/*key=*/"", *value_to_update);
ParseFieldTrial({&field_trial_param}, field_trial_str);
int field_trial_value = field_trial_param.Get();
if (field_trial_value >= min && field_trial_value <= max &&
field_trial_value != *value_to_update) {
RTC_LOG(LS_INFO) << "Key " << trial_name
<< " changing AEC3 parameter value from "
<< *value_to_update << " to " << field_trial_value;
*value_to_update = field_trial_value;
}
}
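// Illustration (hypothetical, not part of this commit): after
//   float v = 0.25f;
//   RetrieveFieldTrialValue("WebRTC-SomeAec3Override", 0.f, 1.f, &v);
// v is updated only if the trial is registered, the parsed value lies within
// [0.f, 1.f] and it differs from the current value; otherwise v stays 0.25f.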
void FillSubFrameView(
AudioBuffer* frame,
size_t sub_frame_index,
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
RTC_DCHECK_GE(1, sub_frame_index);
RTC_DCHECK_LE(0, sub_frame_index);
RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size());
RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size());
for (size_t band = 0; band < sub_frame_view->size(); ++band) {
for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) {
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
&frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength],
kSubFrameLength);
}
}
}
void FillSubFrameView(
bool proper_downmix_needed,
std::vector<std::vector<std::vector<float>>>* frame,
size_t sub_frame_index,
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
RTC_DCHECK_GE(1, sub_frame_index);
RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
const size_t frame_num_channels = (*frame)[0].size();
const size_t sub_frame_num_channels = (*sub_frame_view)[0].size();
if (frame_num_channels > sub_frame_num_channels) {
RTC_DCHECK_EQ(sub_frame_num_channels, 1u);
if (proper_downmix_needed) {
// When a proper downmix is needed (i.e., when the echo reference contains
// proper stereo content but the echo canceller processes it in mono),
// downmix the echo reference by averaging the channel content; otherwise,
// downmix by selecting channel 0.
for (size_t band = 0; band < frame->size(); ++band) {
for (size_t ch = 1; ch < frame_num_channels; ++ch) {
for (size_t k = 0; k < kSubFrameLength; ++k) {
(*frame)[band][/*channel=*/0]
[sub_frame_index * kSubFrameLength + k] +=
(*frame)[band][ch][sub_frame_index * kSubFrameLength + k];
}
}
const float one_by_num_channels = 1.0f / frame_num_channels;
for (size_t k = 0; k < kSubFrameLength; ++k) {
(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength +
k] *= one_by_num_channels;
}
}
}
for (size_t band = 0; band < frame->size(); ++band) {
(*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView<float>(
&(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength],
kSubFrameLength);
}
} else {
RTC_DCHECK_EQ(frame_num_channels, sub_frame_num_channels);
for (size_t band = 0; band < frame->size(); ++band) {
for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
&(*frame)[band][channel][sub_frame_index * kSubFrameLength],
kSubFrameLength);
}
}
}
}
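// Downmix illustration for the averaging branch above (hypothetical numbers):
// with frame_num_channels == 2 and per-sample values L[k] and R[k], channel 0
// ends up holding (L[k] + R[k]) / 2 for each k in the sub frame, since the
// channels are first summed into channel 0 and then scaled by
// 1 / frame_num_channels.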
void ProcessCaptureFrameContent(
AudioBuffer* linear_output,
AudioBuffer* capture,
bool level_change,
bool aec_reference_is_downmixed_stereo,
bool saturated_microphone_signal,
size_t sub_frame_index,
FrameBlocker* capture_blocker,
BlockFramer* linear_output_framer,
BlockFramer* output_framer,
BlockProcessor* block_processor,
Block* linear_output_block,
std::vector<std::vector<rtc::ArrayView<float>>>*
linear_output_sub_frame_view,
Block* capture_block,
std::vector<std::vector<rtc::ArrayView<float>>>* capture_sub_frame_view) {
FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view);
if (linear_output) {
RTC_DCHECK(linear_output_framer);
RTC_DCHECK(linear_output_block);
RTC_DCHECK(linear_output_sub_frame_view);
FillSubFrameView(linear_output, sub_frame_index,
linear_output_sub_frame_view);
}
capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view,
capture_block);
block_processor->ProcessCapture(
/*echo_path_gain_change=*/level_change ||
aec_reference_is_downmixed_stereo,
saturated_microphone_signal, linear_output_block, capture_block);
output_framer->InsertBlockAndExtractSubFrame(*capture_block,
capture_sub_frame_view);
if (linear_output) {
RTC_DCHECK(linear_output_framer);
linear_output_framer->InsertBlockAndExtractSubFrame(
*linear_output_block, linear_output_sub_frame_view);
}
}
void ProcessRemainingCaptureFrameContent(bool level_change,
bool aec_reference_is_downmixed_stereo,
bool saturated_microphone_signal,
FrameBlocker* capture_blocker,
BlockFramer* linear_output_framer,
BlockFramer* output_framer,
BlockProcessor* block_processor,
Block* linear_output_block,
Block* block) {
if (!capture_blocker->IsBlockAvailable()) {
return;
}
capture_blocker->ExtractBlock(block);
block_processor->ProcessCapture(
/*echo_path_gain_change=*/level_change ||
aec_reference_is_downmixed_stereo,
saturated_microphone_signal, linear_output_block, block);
output_framer->InsertBlock(*block);
if (linear_output_framer) {
RTC_DCHECK(linear_output_block);
linear_output_framer->InsertBlock(*linear_output_block);
}
}
void BufferRenderFrameContent(
bool proper_downmix_needed,
std::vector<std::vector<std::vector<float>>>* render_frame,
size_t sub_frame_index,
FrameBlocker* render_blocker,
BlockProcessor* block_processor,
Block* block,
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
FillSubFrameView(proper_downmix_needed, render_frame, sub_frame_index,
sub_frame_view);
render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block);
block_processor->BufferRender(*block);
}
void BufferRemainingRenderFrameContent(FrameBlocker* render_blocker,
BlockProcessor* block_processor,
Block* block) {
if (!render_blocker->IsBlockAvailable()) {
return;
}
render_blocker->ExtractBlock(block);
block_processor->BufferRender(*block);
}
void CopyBufferIntoFrame(const AudioBuffer& buffer,
size_t num_bands,
size_t num_channels,
std::vector<std::vector<std::vector<float>>>* frame) {
RTC_DCHECK_EQ(num_bands, frame->size());
RTC_DCHECK_EQ(num_channels, (*frame)[0].size());
RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size());
for (size_t band = 0; band < num_bands; ++band) {
for (size_t channel = 0; channel < num_channels; ++channel) {
rtc::ArrayView<const float> buffer_view(
&buffer.split_bands_const(channel)[band][0],
AudioBuffer::kSplitBandSize);
std::copy(buffer_view.begin(), buffer_view.end(),
(*frame)[band][channel].begin());
}
}
}
} // namespace
// TODO(webrtc:5298): Move this to a separate file.
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
EchoCanceller3Config adjusted_cfg = config;
if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) {
adjusted_cfg.multi_channel.detect_stereo_content = false;
}
if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) {
adjusted_cfg.suppressor.high_bands_suppression
.anti_howling_activation_threshold = 25.f;
adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 0.01f;
}
if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) {
adjusted_cfg.filter.config_change_duration_blocks = 10;
}
if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = 0.f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3UseDot1SecondsInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = .1f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3UseDot2SecondsInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = .2f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3UseDot3SecondsInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = .3f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3UseDot6SecondsInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = .6f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3UseDot9SecondsInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = .9f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = 1.2f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = 1.6f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) {
adjusted_cfg.filter.initial_state_seconds = 2.0f;
}
if (field_trial::IsEnabled("WebRTC-Aec3HighPassFilterEchoReference")) {
adjusted_cfg.filter.high_pass_filter_echo_reference = true;
}
if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
adjusted_cfg.ep_strength.echo_can_saturate = false;
}
const std::string use_nearend_reverb_len_tunings =
field_trial::FindFullName("WebRTC-Aec3UseNearendReverbLen");
FieldTrialParameter<double> nearend_reverb_default_len(
"default_len", adjusted_cfg.ep_strength.default_len);
FieldTrialParameter<double> nearend_reverb_nearend_len(
"nearend_len", adjusted_cfg.ep_strength.nearend_len);
ParseFieldTrial({&nearend_reverb_default_len, &nearend_reverb_nearend_len},
use_nearend_reverb_len_tunings);
float default_len = static_cast<float>(nearend_reverb_default_len.Get());
float nearend_len = static_cast<float>(nearend_reverb_nearend_len.Get());
if (default_len > -1 && default_len < 1 && nearend_len > -1 &&
nearend_len < 1) {
adjusted_cfg.ep_strength.default_len =
static_cast<float>(nearend_reverb_default_len.Get());
adjusted_cfg.ep_strength.nearend_len =
static_cast<float>(nearend_reverb_nearend_len.Get());
}
if (field_trial::IsEnabled("WebRTC-Aec3ConservativeTailFreqResponse")) {
adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = true;
}
if (field_trial::IsDisabled("WebRTC-Aec3ConservativeTailFreqResponse")) {
adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = false;
}
if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
// Two blocks headroom.
adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
}
if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) {
adjusted_cfg.erle.clamp_quality_estimate_to_zero = false;
}
if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) {
adjusted_cfg.erle.clamp_quality_estimate_to_one = false;
}
if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) {
adjusted_cfg.erle.onset_detection = false;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) {
adjusted_cfg.delay.render_alignment_mixing.downmix = true;
adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) {
adjusted_cfg.delay.capture_alignment_mixing.downmix = true;
adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) {
adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
true;
}
if (field_trial::IsEnabled(
"WebRTC-"
"Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) {
adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
false;
}
if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) {
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3VerySensitiveDominantNearendActivation")) {
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f;
}
if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) {
adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) {
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f;
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) {
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f;
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) {
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f;
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) {
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f;
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) {
adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) {
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) {
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) {
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
}
if (field_trial::IsEnabled("WebRTC-Aec3EnforceConservativeHfSuppression")) {
adjusted_cfg.suppressor.conservative_hf_suppression = true;
}
if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
adjusted_cfg.echo_audibility.use_stationarity_properties = true;
}
if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceStationarityPropertiesAtInit")) {
adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true;
}
if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) {
adjusted_cfg.render_levels.active_render_limit = 50.f;
} else if (field_trial::IsEnabled(
"WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) {
adjusted_cfg.render_levels.active_render_limit = 30.f;
}
if (field_trial::IsEnabled("WebRTC-Aec3NonlinearModeReverbKillSwitch")) {
adjusted_cfg.echo_model.model_reverb_in_nonlinear_mode = false;
}
// Field-trial based override for the whole suppressor tuning.
const std::string suppressor_tuning_override_trial_name =
field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride");
FieldTrialParameter<double> nearend_tuning_mask_lf_enr_transparent(
"nearend_tuning_mask_lf_enr_transparent",
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
FieldTrialParameter<double> nearend_tuning_mask_lf_enr_suppress(
"nearend_tuning_mask_lf_enr_suppress",
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
FieldTrialParameter<double> nearend_tuning_mask_hf_enr_transparent(
"nearend_tuning_mask_hf_enr_transparent",
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
FieldTrialParameter<double> nearend_tuning_mask_hf_enr_suppress(
"nearend_tuning_mask_hf_enr_suppress",
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
FieldTrialParameter<double> nearend_tuning_max_inc_factor(
"nearend_tuning_max_inc_factor",
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
FieldTrialParameter<double> nearend_tuning_max_dec_factor_lf(
"nearend_tuning_max_dec_factor_lf",
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
FieldTrialParameter<double> normal_tuning_mask_lf_enr_transparent(
"normal_tuning_mask_lf_enr_transparent",
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
FieldTrialParameter<double> normal_tuning_mask_lf_enr_suppress(
"normal_tuning_mask_lf_enr_suppress",
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
FieldTrialParameter<double> normal_tuning_mask_hf_enr_transparent(
"normal_tuning_mask_hf_enr_transparent",
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
FieldTrialParameter<double> normal_tuning_mask_hf_enr_suppress(
"normal_tuning_mask_hf_enr_suppress",
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
FieldTrialParameter<double> normal_tuning_max_inc_factor(
"normal_tuning_max_inc_factor",
adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
FieldTrialParameter<double> normal_tuning_max_dec_factor_lf(
"normal_tuning_max_dec_factor_lf",
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
FieldTrialParameter<double> dominant_nearend_detection_enr_threshold(
"dominant_nearend_detection_enr_threshold",
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
FieldTrialParameter<double> dominant_nearend_detection_enr_exit_threshold(
"dominant_nearend_detection_enr_exit_threshold",
adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
FieldTrialParameter<double> dominant_nearend_detection_snr_threshold(
"dominant_nearend_detection_snr_threshold",
adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
FieldTrialParameter<int> dominant_nearend_detection_hold_duration(
"dominant_nearend_detection_hold_duration",
adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
"dominant_nearend_detection_trigger_threshold",
adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
ParseFieldTrial(
{&nearend_tuning_mask_lf_enr_transparent,
&nearend_tuning_mask_lf_enr_suppress,
&nearend_tuning_mask_hf_enr_transparent,
&nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor,
&nearend_tuning_max_dec_factor_lf,
&normal_tuning_mask_lf_enr_transparent,
&normal_tuning_mask_lf_enr_suppress,
&normal_tuning_mask_hf_enr_transparent,
&normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor,
&normal_tuning_max_dec_factor_lf,
&dominant_nearend_detection_enr_threshold,
&dominant_nearend_detection_enr_exit_threshold,
&dominant_nearend_detection_snr_threshold,
&dominant_nearend_detection_hold_duration,
&dominant_nearend_detection_trigger_threshold},
suppressor_tuning_override_trial_name);
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
static_cast<float>(nearend_tuning_mask_lf_enr_transparent.Get());
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress =
static_cast<float>(nearend_tuning_mask_lf_enr_suppress.Get());
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent =
static_cast<float>(nearend_tuning_mask_hf_enr_transparent.Get());
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress =
static_cast<float>(nearend_tuning_mask_hf_enr_suppress.Get());
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor =
static_cast<float>(nearend_tuning_max_inc_factor.Get());
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf =
static_cast<float>(nearend_tuning_max_dec_factor_lf.Get());
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent =
static_cast<float>(normal_tuning_mask_lf_enr_transparent.Get());
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress =
static_cast<float>(normal_tuning_mask_lf_enr_suppress.Get());
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent =
static_cast<float>(normal_tuning_mask_hf_enr_transparent.Get());
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress =
static_cast<float>(normal_tuning_mask_hf_enr_suppress.Get());
adjusted_cfg.suppressor.normal_tuning.max_inc_factor =
static_cast<float>(normal_tuning_max_inc_factor.Get());
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf =
static_cast<float>(normal_tuning_max_dec_factor_lf.Get());
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold =
static_cast<float>(dominant_nearend_detection_enr_threshold.Get());
adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold =
static_cast<float>(dominant_nearend_detection_enr_exit_threshold.Get());
adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold =
static_cast<float>(dominant_nearend_detection_snr_threshold.Get());
adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration =
dominant_nearend_detection_hold_duration.Get();
adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
dominant_nearend_detection_trigger_threshold.Get();
// Field trial-based overrides of individual suppressor parameters.
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f,
&adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f,
100.f,
&adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f,
&adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000,
&adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000,
&adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
RetrieveFieldTrialValue(
"WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
&adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
// Field trial-based overrides of individual delay estimator parameters.
RetrieveFieldTrialValue("WebRTC-Aec3DelayEstimateSmoothingOverride", 0.f, 1.f,
&adjusted_cfg.delay.delay_estimate_smoothing);
RetrieveFieldTrialValue(
"WebRTC-Aec3DelayEstimateSmoothingDelayFoundOverride", 0.f, 1.f,
&adjusted_cfg.delay.delay_estimate_smoothing_delay_found);
int max_allowed_excess_render_blocks_override =
adjusted_cfg.buffering.max_allowed_excess_render_blocks;
RetrieveFieldTrialValue(
"WebRTC-Aec3BufferingMaxAllowedExcessRenderBlocksOverride", 0, 20,
&max_allowed_excess_render_blocks_override);
adjusted_cfg.buffering.max_allowed_excess_render_blocks =
max_allowed_excess_render_blocks_override;
return adjusted_cfg;
}
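// Illustration (hypothetical trial group string): registering
// "WebRTC-Aec3SuppressorTuningOverride" with a value such as
// "nearend_tuning_max_inc_factor:2.0,dominant_nearend_detection_enr_threshold:0.5"
// would, assuming the comma-separated key:value field-trial format, override
// just those two suppressor fields while all other fields keep their
// AdjustConfig-derived values.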
class EchoCanceller3::RenderWriter {
public:
RenderWriter(ApmDataDumper* data_dumper,
const EchoCanceller3Config& config,
SwapQueue<std::vector<std::vector<std::vector<float>>>,
Aec3RenderQueueItemVerifier>* render_transfer_queue,
size_t num_bands,
size_t num_channels);
RenderWriter() = delete;
RenderWriter(const RenderWriter&) = delete;
RenderWriter& operator=(const RenderWriter&) = delete;
~RenderWriter();
void Insert(const AudioBuffer& input);
private:
ApmDataDumper* data_dumper_;
const size_t num_bands_;
const size_t num_channels_;
std::unique_ptr<HighPassFilter> high_pass_filter_;
std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
SwapQueue<std::vector<std::vector<std::vector<float>>>,
Aec3RenderQueueItemVerifier>* render_transfer_queue_;
};
EchoCanceller3::RenderWriter::RenderWriter(
ApmDataDumper* data_dumper,
const EchoCanceller3Config& config,
SwapQueue<std::vector<std::vector<std::vector<float>>>,
Aec3RenderQueueItemVerifier>* render_transfer_queue,
size_t num_bands,
size_t num_channels)
: data_dumper_(data_dumper),
num_bands_(num_bands),
num_channels_(num_channels),
render_queue_input_frame_(
num_bands_,
std::vector<std::vector<float>>(
num_channels_,
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
render_transfer_queue_(render_transfer_queue) {
RTC_DCHECK(data_dumper);
if (config.filter.high_pass_filter_echo_reference) {
high_pass_filter_ = std::make_unique<HighPassFilter>(16000, num_channels);
}
}
EchoCanceller3::RenderWriter::~RenderWriter() = default;
void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band());
RTC_DCHECK_EQ(num_bands_, input.num_bands());
RTC_DCHECK_EQ(num_channels_, input.num_channels());
// TODO(bugs.webrtc.org/8759) Temporary work-around.
if (num_bands_ != input.num_bands())
return;
data_dumper_->DumpWav("aec3_render_input", AudioBuffer::kSplitBandSize,
&input.split_bands_const(0)[0][0], 16000, 1);
CopyBufferIntoFrame(input, num_bands_, num_channels_,
&render_queue_input_frame_);
if (high_pass_filter_) {
high_pass_filter_->Process(&render_queue_input_frame_[0]);
}
static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
}
std::atomic<int> EchoCanceller3::instance_count_(0);
EchoCanceller3::EchoCanceller3(
const EchoCanceller3Config& config,
const absl::optional<EchoCanceller3Config>& multichannel_config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels)
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
config_(AdjustConfig(config)),
sample_rate_hz_(sample_rate_hz),
num_bands_(NumBandsForRate(sample_rate_hz_)),
num_render_input_channels_(num_render_channels),
num_capture_channels_(num_capture_channels),
config_selector_(AdjustConfig(config),
multichannel_config,
num_render_input_channels_),
multichannel_content_detector_(
config_selector_.active_config().multi_channel.detect_stereo_content,
num_render_input_channels_,
config_selector_.active_config()
.multi_channel.stereo_detection_threshold,
config_selector_.active_config()
.multi_channel.stereo_detection_timeout_threshold_seconds,
config_selector_.active_config()
.multi_channel.stereo_detection_hysteresis_seconds),
output_framer_(num_bands_, num_capture_channels_),
capture_blocker_(num_bands_, num_capture_channels_),
render_transfer_queue_(
kRenderTransferQueueSizeFrames,
std::vector<std::vector<std::vector<float>>>(
num_bands_,
std::vector<std::vector<float>>(
num_render_input_channels_,
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
Aec3RenderQueueItemVerifier(num_bands_,
num_render_input_channels_,
AudioBuffer::kSplitBandSize)),
render_queue_output_frame_(
num_bands_,
std::vector<std::vector<float>>(
num_render_input_channels_,
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
render_block_(num_bands_, num_render_input_channels_),
capture_block_(num_bands_, num_capture_channels_),
capture_sub_frame_view_(
num_bands_,
std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
block_delay_buffer_.reset(new BlockDelayBuffer(
num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
config_.delay.fixed_capture_delay_samples));
}
render_writer_.reset(new RenderWriter(
data_dumper_.get(), config_selector_.active_config(),
&render_transfer_queue_, num_bands_, num_render_input_channels_));
RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
RTC_DCHECK_GE(kMaxNumBands, num_bands_);
if (config_selector_.active_config().filter.export_linear_aec_output) {
linear_output_framer_.reset(
new BlockFramer(/*num_bands=*/1, num_capture_channels_));
linear_output_block_ =
std::make_unique<Block>(/*num_bands=*/1, num_capture_channels_);
linear_output_sub_frame_view_ =
std::vector<std::vector<rtc::ArrayView<float>>>(
1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
}
Initialize();
RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_
<< " Hz, num render channels: " << num_render_input_channels_
<< ", num capture channels: " << num_capture_channels_;
}
EchoCanceller3::~EchoCanceller3() = default;
void EchoCanceller3::Initialize() {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
num_render_channels_to_aec_ =
multichannel_content_detector_.IsProperMultiChannelContentDetected()
? num_render_input_channels_
: 1;
config_selector_.Update(
multichannel_content_detector_.IsProperMultiChannelContentDetected());
render_block_.SetNumChannels(num_render_channels_to_aec_);
render_blocker_.reset(
new FrameBlocker(num_bands_, num_render_channels_to_aec_));
block_processor_.reset(BlockProcessor::Create(
config_selector_.active_config(), sample_rate_hz_,
num_render_channels_to_aec_, num_capture_channels_));
render_sub_frame_view_ = std::vector<std::vector<rtc::ArrayView<float>>>(
num_bands_,
std::vector<rtc::ArrayView<float>>(num_render_channels_to_aec_));
}
void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_);
data_dumper_->DumpRaw("aec3_call_order",
static_cast<int>(EchoCanceller3ApiCall::kRender));
return render_writer_->Insert(render);
}
void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(),
capture.channels_const()[0], sample_rate_hz_, 1);
saturated_microphone_signal_ = false;
for (size_t channel = 0; channel < capture.num_channels(); ++channel) {
saturated_microphone_signal_ |=
DetectSaturation(rtc::ArrayView<const float>(
capture.channels_const()[channel], capture.num_frames()));
if (saturated_microphone_signal_) {
break;
}
}
}
void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
ProcessCapture(capture, nullptr, level_change);
}
void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
AudioBuffer* linear_output,
bool level_change) {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
RTC_DCHECK(capture);
RTC_DCHECK_EQ(num_bands_, capture->num_bands());
RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);
data_dumper_->DumpRaw("aec3_call_order",
static_cast<int>(EchoCanceller3ApiCall::kCapture));
if (linear_output && !linear_output_framer_) {
RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
"properly configuring AEC3.";
RTC_DCHECK_NOTREACHED();
}
// Report capture call in the metrics and periodically update API call
// metrics.
api_call_metrics_.ReportCaptureCall();
// Optionally delay the capture signal.
if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
RTC_DCHECK(block_delay_buffer_);
block_delay_buffer_->DelaySignal(capture);
}
rtc::ArrayView<float> capture_lower_band = rtc::ArrayView<float>(
&capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize);
data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1);
EmptyRenderQueue();
ProcessCaptureFrameContent(
linear_output, capture, level_change,
multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
saturated_microphone_signal_, 0, &capture_blocker_,
linear_output_framer_.get(), &output_framer_, block_processor_.get(),
linear_output_block_.get(), &linear_output_sub_frame_view_,
&capture_block_, &capture_sub_frame_view_);
ProcessCaptureFrameContent(
linear_output, capture, level_change,
multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
saturated_microphone_signal_, 1, &capture_blocker_,
linear_output_framer_.get(), &output_framer_, block_processor_.get(),
linear_output_block_.get(), &linear_output_sub_frame_view_,
&capture_block_, &capture_sub_frame_view_);
ProcessRemainingCaptureFrameContent(
level_change,
multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
saturated_microphone_signal_, &capture_blocker_,
linear_output_framer_.get(), &output_framer_, block_processor_.get(),
linear_output_block_.get(), &capture_block_);
data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize,
&capture->split_bands(0)[0][0], 16000, 1);
}
EchoControl::Metrics EchoCanceller3::GetMetrics() const {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
Metrics metrics;
block_processor_->GetMetrics(&metrics);
return metrics;
}
void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
block_processor_->SetAudioBufferDelay(delay_ms);
}
void EchoCanceller3::SetCaptureOutputUsage(bool capture_output_used) {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
block_processor_->SetCaptureOutputUsage(capture_output_used);
}
bool EchoCanceller3::ActiveProcessing() const {
return true;
}
EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() {
EchoCanceller3Config cfg;
// Use a shorter and more rapidly adapting coarse filter to compensate for
// the increased number of total filter parameters to adapt.
cfg.filter.coarse.length_blocks = 11;
cfg.filter.coarse.rate = 0.95f;
cfg.filter.coarse_initial.length_blocks = 11;
cfg.filter.coarse_initial.rate = 0.95f;
// Use more conservative suppressor behavior for non-nearend speech.
cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
return cfg;
}
void EchoCanceller3::SetBlockProcessorForTesting(
std::unique_ptr<BlockProcessor> block_processor) {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
RTC_DCHECK(block_processor);
block_processor_ = std::move(block_processor);
}
void EchoCanceller3::EmptyRenderQueue() {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
bool frame_to_buffer =
render_transfer_queue_.Remove(&render_queue_output_frame_);
while (frame_to_buffer) {
// Report render call in the metrics.
api_call_metrics_.ReportRenderCall();
if (multichannel_content_detector_.UpdateDetection(
render_queue_output_frame_)) {
// Reinitialize the AEC when proper stereo is detected.
Initialize();
}
// Buffer frame content.
BufferRenderFrameContent(
/*proper_downmix_needed=*/multichannel_content_detector_
.IsTemporaryMultiChannelContentDetected(),
&render_queue_output_frame_, 0, render_blocker_.get(),
block_processor_.get(), &render_block_, &render_sub_frame_view_);
BufferRenderFrameContent(
/*proper_downmix_needed=*/multichannel_content_detector_
.IsTemporaryMultiChannelContentDetected(),
&render_queue_output_frame_, 1, render_blocker_.get(),
block_processor_.get(), &render_block_, &render_sub_frame_view_);
BufferRemainingRenderFrameContent(render_blocker_.get(),
block_processor_.get(), &render_block_);
frame_to_buffer =
render_transfer_queue_.Remove(&render_queue_output_frame_);
}
}
} // namespace webrtc
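A note on the hand-off above: AnalyzeRender (render thread) only copies each frame into render_transfer_queue_, and the capture thread drains that queue in EmptyRenderQueue before every capture frame, so no locks are held across the audio processing. A minimal sketch of the same pattern with SwapQueue (the queue size and frame length here are illustrative assumptions, not the exact AEC3 configuration):

// Sketch: single-producer/single-consumer hand-off as used between
// AnalyzeRender (producer) and EmptyRenderQueue (consumer).
webrtc::SwapQueue<std::vector<float>> frame_queue(
    /*size=*/30, /*prototype=*/std::vector<float>(160, 0.f));
std::vector<float> producer_frame(160, 0.f);
frame_queue.Insert(&producer_frame);  // Swaps contents in; false when full.
std::vector<float> consumer_frame(160, 0.f);
while (frame_queue.Remove(&consumer_frame)) {
  // Contents are swapped out, so the steady state is allocation-free.
}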

230
modules/audio_processing/aec3/echo_canceller3.h Normal file

View File

@ -0,0 +1,230 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
#include <stddef.h>
#include <atomic>
#include <memory>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
#include "modules/audio_processing/aec3/block_delay_buffer.h"
#include "modules/audio_processing/aec3/block_framer.h"
#include "modules/audio_processing/aec3/block_processor.h"
#include "modules/audio_processing/aec3/config_selector.h"
#include "modules/audio_processing/aec3/frame_blocker.h"
#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/race_checker.h"
#include "rtc_base/swap_queue.h"
#include "rtc_base/thread_annotations.h"
namespace webrtc {
// Method for adjusting config parameter dependencies.
// Only to be used externally to AEC3 for testing purposes.
// TODO(webrtc:5298): Move this to a separate file.
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config);
// Functor for verifying the invariance of the frames being put into the render
// queue.
class Aec3RenderQueueItemVerifier {
public:
Aec3RenderQueueItemVerifier(size_t num_bands,
size_t num_channels,
size_t frame_length)
: num_bands_(num_bands),
num_channels_(num_channels),
frame_length_(frame_length) {}
bool operator()(const std::vector<std::vector<std::vector<float>>>& v) const {
if (v.size() != num_bands_) {
return false;
}
for (const auto& band : v) {
if (band.size() != num_channels_) {
return false;
}
for (const auto& channel : band) {
if (channel.size() != frame_length_) {
return false;
}
}
}
return true;
}
private:
const size_t num_bands_;
const size_t num_channels_;
const size_t frame_length_;
};
// Main class for the echo canceller3.
// It does three things:
// -Receives 10 ms frames of band-split audio.
// -Provides the lower level echo canceller functionality with
// blocks of 64 samples of audio data.
// -Partially handles the jitter in the render and capture API
// call sequence.
//
// The class is supposed to be used in a non-concurrent manner, apart from the
// AnalyzeRender call, which can be called concurrently with the other methods.
class EchoCanceller3 : public EchoControl {
public:
EchoCanceller3(
const EchoCanceller3Config& config,
const absl::optional<EchoCanceller3Config>& multichannel_config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels);
~EchoCanceller3() override;
EchoCanceller3(const EchoCanceller3&) = delete;
EchoCanceller3& operator=(const EchoCanceller3&) = delete;
// Analyzes and stores an internal copy of the split-band domain render
// signal.
void AnalyzeRender(AudioBuffer* render) override { AnalyzeRender(*render); }
// Analyzes the full-band domain capture signal to detect signal saturation.
void AnalyzeCapture(AudioBuffer* capture) override {
AnalyzeCapture(*capture);
}
// Processes the split-band domain capture signal in order to remove any echo
// present in the signal.
void ProcessCapture(AudioBuffer* capture, bool level_change) override;
// As above, but also returns the linear filter output.
void ProcessCapture(AudioBuffer* capture,
AudioBuffer* linear_output,
bool level_change) override;
// Collect current metrics from the echo canceller.
Metrics GetMetrics() const override;
// Provides an optional external estimate of the audio buffer delay.
void SetAudioBufferDelay(int delay_ms) override;
// Specifies whether the capture output will be used. The purpose of this is
// to allow the echo controller to deactivate some of the processing when the
// resulting output is not used anyway, for instance when the endpoint is
// muted.
void SetCaptureOutputUsage(bool capture_output_used) override;
bool ActiveProcessing() const override;
// Signals whether an external detector has detected echo leakage from the
// echo canceller.
// Note that once echo leakage has been flagged, it should be unflagged again
// when it is no longer occurring.
void UpdateEchoLeakageStatus(bool leakage_detected) {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
block_processor_->UpdateEchoLeakageStatus(leakage_detected);
}
// Produces a default configuration for multichannel.
static EchoCanceller3Config CreateDefaultMultichannelConfig();
private:
friend class EchoCanceller3Tester;
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
DetectionOfProperStereoUsingThreshold);
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
DetectionOfProperStereoUsingHysteresis);
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
StereoContentDetectionForMonoSignals);
class RenderWriter;
// (Re-)Initializes the selected subset of the EchoCanceller3 fields, at
// creation as well as during reconfiguration.
void Initialize();
// Only for testing. Replaces the internal block processor.
void SetBlockProcessorForTesting(
std::unique_ptr<BlockProcessor> block_processor);
// Only for testing. Returns whether stereo processing is active.
bool StereoRenderProcessingActiveForTesting() const {
return multichannel_content_detector_.IsProperMultiChannelContentDetected();
}
// Only for testing.
const EchoCanceller3Config& GetActiveConfigForTesting() const {
return config_selector_.active_config();
}
// Empties the render SwapQueue.
void EmptyRenderQueue();
// Analyzes and stores an internal copy of the split-band domain render
// signal.
void AnalyzeRender(const AudioBuffer& render);
// Analyzes the full-band domain capture signal to detect signal saturation.
void AnalyzeCapture(const AudioBuffer& capture);
rtc::RaceChecker capture_race_checker_;
rtc::RaceChecker render_race_checker_;
// State that is accessed by the AnalyzeRender call.
std::unique_ptr<RenderWriter> render_writer_
RTC_GUARDED_BY(render_race_checker_);
// State that may be accessed by the capture thread.
static std::atomic<int> instance_count_;
std::unique_ptr<ApmDataDumper> data_dumper_;
const EchoCanceller3Config config_;
const int sample_rate_hz_;
const int num_bands_;
const size_t num_render_input_channels_;
size_t num_render_channels_to_aec_;
const size_t num_capture_channels_;
ConfigSelector config_selector_;
MultiChannelContentDetector multichannel_content_detector_;
std::unique_ptr<BlockFramer> linear_output_framer_
RTC_GUARDED_BY(capture_race_checker_);
BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
std::unique_ptr<FrameBlocker> render_blocker_
RTC_GUARDED_BY(capture_race_checker_);
SwapQueue<std::vector<std::vector<std::vector<float>>>,
Aec3RenderQueueItemVerifier>
render_transfer_queue_;
std::unique_ptr<BlockProcessor> block_processor_
RTC_GUARDED_BY(capture_race_checker_);
std::vector<std::vector<std::vector<float>>> render_queue_output_frame_
RTC_GUARDED_BY(capture_race_checker_);
bool saturated_microphone_signal_ RTC_GUARDED_BY(capture_race_checker_) =
false;
Block render_block_ RTC_GUARDED_BY(capture_race_checker_);
std::unique_ptr<Block> linear_output_block_
RTC_GUARDED_BY(capture_race_checker_);
Block capture_block_ RTC_GUARDED_BY(capture_race_checker_);
std::vector<std::vector<rtc::ArrayView<float>>> render_sub_frame_view_
RTC_GUARDED_BY(capture_race_checker_);
std::vector<std::vector<rtc::ArrayView<float>>> linear_output_sub_frame_view_
RTC_GUARDED_BY(capture_race_checker_);
std::vector<std::vector<rtc::ArrayView<float>>> capture_sub_frame_view_
RTC_GUARDED_BY(capture_race_checker_);
std::unique_ptr<BlockDelayBuffer> block_delay_buffer_
RTC_GUARDED_BY(capture_race_checker_);
ApiCallJitterMetrics api_call_metrics_ RTC_GUARDED_BY(capture_race_checker_);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
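To make the intended call pattern concrete, a minimal mono sketch (assuming 16 kHz streams, for which a single band suffices; the AudioBuffer constructor argument order (input_rate, input_channels, buffer_rate, buffer_channels, output_rate, output_channels) is an assumption about the surrounding tree, not something defined in this header):

// Sketch: per-10-ms-frame call order for a 16 kHz mono session.
webrtc::EchoCanceller3 aec3(webrtc::EchoCanceller3Config(),
                            /*multichannel_config=*/absl::nullopt,
                            /*sample_rate_hz=*/16000,
                            /*num_render_channels=*/1,
                            /*num_capture_channels=*/1);
webrtc::AudioBuffer render(16000, 1, 16000, 1, 16000, 1);
webrtc::AudioBuffer capture(16000, 1, 16000, 1, 16000, 1);
// Feed the far-end (render) audio first, then analyze and process capture.
aec3.AnalyzeRender(&render);
aec3.AnalyzeCapture(&capture);
aec3.ProcessCapture(&capture, /*level_change=*/false);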

127
modules/audio_processing/aec3/echo_path_delay_estimator.cc Normal file

View File

@ -0,0 +1,127 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
#include <array>
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
namespace webrtc {
EchoPathDelayEstimator::EchoPathDelayEstimator(
ApmDataDumper* data_dumper,
const EchoCanceller3Config& config,
size_t num_capture_channels)
: data_dumper_(data_dumper),
down_sampling_factor_(config.delay.down_sampling_factor),
sub_block_size_(down_sampling_factor_ != 0
? kBlockSize / down_sampling_factor_
: kBlockSize),
capture_mixer_(num_capture_channels,
config.delay.capture_alignment_mixing),
capture_decimator_(down_sampling_factor_),
matched_filter_(
data_dumper_,
DetectOptimization(),
sub_block_size_,
kMatchedFilterWindowSizeSubBlocks,
config.delay.num_filters,
kMatchedFilterAlignmentShiftSizeSubBlocks,
config.delay.down_sampling_factor == 8
? config.render_levels.poor_excitation_render_limit_ds8
: config.render_levels.poor_excitation_render_limit,
config.delay.delay_estimate_smoothing,
config.delay.delay_estimate_smoothing_delay_found,
config.delay.delay_candidate_detection_threshold,
config.delay.detect_pre_echo),
matched_filter_lag_aggregator_(data_dumper_,
matched_filter_.GetMaxFilterLag(),
config.delay) {
RTC_DCHECK(data_dumper);
RTC_DCHECK(down_sampling_factor_ > 0);
}
EchoPathDelayEstimator::~EchoPathDelayEstimator() = default;
void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) {
Reset(true, reset_delay_confidence);
}
absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
const DownsampledRenderBuffer& render_buffer,
const Block& capture) {
std::array<float, kBlockSize> downsampled_capture_data;
rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
sub_block_size_);
std::array<float, kBlockSize> downmixed_capture;
capture_mixer_.ProduceOutput(capture, downmixed_capture);
capture_decimator_.Decimate(downmixed_capture, downsampled_capture);
data_dumper_->DumpWav("aec3_capture_decimator_output",
downsampled_capture.size(), downsampled_capture.data(),
16000 / down_sampling_factor_, 1);
matched_filter_.Update(render_buffer, downsampled_capture,
matched_filter_lag_aggregator_.ReliableDelayFound());
absl::optional<DelayEstimate> aggregated_matched_filter_lag =
matched_filter_lag_aggregator_.Aggregate(
matched_filter_.GetBestLagEstimate());
// Run clockdrift detection.
if (aggregated_matched_filter_lag &&
(*aggregated_matched_filter_lag).quality ==
DelayEstimate::Quality::kRefined)
clockdrift_detector_.Update(
matched_filter_lag_aggregator_.GetDelayAtHighestPeak());
// TODO(peah): Move this logging outside of this class once EchoCanceller3
// development is done.
data_dumper_->DumpRaw(
"aec3_echo_path_delay_estimator_delay",
aggregated_matched_filter_lag
? static_cast<int>(aggregated_matched_filter_lag->delay *
down_sampling_factor_)
: -1);
// Return the detected delay in samples as the aggregated matched filter lag
// compensated by the down sampling factor for the signal being correlated.
if (aggregated_matched_filter_lag) {
aggregated_matched_filter_lag->delay *= down_sampling_factor_;
}
if (old_aggregated_lag_ && aggregated_matched_filter_lag &&
old_aggregated_lag_->delay == aggregated_matched_filter_lag->delay) {
++consistent_estimate_counter_;
} else {
consistent_estimate_counter_ = 0;
}
old_aggregated_lag_ = aggregated_matched_filter_lag;
constexpr size_t kNumBlocksPerSecondBy2 = kNumBlocksPerSecond / 2;
if (consistent_estimate_counter_ > kNumBlocksPerSecondBy2) {
Reset(false, false);
}
return aggregated_matched_filter_lag;
}
void EchoPathDelayEstimator::Reset(bool reset_lag_aggregator,
bool reset_delay_confidence) {
if (reset_lag_aggregator) {
matched_filter_lag_aggregator_.Reset(reset_delay_confidence);
}
matched_filter_.Reset(/*full_reset=*/reset_lag_aggregator);
old_aggregated_lag_ = absl::nullopt;
consistent_estimate_counter_ = 0;
}
} // namespace webrtc
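A worked example of the compensation above: with config.delay.down_sampling_factor set to 4, the matched filter operates on 4x-decimated audio, so an aggregated lag of 32 decimated samples corresponds to 32 * 4 = 128 samples at the 16 kHz band rate, i.e. an estimated echo path delay of 128 / 16000 = 8 ms.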

80
modules/audio_processing/aec3/echo_path_delay_estimator.h Normal file

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
#include <stddef.h>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/alignment_mixer.h"
#include "modules/audio_processing/aec3/block.h"
#include "modules/audio_processing/aec3/clockdrift_detector.h"
#include "modules/audio_processing/aec3/decimator.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/matched_filter.h"
#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
namespace webrtc {
class ApmDataDumper;
struct DownsampledRenderBuffer;
struct EchoCanceller3Config;
// Estimates the delay of the echo path.
class EchoPathDelayEstimator {
public:
EchoPathDelayEstimator(ApmDataDumper* data_dumper,
const EchoCanceller3Config& config,
size_t num_capture_channels);
~EchoPathDelayEstimator();
EchoPathDelayEstimator(const EchoPathDelayEstimator&) = delete;
EchoPathDelayEstimator& operator=(const EchoPathDelayEstimator&) = delete;
// Resets the estimation. If the delay confidence is reset, the reset behavior
// is as if the call is restarted.
void Reset(bool reset_delay_confidence);
// Produces a delay estimate if one is available.
absl::optional<DelayEstimate> EstimateDelay(
const DownsampledRenderBuffer& render_buffer,
const Block& capture);
// Log delay estimator properties.
void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
matched_filter_.LogFilterProperties(sample_rate_hz, shift,
down_sampling_factor_);
}
// Returns the level of detected clockdrift.
ClockdriftDetector::Level Clockdrift() const {
return clockdrift_detector_.ClockdriftLevel();
}
private:
ApmDataDumper* const data_dumper_;
const size_t down_sampling_factor_;
const size_t sub_block_size_;
AlignmentMixer capture_mixer_;
Decimator capture_decimator_;
MatchedFilter matched_filter_;
MatchedFilterLagAggregator matched_filter_lag_aggregator_;
absl::optional<DelayEstimate> old_aggregated_lag_;
size_t consistent_estimate_counter_ = 0;
ClockdriftDetector clockdrift_detector_;
// Internal reset method with more granularity.
void Reset(bool reset_lag_aggregator, bool reset_delay_confidence);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_

22
modules/audio_processing/aec3/echo_path_variability.cc Normal file

View File

@ -0,0 +1,22 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_path_variability.h"
namespace webrtc {
EchoPathVariability::EchoPathVariability(bool gain_change,
DelayAdjustment delay_change,
bool clock_drift)
: gain_change(gain_change),
delay_change(delay_change),
clock_drift(clock_drift) {}
} // namespace webrtc

33
modules/audio_processing/aec3/echo_path_variability.h Normal file

View File

@ -0,0 +1,33 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
namespace webrtc {
struct EchoPathVariability {
enum class DelayAdjustment { kNone, kBufferFlush, kNewDetectedDelay };
EchoPathVariability(bool gain_change,
DelayAdjustment delay_change,
bool clock_drift);
bool AudioPathChanged() const {
return gain_change || delay_change != DelayAdjustment::kNone;
}
bool gain_change;
DelayAdjustment delay_change;
bool clock_drift;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
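For illustration, AudioPathChanged() covers gain and delay changes but excludes clock drift:

// Sketch: clock drift alone does not flag an audio path change.
webrtc::EchoPathVariability v(
    /*gain_change=*/false,
    webrtc::EchoPathVariability::DelayAdjustment::kNone,
    /*clock_drift=*/true);
RTC_DCHECK(!v.AudioPathChanged());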

521
modules/audio_processing/aec3/echo_remover.cc Normal file

View File

@ -0,0 +1,521 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_remover.h"
#include <math.h>
#include <stddef.h>
#include <algorithm>
#include <array>
#include <atomic>
#include <cmath>
#include <memory>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec3_fft.h"
#include "modules/audio_processing/aec3/aec_state.h"
#include "modules/audio_processing/aec3/comfort_noise_generator.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
#include "modules/audio_processing/aec3/residual_echo_estimator.h"
#include "modules/audio_processing/aec3/subtractor.h"
#include "modules/audio_processing/aec3/subtractor_output.h"
#include "modules/audio_processing/aec3/suppression_filter.h"
#include "modules/audio_processing/aec3/suppression_gain.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
namespace {
// Maximum number of channels for which the capture channel data is stored on
// the stack. If the number of channels is larger than this, the data is
// stored using scratch memory that is pre-allocated on the heap. This
// partitioning avoids wasting heap space for the more common channel counts
// while not capping the number of supported channels at a fixed
// stack-allocated maximum.
constexpr size_t kMaxNumChannelsOnStack = 2;
// Chooses the number of channels to store on the heap when that is required due
// to the number of capture channels being larger than the pre-defined number
// of channels to store on the stack.
size_t NumChannelsOnHeap(size_t num_capture_channels) {
return num_capture_channels > kMaxNumChannelsOnStack ? num_capture_channels
: 0;
}
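// For example, with kMaxNumChannelsOnStack == 2, NumChannelsOnHeap(1) == 0
// (the stack arrays suffice), while NumChannelsOnHeap(8) == 8 (all channel
// data goes on the heap and the stack arrays are left unused).
// Computes the per-bin power of the linear echo estimate S = Y - E, i.e.
// (*S2)[k] = |Y[k] - E[k]|^2; this is used further down as the suppressor's
// echo spectrum whenever the linear estimate is usable.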
void LinearEchoPower(const FftData& E,
const FftData& Y,
std::array<float, kFftLengthBy2Plus1>* S2) {
for (size_t k = 0; k < E.re.size(); ++k) {
(*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) +
(Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]);
}
}
// Fades between two input signals using a fixed-size transition.
void SignalTransition(rtc::ArrayView<const float> from,
rtc::ArrayView<const float> to,
rtc::ArrayView<float> out) {
if (from == to) {
RTC_DCHECK_EQ(to.size(), out.size());
std::copy(to.begin(), to.end(), out.begin());
} else {
constexpr size_t kTransitionSize = 30;
constexpr float kOneByTransitionSizePlusOne = 1.f / (kTransitionSize + 1);
RTC_DCHECK_EQ(from.size(), to.size());
RTC_DCHECK_EQ(from.size(), out.size());
RTC_DCHECK_LE(kTransitionSize, out.size());
for (size_t k = 0; k < kTransitionSize; ++k) {
float a = (k + 1) * kOneByTransitionSizePlusOne;
out[k] = a * to[k] + (1.f - a) * from[k];
}
std::copy(to.begin() + kTransitionSize, to.end(),
out.begin() + kTransitionSize);
}
}
// Computes a windowed (square root Hanning) padded FFT and updates the related
// memory.
void WindowedPaddedFft(const Aec3Fft& fft,
rtc::ArrayView<const float> v,
rtc::ArrayView<float> v_old,
FftData* V) {
fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V);
std::copy(v.begin(), v.end(), v_old.begin());
}
// Class for removing the echo from the capture signal.
class EchoRemoverImpl final : public EchoRemover {
public:
EchoRemoverImpl(const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels);
~EchoRemoverImpl() override;
EchoRemoverImpl(const EchoRemoverImpl&) = delete;
EchoRemoverImpl& operator=(const EchoRemoverImpl&) = delete;
void GetMetrics(EchoControl::Metrics* metrics) const override;
// Removes the echo from a block of samples from the capture signal. The
// supplied render signal is assumed to be pre-aligned with the capture
// signal.
void ProcessCapture(EchoPathVariability echo_path_variability,
bool capture_signal_saturation,
const absl::optional<DelayEstimate>& external_delay,
RenderBuffer* render_buffer,
Block* linear_output,
Block* capture) override;
// Updates the status on whether echo leakage is detected in the output of the
// echo remover.
void UpdateEchoLeakageStatus(bool leakage_detected) override {
echo_leakage_detected_ = leakage_detected;
}
void SetCaptureOutputUsage(bool capture_output_used) override {
capture_output_used_ = capture_output_used;
}
private:
// Selects which of the coarse and refined linear filter outputs is most
// appropriate to pass to the suppressor, and forms the linear filter output
// by smoothly transitioning between them.
void FormLinearFilterOutput(const SubtractorOutput& subtractor_output,
rtc::ArrayView<float> output);
static std::atomic<int> instance_count_;
const EchoCanceller3Config config_;
const Aec3Fft fft_;
std::unique_ptr<ApmDataDumper> data_dumper_;
const Aec3Optimization optimization_;
const int sample_rate_hz_;
const size_t num_render_channels_;
const size_t num_capture_channels_;
const bool use_coarse_filter_output_;
Subtractor subtractor_;
SuppressionGain suppression_gain_;
ComfortNoiseGenerator cng_;
SuppressionFilter suppression_filter_;
RenderSignalAnalyzer render_signal_analyzer_;
ResidualEchoEstimator residual_echo_estimator_;
bool echo_leakage_detected_ = false;
bool capture_output_used_ = true;
AecState aec_state_;
EchoRemoverMetrics metrics_;
std::vector<std::array<float, kFftLengthBy2>> e_old_;
std::vector<std::array<float, kFftLengthBy2>> y_old_;
size_t block_counter_ = 0;
int gain_change_hangover_ = 0;
bool refined_filter_output_last_selected_ = true;
std::vector<std::array<float, kFftLengthBy2>> e_heap_;
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_;
std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_;
std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_;
std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded_heap_;
std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_;
std::vector<FftData> Y_heap_;
std::vector<FftData> E_heap_;
std::vector<FftData> comfort_noise_heap_;
std::vector<FftData> high_band_comfort_noise_heap_;
std::vector<SubtractorOutput> subtractor_output_heap_;
};
std::atomic<int> EchoRemoverImpl::instance_count_(0);
EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels)
: config_(config),
fft_(),
data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
optimization_(DetectOptimization()),
sample_rate_hz_(sample_rate_hz),
num_render_channels_(num_render_channels),
num_capture_channels_(num_capture_channels),
use_coarse_filter_output_(
config_.filter.enable_coarse_filter_output_usage),
subtractor_(config,
num_render_channels_,
num_capture_channels_,
data_dumper_.get(),
optimization_),
suppression_gain_(config_,
optimization_,
sample_rate_hz,
num_capture_channels),
cng_(config_, optimization_, num_capture_channels_),
suppression_filter_(optimization_,
sample_rate_hz_,
num_capture_channels_),
render_signal_analyzer_(config_),
residual_echo_estimator_(config_, num_render_channels),
aec_state_(config_, num_capture_channels_),
e_old_(num_capture_channels_, {0.f}),
y_old_(num_capture_channels_, {0.f}),
e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}),
Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
R2_unbounded_heap_(NumChannelsOnHeap(num_capture_channels_)),
S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)),
Y_heap_(NumChannelsOnHeap(num_capture_channels_)),
E_heap_(NumChannelsOnHeap(num_capture_channels_)),
comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) {
RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
}
EchoRemoverImpl::~EchoRemoverImpl() = default;
void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const {
// Echo return loss (ERL) is inverted to go from gain to attenuation.
metrics->echo_return_loss = -10.0 * std::log10(aec_state_.ErlTimeDomain());
metrics->echo_return_loss_enhancement =
Log2TodB(aec_state_.FullBandErleLog2());
}
void EchoRemoverImpl::ProcessCapture(
EchoPathVariability echo_path_variability,
bool capture_signal_saturation,
const absl::optional<DelayEstimate>& external_delay,
RenderBuffer* render_buffer,
Block* linear_output,
Block* capture) {
++block_counter_;
const Block& x = render_buffer->GetBlock(0);
Block* y = capture;
RTC_DCHECK(render_buffer);
RTC_DCHECK(y);
RTC_DCHECK_EQ(x.NumBands(), NumBandsForRate(sample_rate_hz_));
RTC_DCHECK_EQ(y->NumBands(), NumBandsForRate(sample_rate_hz_));
RTC_DCHECK_EQ(x.NumChannels(), num_render_channels_);
RTC_DCHECK_EQ(y->NumChannels(), num_capture_channels_);
// Stack allocated data to use when the number of channels is low.
std::array<std::array<float, kFftLengthBy2>, kMaxNumChannelsOnStack> e_stack;
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
Y2_stack;
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
E2_stack;
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
R2_stack;
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
R2_unbounded_stack;
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
S2_linear_stack;
std::array<FftData, kMaxNumChannelsOnStack> Y_stack;
std::array<FftData, kMaxNumChannelsOnStack> E_stack;
std::array<FftData, kMaxNumChannelsOnStack> comfort_noise_stack;
std::array<FftData, kMaxNumChannelsOnStack> high_band_comfort_noise_stack;
std::array<SubtractorOutput, kMaxNumChannelsOnStack> subtractor_output_stack;
rtc::ArrayView<std::array<float, kFftLengthBy2>> e(e_stack.data(),
num_capture_channels_);
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2(
Y2_stack.data(), num_capture_channels_);
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2(
E2_stack.data(), num_capture_channels_);
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(
R2_stack.data(), num_capture_channels_);
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
R2_unbounded_stack.data(), num_capture_channels_);
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear(
S2_linear_stack.data(), num_capture_channels_);
rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_);
rtc::ArrayView<FftData> E(E_stack.data(), num_capture_channels_);
rtc::ArrayView<FftData> comfort_noise(comfort_noise_stack.data(),
num_capture_channels_);
rtc::ArrayView<FftData> high_band_comfort_noise(
high_band_comfort_noise_stack.data(), num_capture_channels_);
rtc::ArrayView<SubtractorOutput> subtractor_output(
subtractor_output_stack.data(), num_capture_channels_);
if (NumChannelsOnHeap(num_capture_channels_) > 0) {
// If the stack-allocated space is too small, use the heap for storing the
// microphone data.
e = rtc::ArrayView<std::array<float, kFftLengthBy2>>(e_heap_.data(),
num_capture_channels_);
Y2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
Y2_heap_.data(), num_capture_channels_);
E2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
E2_heap_.data(), num_capture_channels_);
R2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
R2_heap_.data(), num_capture_channels_);
R2_unbounded = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
R2_unbounded_heap_.data(), num_capture_channels_);
S2_linear = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
S2_linear_heap_.data(), num_capture_channels_);
Y = rtc::ArrayView<FftData>(Y_heap_.data(), num_capture_channels_);
E = rtc::ArrayView<FftData>(E_heap_.data(), num_capture_channels_);
comfort_noise = rtc::ArrayView<FftData>(comfort_noise_heap_.data(),
num_capture_channels_);
high_band_comfort_noise = rtc::ArrayView<FftData>(
high_band_comfort_noise_heap_.data(), num_capture_channels_);
subtractor_output = rtc::ArrayView<SubtractorOutput>(
subtractor_output_heap_.data(), num_capture_channels_);
}
data_dumper_->DumpWav("aec3_echo_remover_capture_input",
y->View(/*band=*/0, /*channel=*/0), 16000, 1);
data_dumper_->DumpWav("aec3_echo_remover_render_input",
x.View(/*band=*/0, /*channel=*/0), 16000, 1);
data_dumper_->DumpRaw("aec3_echo_remover_capture_input",
y->View(/*band=*/0, /*channel=*/0));
data_dumper_->DumpRaw("aec3_echo_remover_render_input",
x.View(/*band=*/0, /*channel=*/0));
aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
if (echo_path_variability.AudioPathChanged()) {
// Ensure that the gain change is only acted on once per frame.
if (echo_path_variability.gain_change) {
if (gain_change_hangover_ == 0) {
constexpr int kMaxBlocksPerFrame = 3;
gain_change_hangover_ = kMaxBlocksPerFrame;
rtc::LoggingSeverity log_level =
config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
: rtc::LS_VERBOSE;
RTC_LOG_V(log_level)
<< "Gain change detected at block " << block_counter_;
} else {
echo_path_variability.gain_change = false;
}
}
subtractor_.HandleEchoPathChange(echo_path_variability);
aec_state_.HandleEchoPathChange(echo_path_variability);
if (echo_path_variability.delay_change !=
EchoPathVariability::DelayAdjustment::kNone) {
suppression_gain_.SetInitialState(true);
}
}
if (gain_change_hangover_ > 0) {
--gain_change_hangover_;
}
// Analyze the render signal.
render_signal_analyzer_.Update(*render_buffer,
aec_state_.MinDirectPathFilterDelay());
// State transition.
if (aec_state_.TransitionTriggered()) {
subtractor_.ExitInitialState();
suppression_gain_.SetInitialState(false);
}
// Perform linear echo cancellation.
subtractor_.Process(*render_buffer, *y, render_signal_analyzer_, aec_state_,
subtractor_output);
// Compute spectra.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
FormLinearFilterOutput(subtractor_output[ch], e[ch]);
WindowedPaddedFft(fft_, y->View(/*band=*/0, ch), y_old_[ch], &Y[ch]);
WindowedPaddedFft(fft_, e[ch], e_old_[ch], &E[ch]);
LinearEchoPower(E[ch], Y[ch], &S2_linear[ch]);
Y[ch].Spectrum(optimization_, Y2[ch]);
E[ch].Spectrum(optimization_, E2[ch]);
}
// Optionally return the linear filter output.
if (linear_output) {
RTC_DCHECK_GE(1, linear_output->NumBands());
RTC_DCHECK_EQ(num_capture_channels_, linear_output->NumChannels());
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::copy(e[ch].begin(), e[ch].end(),
linear_output->begin(/*band=*/0, ch));
}
}
// Update the AEC state information.
aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponses(),
subtractor_.FilterImpulseResponses(), *render_buffer, E2,
Y2, subtractor_output);
// Choose the linear output.
const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;
data_dumper_->DumpWav("aec3_output_linear",
y->View(/*band=*/0, /*channel=*/0), 16000, 1);
data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1);
// Estimate the comfort noise.
cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise,
high_band_comfort_noise);
// Only do the processing below if the output of the audio processing module
// is used.
std::array<float, kFftLengthBy2Plus1> G;
if (capture_output_used_) {
// Estimate the residual echo power.
residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
suppression_gain_.IsDominantNearend(), R2,
R2_unbounded);
// Suppressor nearend estimate.
if (aec_state_.UsableLinearEstimate()) {
// E2 is bound by Y2.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
E2[ch].begin(),
[](float a, float b) { return std::min(a, b); });
}
}
const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;
// Suppressor echo estimate.
const auto& echo_spectrum =
aec_state_.UsableLinearEstimate() ? S2_linear : R2;
// Determine if the suppressor should assume clock drift.
const bool clock_drift = config_.echo_removal_control.has_clock_drift ||
echo_path_variability.clock_drift;
// Compute preferred gains.
float high_bands_gain;
suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, R2_unbounded,
cng_.NoiseSpectrum(), render_signal_analyzer_,
aec_state_, x, clock_drift, &high_bands_gain, &G);
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
high_bands_gain, Y_fft, y);
} else {
G.fill(0.f);
}
// Update the metrics.
metrics_.Update(aec_state_, cng_.NoiseSpectrum()[0], G);
// Debug outputs for the purpose of development and analysis.
data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
&subtractor_output[0].s_refined[0], 16000, 1);
data_dumper_->DumpRaw("aec3_output", y->View(/*band=*/0, /*channel=*/0));
data_dumper_->DumpRaw("aec3_narrow_render",
render_signal_analyzer_.NarrowPeakBand() ? 1 : 0);
data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()[0]);
data_dumper_->DumpRaw("aec3_suppressor_gain", G);
data_dumper_->DumpWav("aec3_output", y->View(/*band=*/0, /*channel=*/0),
16000, 1);
data_dumper_->DumpRaw("aec3_using_subtractor_output[0]",
aec_state_.UseLinearFilterOutput() ? 1 : 0);
data_dumper_->DumpRaw("aec3_E2", E2[0]);
data_dumper_->DumpRaw("aec3_S2_linear", S2_linear[0]);
data_dumper_->DumpRaw("aec3_Y2", Y2[0]);
data_dumper_->DumpRaw(
"aec3_X2", render_buffer->Spectrum(
aec_state_.MinDirectPathFilterDelay())[/*channel=*/0]);
data_dumper_->DumpRaw("aec3_R2", R2[0]);
data_dumper_->DumpRaw("aec3_filter_delay",
aec_state_.MinDirectPathFilterDelay());
data_dumper_->DumpRaw("aec3_capture_saturation",
aec_state_.SaturatedCapture() ? 1 : 0);
}
void EchoRemoverImpl::FormLinearFilterOutput(
const SubtractorOutput& subtractor_output,
rtc::ArrayView<float> output) {
RTC_DCHECK_EQ(subtractor_output.e_refined.size(), output.size());
RTC_DCHECK_EQ(subtractor_output.e_coarse.size(), output.size());
bool use_refined_output = true;
if (use_coarse_filter_output_) {
// As the output of the refined adaptive filter should generally be better
// than the coarse filter output, apply a margin and threshold before
// choosing the coarse filter output.
if (subtractor_output.e2_coarse < 0.9f * subtractor_output.e2_refined &&
subtractor_output.y2 > 30.f * 30.f * kBlockSize &&
(subtractor_output.s2_refined > 60.f * 60.f * kBlockSize ||
subtractor_output.s2_coarse > 60.f * 60.f * kBlockSize)) {
use_refined_output = false;
} else {
// If the refined filter has diverged, choose the filter output that has
// the lower power.
if (subtractor_output.e2_coarse < subtractor_output.e2_refined &&
subtractor_output.y2 < subtractor_output.e2_refined) {
use_refined_output = false;
}
}
}
SignalTransition(refined_filter_output_last_selected_
? subtractor_output.e_refined
: subtractor_output.e_coarse,
use_refined_output ? subtractor_output.e_refined
: subtractor_output.e_coarse,
output);
refined_filter_output_last_selected_ = use_refined_output;
}
} // namespace
EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels) {
return new EchoRemoverImpl(config, sample_rate_hz, num_render_channels,
num_capture_channels);
}
} // namespace webrtc

62
modules/audio_processing/aec3/echo_remover.h Normal file

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
#include <vector>
#include "absl/types/optional.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/block.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/render_buffer.h"
namespace webrtc {
// Class for removing the echo from the capture signal.
class EchoRemover {
public:
static EchoRemover* Create(const EchoCanceller3Config& config,
int sample_rate_hz,
size_t num_render_channels,
size_t num_capture_channels);
virtual ~EchoRemover() = default;
// Get current metrics.
virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;
// Removes the echo from a block of samples from the capture signal. The
// supplied render signal is assumed to be pre-aligned with the capture
// signal.
virtual void ProcessCapture(
EchoPathVariability echo_path_variability,
bool capture_signal_saturation,
const absl::optional<DelayEstimate>& external_delay,
RenderBuffer* render_buffer,
Block* linear_output,
Block* capture) = 0;
// Updates the status on whether echo leakage is detected in the output of the
// echo remover.
virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;
// Specifies whether the capture output will be used. The purpose of this is
// to allow the echo remover to deactivate some of the processing when the
// resulting output is not used anyway, for instance when the endpoint is
// muted.
virtual void SetCaptureOutputUsage(bool capture_output_used) = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
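A minimal creation sketch (wrapping the raw pointer returned by Create() in a unique_ptr is this example's choice, not a requirement of the interface):

// Sketch: construct an echo remover for 16 kHz mono processing.
std::unique_ptr<webrtc::EchoRemover> remover(
    webrtc::EchoRemover::Create(webrtc::EchoCanceller3Config(),
                                /*sample_rate_hz=*/16000,
                                /*num_render_channels=*/1,
                                /*num_capture_channels=*/1));
remover->UpdateEchoLeakageStatus(/*leakage_detected=*/false);
webrtc::EchoControl::Metrics metrics;
remover->GetMetrics(&metrics);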

165
modules/audio_processing/aec3/echo_remover_metrics.cc Normal file

View File

@ -0,0 +1,165 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
#include <math.h>
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <numeric>
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "system_wrappers/include/metrics.h"
#ifdef max
#undef max
#endif
#ifdef min
#undef min
#endif
namespace webrtc {
EchoRemoverMetrics::DbMetric::DbMetric() : DbMetric(0.f, 0.f, 0.f) {}
EchoRemoverMetrics::DbMetric::DbMetric(float sum_value,
float floor_value,
float ceil_value)
: sum_value(sum_value), floor_value(floor_value), ceil_value(ceil_value) {}
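// Accumulates `value` into the running sum and widens the observed
// floor/ceiling range.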
void EchoRemoverMetrics::DbMetric::Update(float value) {
sum_value += value;
floor_value = std::min(floor_value, value);
ceil_value = std::max(ceil_value, value);
}
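// Replaces the sum with the latest (instantaneous) value while still
// widening the observed floor/ceiling range.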
void EchoRemoverMetrics::DbMetric::UpdateInstant(float value) {
sum_value = value;
floor_value = std::min(floor_value, value);
ceil_value = std::max(ceil_value, value);
}
EchoRemoverMetrics::EchoRemoverMetrics() {
ResetMetrics();
}
void EchoRemoverMetrics::ResetMetrics() {
erl_time_domain_ = DbMetric(0.f, 10000.f, 0.000f);
erle_time_domain_ = DbMetric(0.f, 0.f, 1000.f);
saturated_capture_ = false;
}
void EchoRemoverMetrics::Update(
const AecState& aec_state,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
const std::array<float, kFftLengthBy2Plus1>& suppressor_gain) {
metrics_reported_ = false;
if (++block_counter_ <= kMetricsCollectionBlocks) {
erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain());
erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2());
saturated_capture_ = saturated_capture_ || aec_state.SaturatedCapture();
} else {
// Report the metrics over several blocks in order to spread out the
// computational cost of the logarithms involved.
switch (block_counter_) {
case kMetricsCollectionBlocks + 1:
RTC_HISTOGRAM_BOOLEAN(
"WebRTC.Audio.EchoCanceller.UsableLinearEstimate",
static_cast<int>(aec_state.UsableLinearEstimate() ? 1 : 0));
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay",
aec_state.MinDirectPathFilterDelay(), 0, 30,
31);
RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation",
static_cast<int>(saturated_capture_ ? 1 : 0));
break;
case kMetricsCollectionBlocks + 2:
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.Erl.Value",
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
erl_time_domain_.sum_value),
0, 59, 30);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.Erl.Max",
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
erl_time_domain_.ceil_value),
0, 59, 30);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.Erl.Min",
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
erl_time_domain_.floor_value),
0, 59, 30);
break;
case kMetricsCollectionBlocks + 3:
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.Erle.Value",
aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
erle_time_domain_.sum_value),
0, 19, 20);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.Erle.Max",
aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
erle_time_domain_.ceil_value),
0, 19, 20);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.EchoCanceller.Erle.Min",
aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
erle_time_domain_.floor_value),
0, 19, 20);
metrics_reported_ = true;
RTC_DCHECK_EQ(kMetricsReportingIntervalBlocks, block_counter_);
block_counter_ = 0;
ResetMetrics();
break;
default:
RTC_DCHECK_NOTREACHED();
break;
}
}
}
namespace aec3 {
void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value,
std::array<EchoRemoverMetrics::DbMetric, 2>* statistic) {
RTC_DCHECK(statistic);
// Truncation is intended in the band width computation.
constexpr int kNumBands = 2;
constexpr int kBandWidth = 65 / kNumBands;
constexpr float kOneByBandWidth = 1.f / kBandWidth;
RTC_DCHECK_EQ(kNumBands, statistic->size());
RTC_DCHECK_EQ(65, value.size());
for (size_t k = 0; k < statistic->size(); ++k) {
float average_band =
std::accumulate(value.begin() + kBandWidth * k,
value.begin() + kBandWidth * (k + 1), 0.f) *
kOneByBandWidth;
(*statistic)[k].Update(average_band);
}
}
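// Maps a linear-domain metric value to a clamped integer dB value via
// 10 * log10(value * scaling + 1e-10) + offset, optionally negated. As a
// worked example for the ERL histograms above (negate == true, offset == 30):
// a linear ERL of 1e-6, i.e. 60 dB of attenuation, yields
// -(10 * log10(1e-6) + 30) = 30.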
int TransformDbMetricForReporting(bool negate,
float min_value,
float max_value,
float offset,
float scaling,
float value) {
float new_value = 10.f * std::log10(value * scaling + 1e-10f) + offset;
if (negate) {
new_value = -new_value;
}
return static_cast<int>(rtc::SafeClamp(new_value, min_value, max_value));
}
} // namespace aec3
} // namespace webrtc

78
modules/audio_processing/aec3/echo_remover_metrics.h Normal file

View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
#include <array>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec_state.h"
namespace webrtc {
// Handles the reporting of metrics for the echo remover.
class EchoRemoverMetrics {
public:
struct DbMetric {
DbMetric();
DbMetric(float sum_value, float floor_value, float ceil_value);
void Update(float value);
void UpdateInstant(float value);
float sum_value;
float floor_value;
float ceil_value;
};
EchoRemoverMetrics();
EchoRemoverMetrics(const EchoRemoverMetrics&) = delete;
EchoRemoverMetrics& operator=(const EchoRemoverMetrics&) = delete;
// Updates the metric with new data.
void Update(
const AecState& aec_state,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
const std::array<float, kFftLengthBy2Plus1>& suppressor_gain);
// Returns true if the metrics have just been reported, otherwise false.
bool MetricsReported() { return metrics_reported_; }
private:
// Resets the metrics.
void ResetMetrics();
int block_counter_ = 0;
DbMetric erl_time_domain_;
DbMetric erle_time_domain_;
bool saturated_capture_ = false;
bool metrics_reported_ = false;
};
namespace aec3 {
// Updates a banded metric of type DbMetric with the values in the supplied
// array.
void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value,
std::array<EchoRemoverMetrics::DbMetric, 2>* statistic);
// Transforms a DbMetric from the linear domain into the logarithmic domain.
int TransformDbMetricForReporting(bool negate,
float min_value,
float max_value,
float offset,
float scaling,
float value);
} // namespace aec3
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_

146
modules/audio_processing/aec3/erl_estimator.cc Normal file

View File

@ -0,0 +1,146 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/erl_estimator.h"
#include <algorithm>
#include <numeric>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
constexpr float kMinErl = 0.01f;
constexpr float kMaxErl = 1000.f;
} // namespace
ErlEstimator::ErlEstimator(size_t startup_phase_length_blocks)
: startup_phase_length_blocks_(startup_phase_length_blocks) {
erl_.fill(kMaxErl);
hold_counters_.fill(0);
erl_time_domain_ = kMaxErl;
hold_counter_time_domain_ = 0;
}
ErlEstimator::~ErlEstimator() = default;
void ErlEstimator::Reset() {
blocks_since_reset_ = 0;
}
void ErlEstimator::Update(
const std::vector<bool>& converged_filters,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> render_spectra,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
capture_spectra) {
const size_t num_capture_channels = converged_filters.size();
RTC_DCHECK_EQ(capture_spectra.size(), num_capture_channels);
// Corresponds to WGN of power -46 dBFS.
constexpr float kX2Min = 44015068.0f;
const auto first_converged_iter =
std::find(converged_filters.begin(), converged_filters.end(), true);
const bool any_filter_converged =
first_converged_iter != converged_filters.end();
if (++blocks_since_reset_ < startup_phase_length_blocks_ ||
!any_filter_converged) {
return;
}
// Use the maximum spectrum across capture and the maximum across render.
std::array<float, kFftLengthBy2Plus1> max_capture_spectrum_data;
std::array<float, kFftLengthBy2Plus1> max_capture_spectrum =
capture_spectra[/*channel=*/0];
if (num_capture_channels > 1) {
// Initialize using the first channel with a converged filter.
const size_t first_converged =
std::distance(converged_filters.begin(), first_converged_iter);
RTC_DCHECK_GE(first_converged, 0);
RTC_DCHECK_LT(first_converged, num_capture_channels);
max_capture_spectrum_data = capture_spectra[first_converged];
for (size_t ch = first_converged + 1; ch < num_capture_channels; ++ch) {
if (!converged_filters[ch]) {
continue;
}
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
max_capture_spectrum_data[k] =
std::max(max_capture_spectrum_data[k], capture_spectra[ch][k]);
}
}
max_capture_spectrum = max_capture_spectrum_data;
}
const size_t num_render_channels = render_spectra.size();
std::array<float, kFftLengthBy2Plus1> max_render_spectrum_data;
rtc::ArrayView<const float, kFftLengthBy2Plus1> max_render_spectrum =
render_spectra[/*channel=*/0];
if (num_render_channels > 1) {
std::copy(render_spectra[0].begin(), render_spectra[0].end(),
max_render_spectrum_data.begin());
for (size_t ch = 1; ch < num_render_channels; ++ch) {
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
max_render_spectrum_data[k] =
std::max(max_render_spectrum_data[k], render_spectra[ch][k]);
}
}
max_render_spectrum = max_render_spectrum_data;
}
const auto& X2 = max_render_spectrum;
const auto& Y2 = max_capture_spectrum;
// Update the estimates by tracking the per-bin minima of the Y2/X2 ratio:
// a new, lower ERL is tracked with a smoothed update and latched for 1000
// blocks; once the hold expires, the estimate is allowed to grow back by
// doubling, capped at kMaxErl.
for (size_t k = 1; k < kFftLengthBy2; ++k) {
if (X2[k] > kX2Min) {
const float new_erl = Y2[k] / X2[k];
if (new_erl < erl_[k]) {
hold_counters_[k - 1] = 1000;
erl_[k] += 0.1f * (new_erl - erl_[k]);
erl_[k] = std::max(erl_[k], kMinErl);
}
}
}
std::for_each(hold_counters_.begin(), hold_counters_.end(),
[](int& a) { --a; });
std::transform(hold_counters_.begin(), hold_counters_.end(), erl_.begin() + 1,
erl_.begin() + 1, [](int a, float b) {
return a > 0 ? b : std::min(kMaxErl, 2.f * b);
});
erl_[0] = erl_[1];
erl_[kFftLengthBy2] = erl_[kFftLengthBy2 - 1];
// Compute ERL over all frequency bins.
const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f);
if (X2_sum > kX2Min * X2.size()) {
const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f);
const float new_erl = Y2_sum / X2_sum;
if (new_erl < erl_time_domain_) {
hold_counter_time_domain_ = 1000;
erl_time_domain_ += 0.1f * (new_erl - erl_time_domain_);
erl_time_domain_ = std::max(erl_time_domain_, kMinErl);
}
}
--hold_counter_time_domain_;
erl_time_domain_ = (hold_counter_time_domain_ > 0)
? erl_time_domain_
: std::min(kMaxErl, 2.f * erl_time_domain_);
}
} // namespace webrtc

58
modules/audio_processing/aec3/erl_estimator.h Normal file

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
#include <stddef.h>
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
namespace webrtc {
// Estimates the echo return loss based on the signal spectra.
class ErlEstimator {
public:
explicit ErlEstimator(size_t startup_phase_length_blocks);
~ErlEstimator();
ErlEstimator(const ErlEstimator&) = delete;
ErlEstimator& operator=(const ErlEstimator&) = delete;
// Resets the ERL estimation.
void Reset();
// Updates the ERL estimate.
void Update(const std::vector<bool>& converged_filters,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
render_spectra,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
capture_spectra);
// Returns the most recent ERL estimate.
const std::array<float, kFftLengthBy2Plus1>& Erl() const { return erl_; }
float ErlTimeDomain() const { return erl_time_domain_; }
private:
const size_t startup_phase_length_blocks_;
std::array<float, kFftLengthBy2Plus1> erl_;
std::array<int, kFftLengthBy2Minus1> hold_counters_;
float erl_time_domain_;
int hold_counter_time_domain_;
size_t blocks_since_reset_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
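Note that Erl() and ErlTimeDomain() return linear power ratios (echo power relative to render power), not dB values; as seen in echo_remover.cc, the reported echo return loss is computed as -10 * log10(erl), so an estimate of 0.01 corresponds to 20 dB of echo return loss.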

89
modules/audio_processing/aec3/erle_estimator.cc Normal file

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/erle_estimator.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
namespace webrtc {
ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks,
const EchoCanceller3Config& config,
size_t num_capture_channels)
: startup_phase_length_blocks_(startup_phase_length_blocks),
fullband_erle_estimator_(config.erle, num_capture_channels),
subband_erle_estimator_(config, num_capture_channels) {
if (config.erle.num_sections > 1) {
signal_dependent_erle_estimator_ =
std::make_unique<SignalDependentErleEstimator>(config,
num_capture_channels);
}
Reset(true);
}
ErleEstimator::~ErleEstimator() = default;
void ErleEstimator::Reset(bool delay_change) {
fullband_erle_estimator_.Reset();
subband_erle_estimator_.Reset();
if (signal_dependent_erle_estimator_) {
signal_dependent_erle_estimator_->Reset();
}
if (delay_change) {
blocks_since_reset_ = 0;
}
}
void ErleEstimator::Update(
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
filter_frequency_responses,
rtc::ArrayView<const float, kFftLengthBy2Plus1>
avg_render_spectrum_with_reverb,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> capture_spectra,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
subtractor_spectra,
const std::vector<bool>& converged_filters) {
RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
capture_spectra.size());
RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
subtractor_spectra.size());
const auto& X2_reverb = avg_render_spectrum_with_reverb;
const auto& Y2 = capture_spectra;
const auto& E2 = subtractor_spectra;
if (++blocks_since_reset_ < startup_phase_length_blocks_) {
return;
}
subband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
if (signal_dependent_erle_estimator_) {
signal_dependent_erle_estimator_->Update(
render_buffer, filter_frequency_responses, X2_reverb, Y2, E2,
subband_erle_estimator_.Erle(/*onset_compensated=*/false),
subband_erle_estimator_.Erle(/*onset_compensated=*/true),
converged_filters);
}
fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
}
void ErleEstimator::Dump(
const std::unique_ptr<ApmDataDumper>& data_dumper) const {
fullband_erle_estimator_.Dump(data_dumper);
subband_erle_estimator_.Dump(data_dumper);
if (signal_dependent_erle_estimator_) {
signal_dependent_erle_estimator_->Dump(data_dumper);
}
}
} // namespace webrtc
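The Update() flow above gates all sub-estimators on a shared startup counter: Reset(delay_change) rewinds blocks_since_reset_ only when the delay changed, and updates are suppressed until startup_phase_length_blocks_ blocks have elapsed, so transient post-reset data does not pollute the estimates. A tiny sketch of that gating pattern (names are illustrative):

#include <cstddef>

struct StartupGate {
  explicit StartupGate(size_t startup_blocks) : startup_blocks_(startup_blocks) {}
  void Reset() { blocks_since_reset_ = 0; }
  // Returns true once enough blocks have elapsed since the last reset.
  bool Ready() { return ++blocks_since_reset_ >= startup_blocks_; }

 private:
  const size_t startup_blocks_;
  size_t blocks_since_reset_ = 0;
};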

112 VocieProcess/modules/audio_processing/aec3/erle_estimator.h Normal file
View File

@ -0,0 +1,112 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
#include <stddef.h>
#include <array>
#include <memory>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/fullband_erle_estimator.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h"
#include "modules/audio_processing/aec3/subband_erle_estimator.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
namespace webrtc {
// Estimates the echo return loss enhancement. One estimate is done per subband
// and another one is done using the aggregation of energy over all the subbands.
class ErleEstimator {
public:
ErleEstimator(size_t startup_phase_length_blocks,
const EchoCanceller3Config& config,
size_t num_capture_channels);
~ErleEstimator();
// Resets the fullband ERLE estimator and the subbands ERLE estimators.
void Reset(bool delay_change);
// Updates the ERLE estimates.
void Update(
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
filter_frequency_responses,
rtc::ArrayView<const float, kFftLengthBy2Plus1>
avg_render_spectrum_with_reverb,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
capture_spectra,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
subtractor_spectra,
const std::vector<bool>& converged_filters);
// Returns the most recent subband ERLE estimates.
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
bool onset_compensated) const {
return signal_dependent_erle_estimator_
? signal_dependent_erle_estimator_->Erle(onset_compensated)
: subband_erle_estimator_.Erle(onset_compensated);
}
// Returns the non-capped subband ERLE.
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
const {
    // Unbounded ERLE is only used with the subband ERLE estimator, where the
    // ERLE is often capped at low values. When the signal-dependent ERLE
    // estimator is used, the capped ERLE is returned.
return !signal_dependent_erle_estimator_
? subband_erle_estimator_.ErleUnbounded()
: signal_dependent_erle_estimator_->Erle(
/*onset_compensated=*/false);
}
// Returns the subband ERLE that are estimated during onsets (only used for
// testing).
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
const {
return subband_erle_estimator_.ErleDuringOnsets();
}
// Returns the fullband ERLE estimate.
float FullbandErleLog2() const {
return fullband_erle_estimator_.FullbandErleLog2();
}
// Returns an estimation of the current linear filter quality based on the
// current and past fullband ERLE estimates. The returned value is a float
// vector with content between 0 and 1 where 1 indicates that, at this current
// time instant, the linear filter is reaching its maximum subtraction
// performance.
rtc::ArrayView<const absl::optional<float>> GetInstLinearQualityEstimates()
const {
return fullband_erle_estimator_.GetInstLinearQualityEstimates();
}
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
private:
const size_t startup_phase_length_blocks_;
FullBandErleEstimator fullband_erle_estimator_;
SubbandErleEstimator subband_erle_estimator_;
std::unique_ptr<SignalDependentErleEstimator>
signal_dependent_erle_estimator_;
size_t blocks_since_reset_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_

27 VocieProcess/modules/audio_processing/aec3/fft_buffer.cc Normal file
View File

@ -0,0 +1,27 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/fft_buffer.h"
namespace webrtc {
FftBuffer::FftBuffer(size_t size, size_t num_channels)
: size(static_cast<int>(size)),
buffer(size, std::vector<FftData>(num_channels)) {
for (auto& block : buffer) {
for (auto& channel_fft_data : block) {
channel_fft_data.Clear();
}
}
}
FftBuffer::~FftBuffer() = default;
} // namespace webrtc

60 VocieProcess/modules/audio_processing/aec3/fft_buffer.h Normal file
View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Struct for bundling a circular buffer of FftData objects together with the
// read and write indices.
struct FftBuffer {
FftBuffer(size_t size, size_t num_channels);
~FftBuffer();
int IncIndex(int index) const {
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
return index < size - 1 ? index + 1 : 0;
}
int DecIndex(int index) const {
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
return index > 0 ? index - 1 : size - 1;
}
int OffsetIndex(int index, int offset) const {
RTC_DCHECK_GE(buffer.size(), offset);
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
return (size + index + offset) % size;
}
void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
void IncWriteIndex() { write = IncIndex(write); }
void DecWriteIndex() { write = DecIndex(write); }
void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
void IncReadIndex() { read = IncIndex(read); }
void DecReadIndex() { read = DecIndex(read); }
const int size;
std::vector<std::vector<FftData>> buffer;
int write = 0;
int read = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
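A usage sketch of the wrap-around index arithmetic above (values assumed): OffsetIndex() adds size before taking the modulo, so negative offsets down to -size stay in range.

#include "modules/audio_processing/aec3/fft_buffer.h"

void CircularIndexingDemo() {
  webrtc::FftBuffer buffer(/*size=*/8, /*num_channels=*/2);
  buffer.UpdateWriteIndex(3);   // write: 0 -> 3
  buffer.DecWriteIndex();       // write: 3 -> 2
  buffer.UpdateReadIndex(-3);   // read:  0 -> (8 + 0 - 3) % 8 == 5
  const webrtc::FftData& fft = buffer.buffer[buffer.read][/*channel=*/0];
  (void)fft;
}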

104 VocieProcess/modules/audio_processing/aec3/fft_data.h Normal file
View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
namespace webrtc {
// Struct that holds the real and imaginary parts produced by 128 point
// real-valued FFTs.
struct FftData {
// Copies the data in src.
void Assign(const FftData& src) {
std::copy(src.re.begin(), src.re.end(), re.begin());
std::copy(src.im.begin(), src.im.end(), im.begin());
im[0] = im[kFftLengthBy2] = 0;
}
  // Sets the real and imaginary data to zero.
void Clear() {
re.fill(0.f);
im.fill(0.f);
}
// Computes the power spectrum of the data.
void SpectrumAVX2(rtc::ArrayView<float> power_spectrum) const;
// Computes the power spectrum of the data.
void Spectrum(Aec3Optimization optimization,
rtc::ArrayView<float> power_spectrum) const {
RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
switch (optimization) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2: {
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
constexpr int kLimit = kNumFourBinBands * 4;
for (size_t k = 0; k < kLimit; k += 4) {
const __m128 r = _mm_loadu_ps(&re[k]);
const __m128 i = _mm_loadu_ps(&im[k]);
const __m128 ii = _mm_mul_ps(i, i);
const __m128 rr = _mm_mul_ps(r, r);
const __m128 rrii = _mm_add_ps(rr, ii);
_mm_storeu_ps(&power_spectrum[k], rrii);
}
power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
im[kFftLengthBy2] * im[kFftLengthBy2];
} break;
case Aec3Optimization::kAvx2:
SpectrumAVX2(power_spectrum);
break;
#endif
default:
std::transform(re.begin(), re.end(), im.begin(), power_spectrum.begin(),
[](float a, float b) { return a * a + b * b; });
}
}
  // Copies the data from an interleaved array.
void CopyFromPackedArray(const std::array<float, kFftLength>& v) {
re[0] = v[0];
re[kFftLengthBy2] = v[1];
im[0] = im[kFftLengthBy2] = 0;
for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
re[k] = v[j++];
im[k] = v[j++];
}
}
// Copies the data into an interleaved array.
void CopyToPackedArray(std::array<float, kFftLength>* v) const {
RTC_DCHECK(v);
(*v)[0] = re[0];
(*v)[1] = re[kFftLengthBy2];
for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
(*v)[j++] = re[k];
(*v)[j++] = im[k];
}
}
std::array<float, kFftLengthBy2Plus1> re;
std::array<float, kFftLengthBy2Plus1> im;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
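A sketch of the packed layout assumed by CopyFromPackedArray()/CopyToPackedArray() above: v[0] carries re[0] (the DC bin), v[1] carries re[kFftLengthBy2] (the Nyquist bin), and the remaining bins are interleaved as re[1], im[1], re[2], im[2], and so on. The demo function below is illustrative; kNone selects the generic (non-SIMD) spectrum path.

#include <array>

#include "modules/audio_processing/aec3/fft_data.h"

void PackedArrayDemo(const std::array<float, webrtc::kFftLength>& packed) {
  webrtc::FftData X;
  X.CopyFromPackedArray(packed);  // unpacks; im[0] and im[kFftLengthBy2] are 0
  std::array<float, webrtc::kFftLengthBy2Plus1> X2;
  X.Spectrum(webrtc::Aec3Optimization::kNone,
             X2);  // X2[k] = re[k]^2 + im[k]^2
}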

289 VocieProcess/modules/audio_processing/aec3/filter_analyzer.cc Normal file
View File

@ -0,0 +1,289 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/filter_analyzer.h"
#include <math.h>
#include <algorithm>
#include <array>
#include <numeric>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
size_t FindPeakIndex(rtc::ArrayView<const float> filter_time_domain,
size_t peak_index_in,
size_t start_sample,
size_t end_sample) {
size_t peak_index_out = peak_index_in;
float max_h2 =
filter_time_domain[peak_index_out] * filter_time_domain[peak_index_out];
for (size_t k = start_sample; k <= end_sample; ++k) {
float tmp = filter_time_domain[k] * filter_time_domain[k];
if (tmp > max_h2) {
peak_index_out = k;
max_h2 = tmp;
}
}
return peak_index_out;
}
} // namespace
std::atomic<int> FilterAnalyzer::instance_count_(0);
FilterAnalyzer::FilterAnalyzer(const EchoCanceller3Config& config,
size_t num_capture_channels)
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
bounded_erl_(config.ep_strength.bounded_erl),
default_gain_(config.ep_strength.default_gain),
h_highpass_(num_capture_channels,
std::vector<float>(
GetTimeDomainLength(config.filter.refined.length_blocks),
0.f)),
filter_analysis_states_(num_capture_channels,
FilterAnalysisState(config)),
filter_delays_blocks_(num_capture_channels, 0) {
Reset();
}
FilterAnalyzer::~FilterAnalyzer() = default;
void FilterAnalyzer::Reset() {
blocks_since_reset_ = 0;
ResetRegion();
for (auto& state : filter_analysis_states_) {
state.Reset(default_gain_);
}
std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(), 0);
}
void FilterAnalyzer::Update(
rtc::ArrayView<const std::vector<float>> filters_time_domain,
const RenderBuffer& render_buffer,
bool* any_filter_consistent,
float* max_echo_path_gain) {
RTC_DCHECK(any_filter_consistent);
RTC_DCHECK(max_echo_path_gain);
RTC_DCHECK_EQ(filters_time_domain.size(), filter_analysis_states_.size());
RTC_DCHECK_EQ(filters_time_domain.size(), h_highpass_.size());
++blocks_since_reset_;
SetRegionToAnalyze(filters_time_domain[0].size());
AnalyzeRegion(filters_time_domain, render_buffer);
// Aggregate the results for all capture channels.
auto& st_ch0 = filter_analysis_states_[0];
*any_filter_consistent = st_ch0.consistent_estimate;
*max_echo_path_gain = st_ch0.gain;
min_filter_delay_blocks_ = filter_delays_blocks_[0];
for (size_t ch = 1; ch < filters_time_domain.size(); ++ch) {
auto& st_ch = filter_analysis_states_[ch];
*any_filter_consistent =
*any_filter_consistent || st_ch.consistent_estimate;
*max_echo_path_gain = std::max(*max_echo_path_gain, st_ch.gain);
min_filter_delay_blocks_ =
std::min(min_filter_delay_blocks_, filter_delays_blocks_[ch]);
}
}
void FilterAnalyzer::AnalyzeRegion(
rtc::ArrayView<const std::vector<float>> filters_time_domain,
const RenderBuffer& render_buffer) {
// Preprocess the filter to avoid issues with low-frequency components in the
// filter.
PreProcessFilters(filters_time_domain);
data_dumper_->DumpRaw("aec3_linear_filter_processed_td", h_highpass_[0]);
constexpr float kOneByBlockSize = 1.f / kBlockSize;
for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) {
RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size());
RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size());
auto& st_ch = filter_analysis_states_[ch];
RTC_DCHECK_EQ(h_highpass_[ch].size(), filters_time_domain[ch].size());
RTC_DCHECK_GT(h_highpass_[ch].size(), 0);
st_ch.peak_index = std::min(st_ch.peak_index, h_highpass_[ch].size() - 1);
st_ch.peak_index =
FindPeakIndex(h_highpass_[ch], st_ch.peak_index, region_.start_sample_,
region_.end_sample_);
filter_delays_blocks_[ch] = st_ch.peak_index >> kBlockSizeLog2;
UpdateFilterGain(h_highpass_[ch], &st_ch);
st_ch.filter_length_blocks =
filters_time_domain[ch].size() * kOneByBlockSize;
st_ch.consistent_estimate = st_ch.consistent_filter_detector.Detect(
h_highpass_[ch], region_,
render_buffer.GetBlock(-filter_delays_blocks_[ch]), st_ch.peak_index,
filter_delays_blocks_[ch]);
}
}
void FilterAnalyzer::UpdateFilterGain(
rtc::ArrayView<const float> filter_time_domain,
FilterAnalysisState* st) {
bool sufficient_time_to_converge =
blocks_since_reset_ > 5 * kNumBlocksPerSecond;
if (sufficient_time_to_converge && st->consistent_estimate) {
st->gain = fabsf(filter_time_domain[st->peak_index]);
} else {
// TODO(peah): Verify whether this check against a float is ok.
if (st->gain) {
st->gain = std::max(st->gain, fabsf(filter_time_domain[st->peak_index]));
}
}
if (bounded_erl_ && st->gain) {
st->gain = std::max(st->gain, 0.01f);
}
}
void FilterAnalyzer::PreProcessFilters(
rtc::ArrayView<const std::vector<float>> filters_time_domain) {
for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) {
RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size());
RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size());
RTC_DCHECK_GE(h_highpass_[ch].capacity(), filters_time_domain[ch].size());
h_highpass_[ch].resize(filters_time_domain[ch].size());
// Minimum phase high-pass filter with cutoff frequency at about 600 Hz.
constexpr std::array<float, 3> h = {
{0.7929742f, -0.36072128f, -0.47047766f}};
std::fill(h_highpass_[ch].begin() + region_.start_sample_,
h_highpass_[ch].begin() + region_.end_sample_ + 1, 0.f);
float* h_highpass_ch = h_highpass_[ch].data();
const float* filters_time_domain_ch = filters_time_domain[ch].data();
const size_t region_end = region_.end_sample_;
for (size_t k = std::max(h.size() - 1, region_.start_sample_);
k <= region_end; ++k) {
float tmp = h_highpass_ch[k];
for (size_t j = 0; j < h.size(); ++j) {
tmp += filters_time_domain_ch[k - j] * h[j];
}
h_highpass_ch[k] = tmp;
}
}
}
void FilterAnalyzer::ResetRegion() {
region_.start_sample_ = 0;
region_.end_sample_ = 0;
}
void FilterAnalyzer::SetRegionToAnalyze(size_t filter_size) {
constexpr size_t kNumberBlocksToUpdate = 1;
auto& r = region_;
r.start_sample_ = r.end_sample_ >= filter_size - 1 ? 0 : r.end_sample_ + 1;
r.end_sample_ =
std::min(r.start_sample_ + kNumberBlocksToUpdate * kBlockSize - 1,
filter_size - 1);
// Check range.
RTC_DCHECK_LT(r.start_sample_, filter_size);
RTC_DCHECK_LT(r.end_sample_, filter_size);
RTC_DCHECK_LE(r.start_sample_, r.end_sample_);
}
FilterAnalyzer::ConsistentFilterDetector::ConsistentFilterDetector(
const EchoCanceller3Config& config)
: active_render_threshold_(config.render_levels.active_render_limit *
config.render_levels.active_render_limit *
kFftLengthBy2) {
Reset();
}
void FilterAnalyzer::ConsistentFilterDetector::Reset() {
significant_peak_ = false;
filter_floor_accum_ = 0.f;
filter_secondary_peak_ = 0.f;
filter_floor_low_limit_ = 0;
filter_floor_high_limit_ = 0;
consistent_estimate_counter_ = 0;
consistent_delay_reference_ = -10;
}
bool FilterAnalyzer::ConsistentFilterDetector::Detect(
rtc::ArrayView<const float> filter_to_analyze,
const FilterRegion& region,
const Block& x_block,
size_t peak_index,
int delay_blocks) {
if (region.start_sample_ == 0) {
filter_floor_accum_ = 0.f;
filter_secondary_peak_ = 0.f;
filter_floor_low_limit_ = peak_index < 64 ? 0 : peak_index - 64;
filter_floor_high_limit_ =
peak_index > filter_to_analyze.size() - 129 ? 0 : peak_index + 128;
}
float filter_floor_accum = filter_floor_accum_;
float filter_secondary_peak = filter_secondary_peak_;
for (size_t k = region.start_sample_;
k < std::min(region.end_sample_ + 1, filter_floor_low_limit_); ++k) {
float abs_h = fabsf(filter_to_analyze[k]);
filter_floor_accum += abs_h;
filter_secondary_peak = std::max(filter_secondary_peak, abs_h);
}
for (size_t k = std::max(filter_floor_high_limit_, region.start_sample_);
k <= region.end_sample_; ++k) {
float abs_h = fabsf(filter_to_analyze[k]);
filter_floor_accum += abs_h;
filter_secondary_peak = std::max(filter_secondary_peak, abs_h);
}
filter_floor_accum_ = filter_floor_accum;
filter_secondary_peak_ = filter_secondary_peak;
if (region.end_sample_ == filter_to_analyze.size() - 1) {
float filter_floor = filter_floor_accum_ /
(filter_floor_low_limit_ + filter_to_analyze.size() -
filter_floor_high_limit_);
float abs_peak = fabsf(filter_to_analyze[peak_index]);
significant_peak_ = abs_peak > 10.f * filter_floor &&
abs_peak > 2.f * filter_secondary_peak_;
}
if (significant_peak_) {
bool active_render_block = false;
for (int ch = 0; ch < x_block.NumChannels(); ++ch) {
rtc::ArrayView<const float, kBlockSize> x_channel =
x_block.View(/*band=*/0, ch);
const float x_energy = std::inner_product(
x_channel.begin(), x_channel.end(), x_channel.begin(), 0.f);
if (x_energy > active_render_threshold_) {
active_render_block = true;
break;
}
}
if (consistent_delay_reference_ == delay_blocks) {
if (active_render_block) {
++consistent_estimate_counter_;
}
} else {
consistent_estimate_counter_ = 0;
consistent_delay_reference_ = delay_blocks;
}
}
return consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond;
}
} // namespace webrtc
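For intuition, SetRegionToAnalyze() above makes the filter analysis incremental: each Update() examines only kNumberBlocksToUpdate * kBlockSize samples, and the region wraps back to the start once it reaches the end of the filter, so the full filter is revisited over successive calls. A standalone sketch of that sweep (sizes assumed):

#include <algorithm>
#include <cstddef>

struct Region {
  size_t start = 0;
  size_t end = 0;
};

// Advances the region by one block, wrapping at the end of the filter.
void Advance(Region& r, size_t filter_size, size_t block_size = 64) {
  r.start = r.end >= filter_size - 1 ? 0 : r.end + 1;
  r.end = std::min(r.start + block_size - 1, filter_size - 1);
}

// With filter_size == 256, successive calls starting from {0, 0} visit
// [1, 64] -> [65, 128] -> [129, 192] -> [193, 255] -> [0, 63] -> [64, 127] ...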

Some files were not shown because too many files have changed in this diff.