update code.

luocai
2024-09-05 09:59:28 +08:00
parent 4f3dc015f7
commit ccf69909d6
223 changed files with 36168 additions and 0 deletions

api/audio/audio_processing.cc

@@ -0,0 +1,211 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/audio_processing.h"
#include <string>
#include "rtc_base/checks.h"
#include "rtc_base/strings/string_builder.h"
namespace webrtc {
namespace {
using Agc1Config = AudioProcessing::Config::GainController1;
using Agc2Config = AudioProcessing::Config::GainController2;
std::string NoiseSuppressionLevelToString(
const AudioProcessing::Config::NoiseSuppression::Level& level) {
switch (level) {
case AudioProcessing::Config::NoiseSuppression::Level::kLow:
return "Low";
case AudioProcessing::Config::NoiseSuppression::Level::kModerate:
return "Moderate";
case AudioProcessing::Config::NoiseSuppression::Level::kHigh:
return "High";
case AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh:
return "VeryHigh";
}
RTC_CHECK_NOTREACHED();
}
std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
switch (mode) {
case Agc1Config::Mode::kAdaptiveAnalog:
return "AdaptiveAnalog";
case Agc1Config::Mode::kAdaptiveDigital:
return "AdaptiveDigital";
case Agc1Config::Mode::kFixedDigital:
return "FixedDigital";
}
RTC_CHECK_NOTREACHED();
}
} // namespace
constexpr int AudioProcessing::kNativeSampleRatesHz[];
void CustomProcessing::SetRuntimeSetting(
AudioProcessing::RuntimeSetting setting) {}
bool Agc1Config::operator==(const Agc1Config& rhs) const {
const auto& analog_lhs = analog_gain_controller;
const auto& analog_rhs = rhs.analog_gain_controller;
return enabled == rhs.enabled && mode == rhs.mode &&
target_level_dbfs == rhs.target_level_dbfs &&
compression_gain_db == rhs.compression_gain_db &&
enable_limiter == rhs.enable_limiter &&
analog_lhs.enabled == analog_rhs.enabled &&
analog_lhs.startup_min_volume == analog_rhs.startup_min_volume &&
analog_lhs.clipped_level_min == analog_rhs.clipped_level_min &&
analog_lhs.enable_digital_adaptive ==
analog_rhs.enable_digital_adaptive &&
analog_lhs.clipped_level_step == analog_rhs.clipped_level_step &&
analog_lhs.clipped_ratio_threshold ==
analog_rhs.clipped_ratio_threshold &&
analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames &&
analog_lhs.clipping_predictor.mode ==
analog_rhs.clipping_predictor.mode &&
analog_lhs.clipping_predictor.window_length ==
analog_rhs.clipping_predictor.window_length &&
analog_lhs.clipping_predictor.reference_window_length ==
analog_rhs.clipping_predictor.reference_window_length &&
analog_lhs.clipping_predictor.reference_window_delay ==
analog_rhs.clipping_predictor.reference_window_delay &&
analog_lhs.clipping_predictor.clipping_threshold ==
analog_rhs.clipping_predictor.clipping_threshold &&
analog_lhs.clipping_predictor.crest_factor_margin ==
analog_rhs.clipping_predictor.crest_factor_margin &&
analog_lhs.clipping_predictor.use_predicted_step ==
analog_rhs.clipping_predictor.use_predicted_step;
}
bool Agc2Config::AdaptiveDigital::operator==(
const Agc2Config::AdaptiveDigital& rhs) const {
return enabled == rhs.enabled && headroom_db == rhs.headroom_db &&
max_gain_db == rhs.max_gain_db &&
initial_gain_db == rhs.initial_gain_db &&
max_gain_change_db_per_second == rhs.max_gain_change_db_per_second &&
max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs;
}
bool Agc2Config::InputVolumeController::operator==(
const Agc2Config::InputVolumeController& rhs) const {
return enabled == rhs.enabled;
}
bool Agc2Config::operator==(const Agc2Config& rhs) const {
return enabled == rhs.enabled &&
fixed_digital.gain_db == rhs.fixed_digital.gain_db &&
adaptive_digital == rhs.adaptive_digital &&
input_volume_controller == rhs.input_volume_controller;
}
bool AudioProcessing::Config::CaptureLevelAdjustment::operator==(
const AudioProcessing::Config::CaptureLevelAdjustment& rhs) const {
return enabled == rhs.enabled && pre_gain_factor == rhs.pre_gain_factor &&
post_gain_factor == rhs.post_gain_factor &&
analog_mic_gain_emulation == rhs.analog_mic_gain_emulation;
}
bool AudioProcessing::Config::CaptureLevelAdjustment::AnalogMicGainEmulation::
operator==(const AudioProcessing::Config::CaptureLevelAdjustment::
AnalogMicGainEmulation& rhs) const {
return enabled == rhs.enabled && initial_level == rhs.initial_level;
}
std::string AudioProcessing::Config::ToString() const {
char buf[2048];
rtc::SimpleStringBuilder builder(buf);
builder << "AudioProcessing::Config{ "
"pipeline: { "
"maximum_internal_processing_rate: "
<< pipeline.maximum_internal_processing_rate
<< ", multi_channel_render: " << pipeline.multi_channel_render
<< ", multi_channel_capture: " << pipeline.multi_channel_capture
<< " }, pre_amplifier: { enabled: " << pre_amplifier.enabled
<< ", fixed_gain_factor: " << pre_amplifier.fixed_gain_factor
<< " },capture_level_adjustment: { enabled: "
<< capture_level_adjustment.enabled
<< ", pre_gain_factor: " << capture_level_adjustment.pre_gain_factor
<< ", post_gain_factor: " << capture_level_adjustment.post_gain_factor
<< ", analog_mic_gain_emulation: { enabled: "
<< capture_level_adjustment.analog_mic_gain_emulation.enabled
<< ", initial_level: "
<< capture_level_adjustment.analog_mic_gain_emulation.initial_level
<< " }}, high_pass_filter: { enabled: " << high_pass_filter.enabled
<< " }, echo_canceller: { enabled: " << echo_canceller.enabled
<< ", mobile_mode: " << echo_canceller.mobile_mode
<< ", enforce_high_pass_filtering: "
<< echo_canceller.enforce_high_pass_filtering
<< " }, noise_suppression: { enabled: " << noise_suppression.enabled
<< ", level: "
<< NoiseSuppressionLevelToString(noise_suppression.level)
<< " }, transient_suppression: { enabled: "
<< transient_suppression.enabled
<< " }, gain_controller1: { enabled: " << gain_controller1.enabled
<< ", mode: " << GainController1ModeToString(gain_controller1.mode)
<< ", target_level_dbfs: " << gain_controller1.target_level_dbfs
<< ", compression_gain_db: " << gain_controller1.compression_gain_db
<< ", enable_limiter: " << gain_controller1.enable_limiter
<< ", analog_gain_controller { enabled: "
<< gain_controller1.analog_gain_controller.enabled
<< ", startup_min_volume: "
<< gain_controller1.analog_gain_controller.startup_min_volume
<< ", clipped_level_min: "
<< gain_controller1.analog_gain_controller.clipped_level_min
<< ", enable_digital_adaptive: "
<< gain_controller1.analog_gain_controller.enable_digital_adaptive
<< ", clipped_level_step: "
<< gain_controller1.analog_gain_controller.clipped_level_step
<< ", clipped_ratio_threshold: "
<< gain_controller1.analog_gain_controller.clipped_ratio_threshold
<< ", clipped_wait_frames: "
<< gain_controller1.analog_gain_controller.clipped_wait_frames
<< ", clipping_predictor: { enabled: "
<< gain_controller1.analog_gain_controller.clipping_predictor.enabled
<< ", mode: "
<< gain_controller1.analog_gain_controller.clipping_predictor.mode
<< ", window_length: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.window_length
<< ", reference_window_length: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.reference_window_length
<< ", reference_window_delay: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.reference_window_delay
<< ", clipping_threshold: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.clipping_threshold
<< ", crest_factor_margin: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.crest_factor_margin
<< ", use_predicted_step: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.use_predicted_step
<< " }}}, gain_controller2: { enabled: " << gain_controller2.enabled
<< ", fixed_digital: { gain_db: "
<< gain_controller2.fixed_digital.gain_db
<< " }, adaptive_digital: { enabled: "
<< gain_controller2.adaptive_digital.enabled
<< ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
<< ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
<< ", initial_gain_db: "
<< gain_controller2.adaptive_digital.initial_gain_db
<< ", max_gain_change_db_per_second: "
<< gain_controller2.adaptive_digital.max_gain_change_db_per_second
<< ", max_output_noise_level_dbfs: "
<< gain_controller2.adaptive_digital.max_output_noise_level_dbfs
<< " }, input_volume_control : { enabled "
<< gain_controller2.input_volume_controller.enabled << "}}";
return builder.str();
}
} // namespace webrtc

api/audio/audio_processing.h

@@ -0,0 +1,944 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_PROCESSING_H_
#define API_AUDIO_AUDIO_PROCESSING_H_
// MSVC++ requires this to be set before any other includes to get M_PI.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif
#include <math.h>
#include <stddef.h> // size_t
#include <stdio.h> // FILE
#include <string.h>
#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include "absl/base/nullability.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/audio_processing_statistics.h"
#include "api/audio/echo_control.h"
#include "api/ref_count.h"
#include "api/scoped_refptr.h"
#include "api/task_queue/task_queue_base.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
class AecDump;
class AudioBuffer;
class StreamConfig;
class ProcessingConfig;
class EchoDetector;
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//
// APM operates on two audio streams on a frame-by-frame basis. Frames of the
// primary stream, on which all processing is applied, are passed to
// `ProcessStream()`. Frames of the reverse direction stream are passed to
// `ProcessReverseStream()`. On the client-side, this will typically be the
// near-end (capture) and far-end (render) streams, respectively. APM should be
// placed in the signal chain as close to the audio hardware abstraction layer
// (HAL) as possible.
//
// On the server-side, the reverse stream will normally not be used, with
// processing occurring on each incoming stream.
//
// Component interfaces follow a similar pattern and are accessed through
// corresponding getters in APM. All components are disabled at create-time,
// with default settings that are recommended for most situations. New settings
// can be applied without enabling a component. Enabling a component triggers
// memory allocation and initialization to allow it to start processing the
// streams.
//
// Thread safety is provided with the following assumptions to reduce locking
// overhead:
// 1. The stream getters and setters are called from the same thread as
// ProcessStream(). More precisely, stream functions are never called
// concurrently with ProcessStream().
// 2. Parameter getters are never called concurrently with the corresponding
// setter.
//
// APM accepts only linear PCM audio data in chunks of ~10 ms (see
// AudioProcessing::GetFrameSize() for details) and sample rates ranging from
// 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
// float interfaces use deinterleaved data.
//
// Usage example, omitting error checking:
// rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
//
// AudioProcessing::Config config;
// config.echo_canceller.enabled = true;
// config.echo_canceller.mobile_mode = false;
//
// config.gain_controller1.enabled = true;
// config.gain_controller1.mode =
// AudioProcessing::Config::GainController1::kAdaptiveAnalog;
// config.gain_controller1.analog_level_minimum = 0;
// config.gain_controller1.analog_level_maximum = 255;
//
// config.gain_controller2.enabled = true;
//
// config.high_pass_filter.enabled = true;
//
// apm->ApplyConfig(config);
//
// // Start a voice call...
//
// // ... Render frame arrives bound for the audio HAL ...
// apm->ProcessReverseStream(render_frame);
//
// // ... Capture frame arrives from the audio HAL ...
// // Call required set_stream_ functions.
// apm->set_stream_delay_ms(delay_ms);
// apm->set_stream_analog_level(analog_level);
//
// apm->ProcessStream(capture_frame);
//
// // Call required stream_ functions.
// analog_level = apm->recommended_stream_analog_level();
// has_voice = apm->stream_has_voice();
//
// // Repeat render and capture processing for the duration of the call...
// // Start a new call...
// apm->Initialize();
//
// // Close the application...
// apm.reset();
//
class RTC_EXPORT AudioProcessing : public RefCountInterface {
public:
// The struct below constitutes the new parameter scheme for the audio
// processing. It is being introduced gradually and until it is fully
// introduced, it is prone to change.
// TODO(peah): Remove this comment once the new config scheme is fully rolled
// out.
//
// The parameters and behavior of the audio processing module are controlled
// by changing the default values in the AudioProcessing::Config struct.
// The config is applied by passing the struct to the ApplyConfig method.
//
// This config is intended to be used during setup, and to enable/disable
// top-level processing effects. Use during processing may cause undesired
// submodule resets, affecting the audio quality. Use the RuntimeSetting
// construct for runtime configuration.
struct RTC_EXPORT Config {
// Sets the properties of the audio processing pipeline.
struct RTC_EXPORT Pipeline {
// Ways to downmix a multi-channel track to mono.
enum class DownmixMethod {
kAverageChannels, // Average across channels.
kUseFirstChannel // Use the first channel.
};
// Maximum allowed processing rate used internally. May only be set to
// 32000 or 48000 and any differing values will be treated as 48000.
int maximum_internal_processing_rate = 48000;
// Allow multi-channel processing of render audio.
bool multi_channel_render = false;
// Allow multi-channel processing of capture audio when AEC3 is active
// or a custom AEC is injected.
bool multi_channel_capture = false;
// Indicates how to downmix multi-channel capture audio to mono (when
// needed).
DownmixMethod capture_downmix_method = DownmixMethod::kAverageChannels;
} pipeline;
// Enables the pre-amplifier. It amplifies the capture signal
// before any other processing is done.
// TODO(webrtc:5298): Deprecate and use the pre-gain functionality in
// capture_level_adjustment instead.
struct PreAmplifier {
bool enabled = false;
float fixed_gain_factor = 1.0f;
} pre_amplifier;
// Functionality for general level adjustment in the capture pipeline. This
// should not be used together with the legacy PreAmplifier functionality.
struct CaptureLevelAdjustment {
bool operator==(const CaptureLevelAdjustment& rhs) const;
bool operator!=(const CaptureLevelAdjustment& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
// The `pre_gain_factor` scales the signal before any processing is done.
float pre_gain_factor = 1.0f;
// The `post_gain_factor` scales the signal after all processing is done.
float post_gain_factor = 1.0f;
struct AnalogMicGainEmulation {
bool operator==(const AnalogMicGainEmulation& rhs) const;
bool operator!=(const AnalogMicGainEmulation& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
// Initial analog gain level to use for the emulated analog gain. Must
// be in the range [0...255].
int initial_level = 255;
} analog_mic_gain_emulation;
} capture_level_adjustment;
struct HighPassFilter {
bool enabled = false;
bool apply_in_full_band = true;
} high_pass_filter;
struct EchoCanceller {
bool enabled = false;
bool mobile_mode = false;
bool export_linear_aec_output = false;
// Enforce the highpass filter to be on (has no effect for the mobile
// mode).
bool enforce_high_pass_filtering = true;
} echo_canceller;
// Enables background noise suppression.
struct NoiseSuppression {
bool enabled = false;
enum Level { kLow, kModerate, kHigh, kVeryHigh };
Level level = kModerate;
bool analyze_linear_aec_output_when_available = false;
} noise_suppression;
// TODO(bugs.webrtc.org/357281131): Deprecated. Stop using and remove.
// Enables transient suppression.
struct TransientSuppression {
bool enabled = false;
} transient_suppression;
// Enables automatic gain control (AGC) functionality.
// The automatic gain control (AGC) component brings the signal to an
// appropriate range. This is done by applying a digital gain directly and,
// in the analog mode, prescribing an analog gain to be applied at the audio
// HAL.
// Recommended to be enabled on the client-side.
struct RTC_EXPORT GainController1 {
bool operator==(const GainController1& rhs) const;
bool operator!=(const GainController1& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
enum Mode {
// Adaptive mode intended for use if an analog volume control is
// available on the capture device. It will require the user to provide
// coupling between the OS mixer controls and AGC through the
// stream_analog_level() functions.
// It consists of an analog gain prescription for the audio device and a
// digital compression stage.
kAdaptiveAnalog,
// Adaptive mode intended for situations in which an analog volume
// control is unavailable. It operates in a similar fashion to the
// adaptive analog mode, but with scaling instead applied in the digital
// domain. As with the analog mode, it additionally uses a digital
// compression stage.
kAdaptiveDigital,
// Fixed mode which enables only the digital compression stage also used
// by the two adaptive modes.
// It is distinguished from the adaptive modes by considering only a
// short time-window of the input signal. It applies a fixed gain
// through most of the input level range, and compresses (gradually
// reduces gain with increasing level) the input signal at higher
// levels. This mode is preferred on embedded devices where the capture
// signal level is predictable, so that a known gain can be applied.
kFixedDigital
};
Mode mode = kAdaptiveAnalog;
// Sets the target peak level (or envelope) of the AGC in dBFS (decibels
// from digital full-scale). The convention is to use positive values. For
// instance, passing in a value of 3 corresponds to -3 dBFS, or a target
// level 3 dB below full-scale. Limited to [0, 31].
int target_level_dbfs = 3;
// Sets the maximum gain the digital compression stage may apply, in dB. A
// higher number corresponds to greater compression, while a value of 0
// will leave the signal uncompressed. Limited to [0, 90].
// For updates after APM setup, use a RuntimeSetting instead.
int compression_gain_db = 9;
// When enabled, the compression stage will hard limit the signal to the
// target level. Otherwise, the signal will be compressed but not limited
// above the target level.
bool enable_limiter = true;
// Enables the analog gain controller functionality.
struct AnalogGainController {
bool enabled = true;
// TODO(bugs.webrtc.org/7494): Deprecated. Stop using and remove.
int startup_min_volume = 0;
// Lowest analog microphone level that will be applied in response to
// clipping.
int clipped_level_min = 70;
// If true, an adaptive digital gain is applied.
bool enable_digital_adaptive = true;
// Amount the microphone level is lowered with every clipping event.
// Limited to (0, 255].
int clipped_level_step = 15;
// Proportion of clipped samples required to declare a clipping event.
// Limited to (0.f, 1.f).
float clipped_ratio_threshold = 0.1f;
// Time in frames to wait after a clipping event before checking again.
// Limited to values higher than 0.
int clipped_wait_frames = 300;
// Enables clipping prediction functionality.
struct ClippingPredictor {
bool enabled = false;
enum Mode {
// Clipping event prediction mode with fixed step estimation.
kClippingEventPrediction,
// Clipped peak estimation mode with adaptive step estimation.
kAdaptiveStepClippingPeakPrediction,
// Clipped peak estimation mode with fixed step estimation.
kFixedStepClippingPeakPrediction,
};
Mode mode = kClippingEventPrediction;
// Number of frames in the sliding analysis window.
int window_length = 5;
// Number of frames in the sliding reference window.
int reference_window_length = 5;
// Reference window delay (unit: number of frames).
int reference_window_delay = 5;
// Clipping prediction threshold (dBFS).
float clipping_threshold = -1.0f;
// Crest factor drop threshold (dB).
float crest_factor_margin = 3.0f;
// If true, the recommended clipped level step is used to modify the
// analog gain. Otherwise, the predictor runs without affecting the
// analog gain.
bool use_predicted_step = true;
} clipping_predictor;
} analog_gain_controller;
} gain_controller1;
// Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
// replaces the AGC sub-module parametrized by `gain_controller1`.
// AGC2 brings the captured audio signal to the desired level by combining
// three different controllers (namely, input volume controller, adaptive
// digital controller and fixed digital controller) and a limiter.
// TODO(bugs.webrtc.org:7494): Name `GainController` when AGC1 removed.
struct RTC_EXPORT GainController2 {
bool operator==(const GainController2& rhs) const;
bool operator!=(const GainController2& rhs) const {
return !(*this == rhs);
}
// AGC2 must be created if and only if `enabled` is true.
bool enabled = false;
// Parameters for the input volume controller, which adjusts the input
// volume applied when the audio is captured (e.g., microphone volume on
// a soundcard, input volume on HAL).
struct InputVolumeController {
bool operator==(const InputVolumeController& rhs) const;
bool operator!=(const InputVolumeController& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
} input_volume_controller;
// Parameters for the adaptive digital controller, which adjusts and
// applies a digital gain after echo cancellation and after noise
// suppression.
struct RTC_EXPORT AdaptiveDigital {
bool operator==(const AdaptiveDigital& rhs) const;
bool operator!=(const AdaptiveDigital& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
float headroom_db = 5.0f;
float max_gain_db = 50.0f;
float initial_gain_db = 15.0f;
float max_gain_change_db_per_second = 6.0f;
float max_output_noise_level_dbfs = -50.0f;
} adaptive_digital;
// Parameters for the fixed digital controller, which applies a fixed
// digital gain after the adaptive digital controller and before the
// limiter.
struct FixedDigital {
// By setting `gain_db` to a value greater than zero, the limiter can be
// turned into a compressor that first applies a fixed gain.
float gain_db = 0.0f;
} fixed_digital;
} gain_controller2;
std::string ToString() const;
};
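// A minimal configuration sketch (illustrative only): enable AGC2 on top of
// the defaults and apply the config to an existing APM instance.
//   AudioProcessing::Config config;
//   config.gain_controller2.enabled = true;
//   config.gain_controller2.adaptive_digital.enabled = true;
//   config.gain_controller2.fixed_digital.gain_db = 6.0f;
//   apm->ApplyConfig(config);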
// Specifies the properties of a setting to be passed to AudioProcessing at
// runtime.
class RuntimeSetting {
public:
enum class Type {
kNotSpecified,
kCapturePreGain,
kCaptureCompressionGain,
kCaptureFixedPostGain,
kPlayoutVolumeChange,
kCustomRenderProcessingRuntimeSetting,
kPlayoutAudioDeviceChange,
kCapturePostGain,
kCaptureOutputUsed
};
// Play-out audio device properties.
struct PlayoutAudioDeviceInfo {
int id; // Identifies the audio device.
int max_volume; // Maximum play-out volume.
};
RuntimeSetting() : type_(Type::kNotSpecified), value_(0.0f) {}
~RuntimeSetting() = default;
static RuntimeSetting CreateCapturePreGain(float gain) {
return {Type::kCapturePreGain, gain};
}
static RuntimeSetting CreateCapturePostGain(float gain) {
return {Type::kCapturePostGain, gain};
}
// Corresponds to Config::GainController1::compression_gain_db, but for
// runtime configuration.
static RuntimeSetting CreateCompressionGainDb(int gain_db) {
RTC_DCHECK_GE(gain_db, 0);
RTC_DCHECK_LE(gain_db, 90);
return {Type::kCaptureCompressionGain, static_cast<float>(gain_db)};
}
// Corresponds to Config::GainController2::fixed_digital::gain_db, but for
// runtime configuration.
static RuntimeSetting CreateCaptureFixedPostGain(float gain_db) {
RTC_DCHECK_GE(gain_db, 0.0f);
RTC_DCHECK_LE(gain_db, 90.0f);
return {Type::kCaptureFixedPostGain, gain_db};
}
// Creates a runtime setting to notify play-out (aka render) audio device
// changes.
static RuntimeSetting CreatePlayoutAudioDeviceChange(
PlayoutAudioDeviceInfo audio_device) {
return {Type::kPlayoutAudioDeviceChange, audio_device};
}
// Creates a runtime setting to notify play-out (aka render) volume changes.
// `volume` is the unnormalized volume, the maximum of which is
// platform-dependent.
static RuntimeSetting CreatePlayoutVolumeChange(int volume) {
return {Type::kPlayoutVolumeChange, volume};
}
static RuntimeSetting CreateCustomRenderSetting(float payload) {
return {Type::kCustomRenderProcessingRuntimeSetting, payload};
}
static RuntimeSetting CreateCaptureOutputUsedSetting(
bool capture_output_used) {
return {Type::kCaptureOutputUsed, capture_output_used};
}
Type type() const { return type_; }
// Getters do not return a value but instead modify the argument to protect
// from implicit casting.
void GetFloat(float* value) const {
RTC_DCHECK(value);
*value = value_.float_value;
}
void GetInt(int* value) const {
RTC_DCHECK(value);
*value = value_.int_value;
}
void GetBool(bool* value) const {
RTC_DCHECK(value);
*value = value_.bool_value;
}
void GetPlayoutAudioDeviceInfo(PlayoutAudioDeviceInfo* value) const {
RTC_DCHECK(value);
*value = value_.playout_audio_device_info;
}
private:
RuntimeSetting(Type id, float value) : type_(id), value_(value) {}
RuntimeSetting(Type id, int value) : type_(id), value_(value) {}
RuntimeSetting(Type id, PlayoutAudioDeviceInfo value)
: type_(id), value_(value) {}
Type type_;
union U {
U() {}
U(int value) : int_value(value) {}
U(float value) : float_value(value) {}
U(PlayoutAudioDeviceInfo value) : playout_audio_device_info(value) {}
float float_value;
int int_value;
bool bool_value;
PlayoutAudioDeviceInfo playout_audio_device_info;
} value_;
};
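// A minimal RuntimeSetting sketch (illustrative only): create a setting,
// read the payload back via the type-checked getters, and enqueue it.
//   auto setting =
//       AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.0f);
//   if (setting.type() ==
//       AudioProcessing::RuntimeSetting::Type::kCapturePreGain) {
//     float gain = 0.0f;
//     setting.GetFloat(&gain);  // gain == 2.0f.
//   }
//   apm->SetRuntimeSetting(setting);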
~AudioProcessing() override {}
// Initializes internal states, while retaining all user settings. This
// should be called before beginning to process a new audio stream. However,
// it is not necessary to call before processing the first stream after
// creation.
//
// It is also not necessary to call if the audio parameters (sample
// rate and number of channels) have changed. Passing updated parameters
// directly to `ProcessStream()` and `ProcessReverseStream()` is permissible.
// If the parameters are known at init-time though, they may be provided.
// TODO(webrtc:5298): Change to return void.
virtual int Initialize() = 0;
// The int16 interfaces require:
// - that only `NativeRate`s be used,
// - that the input, output and reverse rates match, and
// - that `processing_config.output_stream()` matches
// `processing_config.input_stream()`.
//
// The float interfaces accept arbitrary rates and support differing input and
// output layouts, but the output must have either one channel or the same
// number of channels as the input.
virtual int Initialize(const ProcessingConfig& processing_config) = 0;
// TODO(peah): This method is a temporary solution used to take control
// over the parameters in the audio processing module and is likely to change.
virtual void ApplyConfig(const Config& config) = 0;
// TODO(ajm): Only intended for internal use. Make private and friend the
// necessary classes?
virtual int proc_sample_rate_hz() const = 0;
virtual int proc_split_sample_rate_hz() const = 0;
virtual size_t num_input_channels() const = 0;
virtual size_t num_proc_channels() const = 0;
virtual size_t num_output_channels() const = 0;
virtual size_t num_reverse_channels() const = 0;
// Set to true when the output of AudioProcessing will be muted or in some
// other way not used. Ideally, the captured audio would still be processed,
// but some components may change behavior based on this information.
// Default false. This method takes a lock. To achieve this in a lock-less
// manner, use PostRuntimeSetting instead.
virtual void set_output_will_be_muted(bool muted) = 0;
// Enqueues a runtime setting.
virtual void SetRuntimeSetting(RuntimeSetting setting) = 0;
// Enqueues a runtime setting. Returns a bool indicating whether the
// enqueueing was successful.
virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0;
// Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio as
// specified in `input_config` and `output_config`. `src` and `dest` may use
// the same memory, if desired.
virtual int ProcessStream(const int16_t* const src,
const StreamConfig& input_config,
const StreamConfig& output_config,
int16_t* const dest) = 0;
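// A minimal call sketch (illustrative only): process one 10 ms mono int16
// frame at 48 kHz in place, using the same buffer for input and output.
//   StreamConfig config(/*sample_rate_hz=*/48000, /*num_channels=*/1);
//   int16_t frame[480];  // 48000 / 100 samples, see GetFrameSize().
//   int err = apm->ProcessStream(frame, config, config, frame);
//   // err == AudioProcessing::kNoError on success.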
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// `src` points to a channel buffer, arranged according to `input_stream`. At
// output, the channels will be arranged according to `output_stream` in
// `dest`.
//
// The output must have one channel or as many channels as the input. `src`
// and `dest` may use the same memory, if desired.
virtual int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
// Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio for
// the reverse direction audio stream as specified in `input_config` and
// `output_config`. `src` and `dest` may use the same memory, if desired.
virtual int ProcessReverseStream(const int16_t* const src,
const StreamConfig& input_config,
const StreamConfig& output_config,
int16_t* const dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// `data` points to a channel buffer, arranged according to `reverse_config`.
virtual int ProcessReverseStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of `data` points to a channel buffer, arranged according to
// `reverse_config`.
virtual int AnalyzeReverseStream(const float* const* data,
const StreamConfig& reverse_config) = 0;
// Returns the most recently produced ~10 ms of the linear AEC output at a
// rate of 16 kHz. If there is more than one capture channel, a mono
// representation of the input is returned. Returns true/false to indicate
// whether an output was returned.
virtual bool GetLinearAecOutput(
rtc::ArrayView<std::array<float, 160>> linear_output) const = 0;
// This must be called prior to ProcessStream() if and only if adaptive analog
// gain control is enabled, to pass the current analog level from the audio
// HAL. Must be within the range [0, 255].
virtual void set_stream_analog_level(int level) = 0;
// When an analog mode is set, this should be called after
// `set_stream_analog_level()` and `ProcessStream()` to obtain the recommended
// new analog level for the audio HAL. It is the user's responsibility to
// apply this level.
virtual int recommended_stream_analog_level() const = 0;
// This must be called if and only if echo processing is enabled.
//
// Sets the `delay` in ms between ProcessReverseStream() receiving a far-end
// frame and ProcessStream() receiving a near-end frame containing the
// corresponding echo. On the client-side this can be expressed as
// delay = (t_render - t_analyze) + (t_process - t_capture)
// where,
// - t_analyze is the time a frame is passed to ProcessReverseStream() and
// t_render is the time the first sample of the same frame is rendered by
// the audio hardware.
// - t_capture is the time the first sample of a frame is captured by the
// audio hardware and t_process is the time the same frame is passed to
// ProcessStream().
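// For example (illustrative): with 10 ms of render-side buffering
// (t_render - t_analyze) and 20 ms of capture-side buffering
// (t_process - t_capture), call set_stream_delay_ms(30) before each
// ProcessStream() call.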
virtual int set_stream_delay_ms(int delay) = 0;
virtual int stream_delay_ms() const = 0;
// Call to signal that a key press occurred (true) or did not occur (false)
// with this chunk of audio.
virtual void set_stream_key_pressed(bool key_pressed) = 0;
// Creates and attaches a webrtc::AecDump for recording debugging
// information.
// The `worker_queue` may not be null and must outlive the created
// AecDump instance. `max_log_size_bytes == -1` means the log size
// will be unlimited. `handle` may not be null. The AecDump takes
// responsibility for `handle` and closes it in the destructor. A
// return value of true indicates that the file has been
// successfully opened, while a value of false indicates that
// opening the file failed.
virtual bool CreateAndAttachAecDump(
absl::string_view file_name,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
virtual bool CreateAndAttachAecDump(
absl::Nonnull<FILE*> handle,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
// TODO(webrtc:5298) Deprecated variant.
// Attaches provided webrtc::AecDump for recording debugging
// information. Log file and maximum file size logic is supposed to
// be handled by implementing instance of AecDump. Calling this
// method when another AecDump is attached resets the active AecDump
// with a new one. This causes the d-tor of the earlier AecDump to
// be called. The d-tor call may block until all pending logging
// tasks are completed.
virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) = 0;
// If no AecDump is attached, this has no effect. If an AecDump is
// attached, its destructor is called. The d-tor may block until
// all pending logging tasks are completed.
virtual void DetachAecDump() = 0;
// Get audio processing statistics.
virtual AudioProcessingStats GetStatistics() = 0;
// TODO(webrtc:5298) Deprecated variant. The `has_remote_tracks` argument
// should be set if there are active remote tracks (this would usually be true
// during a call). If there are no remote tracks some of the stats will not be
// set by AudioProcessing, because they only make sense if there is at least
// one remote track.
virtual AudioProcessingStats GetStatistics(bool has_remote_tracks) = 0;
// Returns the last applied configuration.
virtual AudioProcessing::Config GetConfig() const = 0;
enum Error {
// Fatal errors.
kNoError = 0,
kUnspecifiedError = -1,
kCreationFailedError = -2,
kUnsupportedComponentError = -3,
kUnsupportedFunctionError = -4,
kNullPointerError = -5,
kBadParameterError = -6,
kBadSampleRateError = -7,
kBadDataLengthError = -8,
kBadNumberChannelsError = -9,
kFileError = -10,
kStreamParameterNotSetError = -11,
kNotEnabledError = -12,
// Warnings are non-fatal.
// This results when a set_stream_ parameter is out of range. Processing
// will continue, but the parameter may have been truncated.
kBadStreamParameterWarning = -13
};
// Native rates supported by the integer interfaces.
enum NativeRate {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000,
kSampleRate48kHz = 48000
};
// TODO(kwiberg): We currently need to support a compiler (Visual C++) that
// complains if we don't explicitly state the size of the array here. Remove
// the size when that's no longer the case.
static constexpr int kNativeSampleRatesHz[4] = {
kSampleRate8kHz, kSampleRate16kHz, kSampleRate32kHz, kSampleRate48kHz};
static constexpr size_t kNumNativeSampleRates =
arraysize(kNativeSampleRatesHz);
static constexpr int kMaxNativeSampleRateHz =
kNativeSampleRatesHz[kNumNativeSampleRates - 1];
// APM processes audio in chunks of about 10 ms. See GetFrameSize() for
// details.
static constexpr int kChunkSizeMs = 10;
// Returns floor(sample_rate_hz/100): the number of samples per channel used
// as input and output to the audio processing module in calls to
// ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and
// GetLinearAecOutput.
//
// This is exactly 10 ms for sample rates divisible by 100. For example:
// - 48000 Hz (480 samples per channel),
// - 44100 Hz (441 samples per channel),
// - 16000 Hz (160 samples per channel).
//
// Sample rates not divisible by 100 are received/produced in frames of
// approximately 10 ms. For example:
// - 22050 Hz (220 samples per channel, or ~9.98 ms per frame),
// - 11025 Hz (110 samples per channel, or ~9.98 ms per frame).
// These nondivisible sample rates yield lower audio quality compared to
// multiples of 100. Internal resampling to 10 ms frames causes a simulated
// clock drift effect which impacts the performance of (for example) echo
// cancellation.
static int GetFrameSize(int sample_rate_hz) { return sample_rate_hz / 100; }
};
// Experimental interface for a custom analysis submodule.
class CustomAudioAnalyzer {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
// Analyzes the given capture or render signal.
virtual void Analyze(const AudioBuffer* audio) = 0;
// Returns a string representation of the module state.
virtual std::string ToString() const = 0;
virtual ~CustomAudioAnalyzer() {}
};
// Interface for a custom processing submodule.
class CustomProcessing {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
// Processes the given capture or render signal.
virtual void Process(AudioBuffer* audio) = 0;
// Returns a string representation of the module state.
virtual std::string ToString() const = 0;
// Handles RuntimeSettings. TODO(webrtc:9262): make pure virtual
// after updating dependencies.
virtual void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting);
virtual ~CustomProcessing() {}
};
class RTC_EXPORT AudioProcessingBuilder {
public:
AudioProcessingBuilder();
AudioProcessingBuilder(const AudioProcessingBuilder&) = delete;
AudioProcessingBuilder& operator=(const AudioProcessingBuilder&) = delete;
~AudioProcessingBuilder();
// Sets the APM configuration.
AudioProcessingBuilder& SetConfig(const AudioProcessing::Config& config) {
config_ = config;
return *this;
}
// Sets the echo controller factory to inject when APM is created.
AudioProcessingBuilder& SetEchoControlFactory(
std::unique_ptr<EchoControlFactory> echo_control_factory) {
echo_control_factory_ = std::move(echo_control_factory);
return *this;
}
// Sets the capture post-processing sub-module to inject when APM is created.
AudioProcessingBuilder& SetCapturePostProcessing(
std::unique_ptr<CustomProcessing> capture_post_processing) {
capture_post_processing_ = std::move(capture_post_processing);
return *this;
}
// Sets the render pre-processing sub-module to inject when APM is created.
AudioProcessingBuilder& SetRenderPreProcessing(
std::unique_ptr<CustomProcessing> render_pre_processing) {
render_pre_processing_ = std::move(render_pre_processing);
return *this;
}
// Sets the echo detector to inject when APM is created.
AudioProcessingBuilder& SetEchoDetector(
rtc::scoped_refptr<EchoDetector> echo_detector) {
echo_detector_ = std::move(echo_detector);
return *this;
}
// Sets the capture analyzer sub-module to inject when APM is created.
AudioProcessingBuilder& SetCaptureAnalyzer(
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
capture_analyzer_ = std::move(capture_analyzer);
return *this;
}
// Creates an APM instance with the specified config or the default one if
// unspecified. Injects the specified components transferring the ownership
// to the newly created APM instance - i.e., except for the config, the
// builder is reset to its initial state.
rtc::scoped_refptr<AudioProcessing> Create();
private:
AudioProcessing::Config config_;
std::unique_ptr<EchoControlFactory> echo_control_factory_;
std::unique_ptr<CustomProcessing> capture_post_processing_;
std::unique_ptr<CustomProcessing> render_pre_processing_;
rtc::scoped_refptr<EchoDetector> echo_detector_;
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
};
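// A minimal builder sketch (illustrative only): create an APM with echo
// cancellation enabled via the builder.
//   AudioProcessing::Config config;
//   config.echo_canceller.enabled = true;
//   rtc::scoped_refptr<AudioProcessing> apm =
//       AudioProcessingBuilder().SetConfig(config).Create();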
class StreamConfig {
public:
// sample_rate_hz: The sampling rate of the stream.
// num_channels: The number of audio channels in the stream.
StreamConfig(int sample_rate_hz = 0,
size_t num_channels = 0) // NOLINT(runtime/explicit)
: sample_rate_hz_(sample_rate_hz),
num_channels_(num_channels),
num_frames_(calculate_frames(sample_rate_hz)) {}
void set_sample_rate_hz(int value) {
sample_rate_hz_ = value;
num_frames_ = calculate_frames(value);
}
void set_num_channels(size_t value) { num_channels_ = value; }
int sample_rate_hz() const { return sample_rate_hz_; }
// The number of channels in the stream.
size_t num_channels() const { return num_channels_; }
size_t num_frames() const { return num_frames_; }
size_t num_samples() const { return num_channels_ * num_frames_; }
bool operator==(const StreamConfig& other) const {
return sample_rate_hz_ == other.sample_rate_hz_ &&
num_channels_ == other.num_channels_;
}
bool operator!=(const StreamConfig& other) const { return !(*this == other); }
private:
static size_t calculate_frames(int sample_rate_hz) {
return static_cast<size_t>(AudioProcessing::GetFrameSize(sample_rate_hz));
}
int sample_rate_hz_;
size_t num_channels_;
size_t num_frames_;
};
class ProcessingConfig {
public:
enum StreamName {
kInputStream,
kOutputStream,
kReverseInputStream,
kReverseOutputStream,
kNumStreamNames,
};
const StreamConfig& input_stream() const {
return streams[StreamName::kInputStream];
}
const StreamConfig& output_stream() const {
return streams[StreamName::kOutputStream];
}
const StreamConfig& reverse_input_stream() const {
return streams[StreamName::kReverseInputStream];
}
const StreamConfig& reverse_output_stream() const {
return streams[StreamName::kReverseOutputStream];
}
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
StreamConfig& reverse_input_stream() {
return streams[StreamName::kReverseInputStream];
}
StreamConfig& reverse_output_stream() {
return streams[StreamName::kReverseOutputStream];
}
bool operator==(const ProcessingConfig& other) const {
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
if (this->streams[i] != other.streams[i]) {
return false;
}
}
return true;
}
bool operator!=(const ProcessingConfig& other) const {
return !(*this == other);
}
StreamConfig streams[StreamName::kNumStreamNames];
};
// Interface for an echo detector submodule.
class EchoDetector : public RefCountInterface {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int capture_sample_rate_hz,
int num_capture_channels,
int render_sample_rate_hz,
int num_render_channels) = 0;
// Analysis (non-modifying) of the first channel of the render signal.
virtual void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) = 0;
// Analysis (non-modifying) of the capture signal.
virtual void AnalyzeCaptureAudio(
rtc::ArrayView<const float> capture_audio) = 0;
struct Metrics {
absl::optional<double> echo_likelihood;
absl::optional<double> echo_likelihood_recent_max;
};
// Collect current metrics from the echo detector.
virtual Metrics GetMetrics() const = 0;
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_PROCESSING_H_

api/audio/audio_processing_statistics.cc

@@ -0,0 +1,22 @@
/*
* Copyright 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/audio_processing_statistics.h"
namespace webrtc {
AudioProcessingStats::AudioProcessingStats() = default;
AudioProcessingStats::AudioProcessingStats(const AudioProcessingStats& other) =
default;
AudioProcessingStats::~AudioProcessingStats() = default;
} // namespace webrtc

api/audio/audio_processing_statistics.h

@@ -0,0 +1,67 @@
/*
* Copyright 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
#define API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
#include <stdint.h>
#include "absl/types/optional.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
// This version of the stats uses Optionals; it will replace the regular
// AudioProcessingStatistics struct.
struct RTC_EXPORT AudioProcessingStats {
AudioProcessingStats();
AudioProcessingStats(const AudioProcessingStats& other);
~AudioProcessingStats();
// Deprecated.
// TODO(bugs.webrtc.org/11226): Remove.
// True if voice is detected in the last capture frame, after processing.
// It is conservative in flagging audio as speech, with low likelihood of
// incorrectly flagging a frame as voice.
// Only reported if voice detection is enabled in AudioProcessing::Config.
absl::optional<bool> voice_detected;
// AEC Statistics.
// ERL = 10log_10(P_far / P_echo)
absl::optional<double> echo_return_loss;
// ERLE = 10log_10(P_echo / P_out)
absl::optional<double> echo_return_loss_enhancement;
// Fraction of time that the AEC linear filter is divergent, in a 1-second
// non-overlapped aggregation window.
absl::optional<double> divergent_filter_fraction;
// The delay metrics consist of the delay median and standard deviation, as
// well as the fraction of delay estimates that can make the echo
// cancellation perform poorly. The values are aggregated until the first
// call to `GetStatistics()` and afterwards aggregated and updated every
// second. Note that if there are several clients pulling metrics from
// `GetStatistics()` during a session, the first call from any of them will
// switch to a one-second aggregation window for all.
absl::optional<int32_t> delay_median_ms;
absl::optional<int32_t> delay_standard_deviation_ms;
// Residual echo detector likelihood.
absl::optional<double> residual_echo_likelihood;
// Maximum residual echo likelihood from the last time period.
absl::optional<double> residual_echo_likelihood_recent_max;
// The instantaneous delay estimate produced in the AEC. The unit is in
// milliseconds and the value is the instantaneous value at the time of the
// call to `GetStatistics()`.
absl::optional<int32_t> delay_ms;
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_

api/audio/audio_view.h

@@ -0,0 +1,269 @@
/*
* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_VIEW_H_
#define API_AUDIO_AUDIO_VIEW_H_
#include "api/array_view.h"
#include "api/audio/channel_layout.h"
#include "rtc_base/checks.h"
namespace webrtc {
// This file contains 3 types of view classes:
//
// * MonoView<>: A single channel contiguous buffer of samples.
//
// * InterleavedView<>: Channel samples are interleaved (side-by-side) in
// the buffer. A single channel InterleavedView<> is the same thing as a
// MonoView<>.
//
// * DeinterleavedView<>: Each channel's samples are contiguous within the
// buffer. Channels can be enumerated and accessing the individual channel
// data is done via MonoView<>.
//
// The views are comparable to and built on rtc::ArrayView<> but add
// audio specific properties for the dimensions of the buffer and the above
// specialized [de]interleaved support.
//
// There are also a few generic utility functions that can simplify
// generic code for supporting more than one type of view.
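// A short sketch of constructing the three view types (illustrative only;
// buffer sizes assume 10 ms of stereo audio at 48 kHz, i.e. 480
// samples per channel):
//   int16_t interleaved_buf[2 * 480];
//   InterleavedView<int16_t> interleaved(interleaved_buf,
//                                        /*samples_per_channel=*/480,
//                                        /*num_channels=*/2);
//   int16_t deinterleaved_buf[2 * 480];
//   DeinterleavedView<int16_t> deinterleaved(deinterleaved_buf, 480, 2);
//   MonoView<int16_t> left = deinterleaved[0];  // First channel.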
// MonoView<> represents a view over a single contiguous, audio buffer. This
// can be either a single channel (mono) interleaved buffer (e.g. AudioFrame),
// or a de-interleaved channel (e.g. from AudioBuffer).
template <typename T>
using MonoView = rtc::ArrayView<T>;
// InterleavedView<> is a view over an interleaved audio buffer (e.g. from
// AudioFrame).
template <typename T>
class InterleavedView {
public:
using value_type = T;
InterleavedView() = default;
template <typename U>
InterleavedView(U* data, size_t samples_per_channel, size_t num_channels)
: num_channels_(num_channels),
samples_per_channel_(samples_per_channel),
data_(data, num_channels * samples_per_channel) {
RTC_DCHECK_LE(num_channels_, kMaxConcurrentChannels);
RTC_DCHECK(num_channels_ == 0u || samples_per_channel_ != 0u);
}
// Constructs an InterleavedView from a C-style array. Samples per channel
// is calculated based on the array size / num_channels.
template <typename U, size_t N>
InterleavedView(U (&array)[N], // NOLINT
size_t num_channels)
: InterleavedView(array, N / num_channels, num_channels) {
RTC_DCHECK_EQ(N % num_channels, 0u);
}
template <typename U>
InterleavedView(const InterleavedView<U>& other)
: num_channels_(other.num_channels()),
samples_per_channel_(other.samples_per_channel()),
data_(other.data()) {}
size_t num_channels() const { return num_channels_; }
size_t samples_per_channel() const { return samples_per_channel_; }
rtc::ArrayView<T> data() const { return data_; }
bool empty() const { return data_.empty(); }
size_t size() const { return data_.size(); }
MonoView<T> AsMono() const {
RTC_DCHECK_EQ(num_channels(), 1u);
RTC_DCHECK_EQ(data_.size(), samples_per_channel_);
return data_;
}
// A simple wrapper around memcpy that includes checks for properties.
// TODO(tommi): Consider if this can be utility function for both interleaved
// and deinterleaved views.
template <typename U>
void CopyFrom(const InterleavedView<U>& source) {
static_assert(sizeof(T) == sizeof(U), "");
RTC_DCHECK_EQ(num_channels(), source.num_channels());
RTC_DCHECK_EQ(samples_per_channel(), source.samples_per_channel());
RTC_DCHECK_GE(data_.size(), source.data().size());
const auto data = source.data();
memcpy(&data_[0], &data[0], data.size() * sizeof(U));
}
T& operator[](size_t idx) const { return data_[idx]; }
T* begin() const { return data_.begin(); }
T* end() const { return data_.end(); }
const T* cbegin() const { return data_.cbegin(); }
const T* cend() const { return data_.cend(); }
std::reverse_iterator<T*> rbegin() const { return data_.rbegin(); }
std::reverse_iterator<T*> rend() const { return data_.rend(); }
std::reverse_iterator<const T*> crbegin() const { return data_.crbegin(); }
std::reverse_iterator<const T*> crend() const { return data_.crend(); }
private:
// TODO(tommi): Consider having these both be stored as uint16_t to
// save a few bytes per view. Use `dchecked_cast` to support size_t during
// construction.
size_t num_channels_ = 0u;
size_t samples_per_channel_ = 0u;
rtc::ArrayView<T> data_;
};
template <typename T>
class DeinterleavedView {
public:
using value_type = T;
DeinterleavedView() = default;
template <typename U>
DeinterleavedView(U* data, size_t samples_per_channel, size_t num_channels)
: num_channels_(num_channels),
samples_per_channel_(samples_per_channel),
data_(data, num_channels * samples_per_channel_) {}
template <typename U>
DeinterleavedView(const DeinterleavedView<U>& other)
: num_channels_(other.num_channels()),
samples_per_channel_(other.samples_per_channel()),
data_(other.data()) {}
// Returns a deinterleaved channel where `idx` is the zero based index,
// in the range [0 .. num_channels()-1].
MonoView<T> operator[](size_t idx) const {
RTC_DCHECK_LT(idx, num_channels_);
return MonoView<T>(&data_[idx * samples_per_channel_],
samples_per_channel_);
}
size_t num_channels() const { return num_channels_; }
size_t samples_per_channel() const { return samples_per_channel_; }
rtc::ArrayView<T> data() const { return data_; }
bool empty() const { return data_.empty(); }
size_t size() const { return data_.size(); }
// Returns the first (and possibly only) channel.
MonoView<T> AsMono() const {
RTC_DCHECK_GE(num_channels(), 1u);
return (*this)[0];
}
private:
// TODO(tommi): Consider having these be stored as uint16_t to save a few
// bytes per view. Use `dchecked_cast` to support size_t during construction.
size_t num_channels_ = 0u;
size_t samples_per_channel_ = 0u;
rtc::ArrayView<T> data_;
};
template <typename T>
constexpr size_t NumChannels(const MonoView<T>& view) {
return 1u;
}
template <typename T>
size_t NumChannels(const InterleavedView<T>& view) {
return view.num_channels();
}
template <typename T>
size_t NumChannels(const DeinterleavedView<T>& view) {
return view.num_channels();
}
template <typename T>
constexpr bool IsMono(const MonoView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const MonoView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const InterleavedView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const DeinterleavedView<const T>& view) {
return false;
}
template <typename T>
bool IsMono(const InterleavedView<T>& view) {
return NumChannels(view) == 1u;
}
template <typename T>
bool IsMono(const DeinterleavedView<T>& view) {
return NumChannels(view) == 1u;
}
template <typename T>
size_t SamplesPerChannel(const MonoView<T>& view) {
return view.size();
}
template <typename T>
size_t SamplesPerChannel(const InterleavedView<T>& view) {
return view.samples_per_channel();
}
template <typename T>
size_t SamplesPerChannel(const DeinterleavedView<T>& view) {
return view.samples_per_channel();
}
// A simple wrapper around memcpy that includes checks for properties.
// The parameter order is the same as for memcpy(), first destination then
// source.
template <typename D, typename S>
void CopySamples(D& destination, const S& source) {
static_assert(
sizeof(typename D::value_type) == sizeof(typename S::value_type), "");
// Here we'd really like to do
// static_assert(IsInterleavedView(destination) == IsInterleavedView(source),
// "");
// but the compiler doesn't like it inside this template function for
// some reason. The following check is an approximation but unfortunately
// means that copying between a MonoView and single channel interleaved or
// deinterleaved views wouldn't work.
// static_assert(sizeof(destination) == sizeof(source),
// "Incompatible view types");
RTC_DCHECK_EQ(NumChannels(destination), NumChannels(source));
RTC_DCHECK_EQ(SamplesPerChannel(destination), SamplesPerChannel(source));
RTC_DCHECK_GE(destination.size(), source.size());
memcpy(&destination[0], &source[0],
source.size() * sizeof(typename S::value_type));
}
// Sets all the samples in a view to 0. This template function is a simple
// wrapper around `memset()` but adds the benefit of automatically calculating
// the byte size from the number of samples and sample type.
template <typename T>
void ClearSamples(T& view) {
memset(&view[0], 0, view.size() * sizeof(typename T::value_type));
}
// Same as `ClearSamples()` above but allows for clearing only the first
// `sample_count` number of samples.
template <typename T>
void ClearSamples(T& view, size_t sample_count) {
RTC_DCHECK_LE(sample_count, view.size());
memset(&view[0], 0, sample_count * sizeof(typename T::value_type));
}
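// A usage sketch for the utilities above (illustrative only):
//   int16_t src_buf[480] = {0};
//   int16_t dst_buf[480];
//   MonoView<int16_t> src(src_buf, 480);
//   MonoView<int16_t> dst(dst_buf, 480);
//   CopySamples(dst, src);  // dst now holds the contents of src.
//   ClearSamples(src);      // src is zeroed again.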
} // namespace webrtc
#endif // API_AUDIO_AUDIO_VIEW_H_

api/audio/channel_layout.cc

@@ -0,0 +1,282 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/channel_layout.h"
#include <stddef.h>
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
static const int kLayoutToChannels[] = {
0, // CHANNEL_LAYOUT_NONE
0, // CHANNEL_LAYOUT_UNSUPPORTED
1, // CHANNEL_LAYOUT_MONO
2, // CHANNEL_LAYOUT_STEREO
3, // CHANNEL_LAYOUT_2_1
3, // CHANNEL_LAYOUT_SURROUND
4, // CHANNEL_LAYOUT_4_0
4, // CHANNEL_LAYOUT_2_2
4, // CHANNEL_LAYOUT_QUAD
5, // CHANNEL_LAYOUT_5_0
6, // CHANNEL_LAYOUT_5_1
5, // CHANNEL_LAYOUT_5_0_BACK
6, // CHANNEL_LAYOUT_5_1_BACK
7, // CHANNEL_LAYOUT_7_0
8, // CHANNEL_LAYOUT_7_1
8, // CHANNEL_LAYOUT_7_1_WIDE
2, // CHANNEL_LAYOUT_STEREO_DOWNMIX
3, // CHANNEL_LAYOUT_2POINT1
4, // CHANNEL_LAYOUT_3_1
5, // CHANNEL_LAYOUT_4_1
6, // CHANNEL_LAYOUT_6_0
6, // CHANNEL_LAYOUT_6_0_FRONT
6, // CHANNEL_LAYOUT_HEXAGONAL
7, // CHANNEL_LAYOUT_6_1
7, // CHANNEL_LAYOUT_6_1_BACK
7, // CHANNEL_LAYOUT_6_1_FRONT
7, // CHANNEL_LAYOUT_7_0_FRONT
8, // CHANNEL_LAYOUT_7_1_WIDE_BACK
8, // CHANNEL_LAYOUT_OCTAGONAL
0, // CHANNEL_LAYOUT_DISCRETE
3, // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
5, // CHANNEL_LAYOUT_4_1_QUAD_SIDE
0, // CHANNEL_LAYOUT_BITSTREAM
};
// The channel orderings for each layout as specified by FFmpeg. Each value
// represents the index of each channel in each layout. Values of -1 mean the
// channel at that index is not used for that layout. For example, the left side
// surround sound channel in FFmpeg's 5.1 layout is in the 5th position (because
// the order is L, R, C, LFE, LS, RS), so
// kChannelOrderings[CHANNEL_LAYOUT_5_1][SIDE_LEFT] = 4;
static const int kChannelOrderings[CHANNEL_LAYOUT_MAX + 1][CHANNELS_MAX + 1] = {
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_NONE
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_UNSUPPORTED
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_MONO
{-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_STEREO
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_2_1
{0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1},
// CHANNEL_LAYOUT_SURROUND
{0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_0
{0, 1, 2, -1, -1, -1, -1, -1, 3, -1, -1},
// CHANNEL_LAYOUT_2_2
{0, 1, -1, -1, -1, -1, -1, -1, -1, 2, 3},
// CHANNEL_LAYOUT_QUAD
{0, 1, -1, -1, 2, 3, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_5_0
{0, 1, 2, -1, -1, -1, -1, -1, -1, 3, 4},
// CHANNEL_LAYOUT_5_1
{0, 1, 2, 3, -1, -1, -1, -1, -1, 4, 5},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_5_0_BACK
{0, 1, 2, -1, 3, 4, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_5_1_BACK
{0, 1, 2, 3, 4, 5, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_7_0
{0, 1, 2, -1, 5, 6, -1, -1, -1, 3, 4},
// CHANNEL_LAYOUT_7_1
{0, 1, 2, 3, 6, 7, -1, -1, -1, 4, 5},
// CHANNEL_LAYOUT_7_1_WIDE
{0, 1, 2, 3, -1, -1, 6, 7, -1, 4, 5},
// CHANNEL_LAYOUT_STEREO_DOWNMIX
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_2POINT1
{0, 1, -1, 2, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_3_1
{0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_1
{0, 1, 2, 4, -1, -1, -1, -1, 3, -1, -1},
// CHANNEL_LAYOUT_6_0
{0, 1, 2, -1, -1, -1, -1, -1, 5, 3, 4},
// CHANNEL_LAYOUT_6_0_FRONT
{0, 1, -1, -1, -1, -1, 4, 5, -1, 2, 3},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_HEXAGONAL
{0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1},
// CHANNEL_LAYOUT_6_1
{0, 1, 2, 3, -1, -1, -1, -1, 6, 4, 5},
// CHANNEL_LAYOUT_6_1_BACK
{0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1},
// CHANNEL_LAYOUT_6_1_FRONT
{0, 1, -1, 6, -1, -1, 4, 5, -1, 2, 3},
// CHANNEL_LAYOUT_7_0_FRONT
{0, 1, 2, -1, -1, -1, 5, 6, -1, 3, 4},
// CHANNEL_LAYOUT_7_1_WIDE_BACK
{0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1},
// CHANNEL_LAYOUT_OCTAGONAL
{0, 1, 2, -1, 5, 6, -1, -1, 7, 3, 4},
// CHANNEL_LAYOUT_DISCRETE
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
{0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_1_QUAD_SIDE
{0, 1, -1, 4, -1, -1, -1, -1, -1, 2, 3},
// CHANNEL_LAYOUT_BITSTREAM
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
};
int ChannelLayoutToChannelCount(ChannelLayout layout) {
RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kLayoutToChannels));
RTC_DCHECK_LE(kLayoutToChannels[layout], kMaxConcurrentChannels);
return kLayoutToChannels[layout];
}
// Converts a channel count into a channel layout.
ChannelLayout GuessChannelLayout(int channels) {
switch (channels) {
case 1:
return CHANNEL_LAYOUT_MONO;
case 2:
return CHANNEL_LAYOUT_STEREO;
case 3:
return CHANNEL_LAYOUT_SURROUND;
case 4:
return CHANNEL_LAYOUT_QUAD;
case 5:
return CHANNEL_LAYOUT_5_0;
case 6:
return CHANNEL_LAYOUT_5_1;
case 7:
return CHANNEL_LAYOUT_6_1;
case 8:
return CHANNEL_LAYOUT_7_1;
default:
RTC_DLOG(LS_WARNING) << "Unsupported channel count: " << channels;
}
return CHANNEL_LAYOUT_UNSUPPORTED;
}
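// Note on GuessChannelLayout() above (illustrative): GuessChannelLayout(6)
// returns CHANNEL_LAYOUT_5_1, while GuessChannelLayout(9) logs a warning and
// returns CHANNEL_LAYOUT_UNSUPPORTED.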
int ChannelOrder(ChannelLayout layout, Channels channel) {
RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kChannelOrderings));
RTC_DCHECK_LT(static_cast<size_t>(channel), arraysize(kChannelOrderings[0]));
return kChannelOrderings[layout][channel];
}
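// Note on ChannelOrder() above (illustrative): per the kChannelOrderings
// table, ChannelOrder(CHANNEL_LAYOUT_5_1, SIDE_LEFT) returns 4 and
// ChannelOrder(CHANNEL_LAYOUT_MONO, LFE) returns -1.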
const char* ChannelLayoutToString(ChannelLayout layout) {
switch (layout) {
case CHANNEL_LAYOUT_NONE:
return "NONE";
case CHANNEL_LAYOUT_UNSUPPORTED:
return "UNSUPPORTED";
case CHANNEL_LAYOUT_MONO:
return "MONO";
case CHANNEL_LAYOUT_STEREO:
return "STEREO";
case CHANNEL_LAYOUT_2_1:
return "2.1";
case CHANNEL_LAYOUT_SURROUND:
return "SURROUND";
case CHANNEL_LAYOUT_4_0:
return "4.0";
case CHANNEL_LAYOUT_2_2:
return "QUAD_SIDE";
case CHANNEL_LAYOUT_QUAD:
return "QUAD";
case CHANNEL_LAYOUT_5_0:
return "5.0";
case CHANNEL_LAYOUT_5_1:
return "5.1";
case CHANNEL_LAYOUT_5_0_BACK:
return "5.0_BACK";
case CHANNEL_LAYOUT_5_1_BACK:
return "5.1_BACK";
case CHANNEL_LAYOUT_7_0:
return "7.0";
case CHANNEL_LAYOUT_7_1:
return "7.1";
case CHANNEL_LAYOUT_7_1_WIDE:
return "7.1_WIDE";
case CHANNEL_LAYOUT_STEREO_DOWNMIX:
return "STEREO_DOWNMIX";
case CHANNEL_LAYOUT_2POINT1:
return "2POINT1";
case CHANNEL_LAYOUT_3_1:
return "3.1";
case CHANNEL_LAYOUT_4_1:
return "4.1";
case CHANNEL_LAYOUT_6_0:
return "6.0";
case CHANNEL_LAYOUT_6_0_FRONT:
return "6.0_FRONT";
case CHANNEL_LAYOUT_HEXAGONAL:
return "HEXAGONAL";
case CHANNEL_LAYOUT_6_1:
return "6.1";
case CHANNEL_LAYOUT_6_1_BACK:
return "6.1_BACK";
case CHANNEL_LAYOUT_6_1_FRONT:
return "6.1_FRONT";
case CHANNEL_LAYOUT_7_0_FRONT:
return "7.0_FRONT";
case CHANNEL_LAYOUT_7_1_WIDE_BACK:
return "7.1_WIDE_BACK";
case CHANNEL_LAYOUT_OCTAGONAL:
return "OCTAGONAL";
case CHANNEL_LAYOUT_DISCRETE:
return "DISCRETE";
case CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
return "STEREO_AND_KEYBOARD_MIC";
case CHANNEL_LAYOUT_4_1_QUAD_SIDE:
return "4.1_QUAD_SIDE";
case CHANNEL_LAYOUT_BITSTREAM:
return "BITSTREAM";
}
RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout;
return "";
}
} // namespace webrtc

View File

@ -0,0 +1,165 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_CHANNEL_LAYOUT_H_
#define API_AUDIO_CHANNEL_LAYOUT_H_
namespace webrtc {
// This file is derived from Chromium's base/channel_layout.h.
// Enumerates the various representations of the ordering of audio channels.
// Logged to UMA, so never reuse a value, always add new/greater ones!
enum ChannelLayout {
CHANNEL_LAYOUT_NONE = 0,
CHANNEL_LAYOUT_UNSUPPORTED = 1,
// Front C
CHANNEL_LAYOUT_MONO = 2,
// Front L, Front R
CHANNEL_LAYOUT_STEREO = 3,
// Front L, Front R, Back C
CHANNEL_LAYOUT_2_1 = 4,
// Front L, Front R, Front C
CHANNEL_LAYOUT_SURROUND = 5,
// Front L, Front R, Front C, Back C
CHANNEL_LAYOUT_4_0 = 6,
// Front L, Front R, Side L, Side R
CHANNEL_LAYOUT_2_2 = 7,
// Front L, Front R, Back L, Back R
CHANNEL_LAYOUT_QUAD = 8,
// Front L, Front R, Front C, Side L, Side R
CHANNEL_LAYOUT_5_0 = 9,
// Front L, Front R, Front C, LFE, Side L, Side R
CHANNEL_LAYOUT_5_1 = 10,
// Front L, Front R, Front C, Back L, Back R
CHANNEL_LAYOUT_5_0_BACK = 11,
// Front L, Front R, Front C, LFE, Back L, Back R
CHANNEL_LAYOUT_5_1_BACK = 12,
// Front L, Front R, Front C, Side L, Side R, Back L, Back R
CHANNEL_LAYOUT_7_0 = 13,
// Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R
CHANNEL_LAYOUT_7_1 = 14,
// Front L, Front R, Front C, LFE, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_1_WIDE = 15,
// Stereo L, Stereo R
CHANNEL_LAYOUT_STEREO_DOWNMIX = 16,
// Stereo L, Stereo R, LFE
CHANNEL_LAYOUT_2POINT1 = 17,
// Stereo L, Stereo R, Front C, LFE
CHANNEL_LAYOUT_3_1 = 18,
// Stereo L, Stereo R, Front C, Rear C, LFE
CHANNEL_LAYOUT_4_1 = 19,
// Stereo L, Stereo R, Front C, Side L, Side R, Back C
CHANNEL_LAYOUT_6_0 = 20,
// Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_6_0_FRONT = 21,
// Stereo L, Stereo R, Front C, Rear L, Rear R, Rear C
CHANNEL_LAYOUT_HEXAGONAL = 22,
// Stereo L, Stereo R, Front C, LFE, Side L, Side R, Rear Center
CHANNEL_LAYOUT_6_1 = 23,
// Stereo L, Stereo R, Front C, LFE, Back L, Back R, Rear Center
CHANNEL_LAYOUT_6_1_BACK = 24,
// Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC, LFE
CHANNEL_LAYOUT_6_1_FRONT = 25,
// Front L, Front R, Front C, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_0_FRONT = 26,
// Front L, Front R, Front C, LFE, Back L, Back R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_1_WIDE_BACK = 27,
// Front L, Front R, Front C, Side L, Side R, Back L, Back R, Back C.
CHANNEL_LAYOUT_OCTAGONAL = 28,
// Channels are not explicitly mapped to speakers.
CHANNEL_LAYOUT_DISCRETE = 29,
// Front L, Front R, Front C. Front C contains the keyboard mic audio. This
// layout is only intended for input for WebRTC. The Front C channel
// is stripped away in the WebRTC audio input pipeline and never seen outside
// of that.
CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC = 30,
// Front L, Front R, Side L, Side R, LFE
CHANNEL_LAYOUT_4_1_QUAD_SIDE = 31,
// Actual channel layout is specified in the bitstream and the actual channel
// count is unknown at Chromium media pipeline level (useful for audio
// pass-through mode).
CHANNEL_LAYOUT_BITSTREAM = 32,
// Max value, must always equal the largest entry ever logged.
CHANNEL_LAYOUT_MAX = CHANNEL_LAYOUT_BITSTREAM
};
// Note: Do not reorder or reassign these values; other code depends on their
// ordering to operate correctly. E.g., CoreAudio channel layout computations.
enum Channels {
LEFT = 0,
RIGHT,
CENTER,
LFE,
BACK_LEFT,
BACK_RIGHT,
LEFT_OF_CENTER,
RIGHT_OF_CENTER,
BACK_CENTER,
SIDE_LEFT,
SIDE_RIGHT,
CHANNELS_MAX =
SIDE_RIGHT, // Must always equal the largest value ever logged.
};
// The maximum number of concurrently active channels for all possible layouts.
// ChannelLayoutToChannelCount() will never return a value higher than this.
constexpr int kMaxConcurrentChannels = 8;
// Returns the expected channel position in an interleaved stream. Values of -1
// mean the channel at that index is not used for that layout. Values range
// from 0 to ChannelLayoutToChannelCount(layout) - 1.
int ChannelOrder(ChannelLayout layout, Channels channel);
// Returns the number of channels in a given ChannelLayout.
int ChannelLayoutToChannelCount(ChannelLayout layout);
// Returns the best guess at a layout for the given number of channels,
// or CHANNEL_LAYOUT_UNSUPPORTED if there is no good match.
ChannelLayout GuessChannelLayout(int channels);
// Returns a string representation of the channel layout.
const char* ChannelLayoutToString(ChannelLayout layout);
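// Example (illustrative, not part of the original header):
//
//   ChannelLayout layout = GuessChannelLayout(6);        // CHANNEL_LAYOUT_5_1
//   int channels = ChannelLayoutToChannelCount(layout);  // 6
//   int side_left = ChannelOrder(layout, SIDE_LEFT);     // 4
//   const char* name = ChannelLayoutToString(layout);    // "5.1"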
} // namespace webrtc
#endif // API_AUDIO_CHANNEL_LAYOUT_H_

View File

@ -0,0 +1,278 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/echo_canceller3_config.h"
#include <algorithm>
#include <cmath>
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
bool Limit(float* value, float min, float max) {
float clamped = rtc::SafeClamp(*value, min, max);
clamped = std::isfinite(clamped) ? clamped : min;
bool res = *value == clamped;
*value = clamped;
return res;
}
bool Limit(size_t* value, size_t min, size_t max) {
size_t clamped = rtc::SafeClamp(*value, min, max);
bool res = *value == clamped;
*value = clamped;
return res;
}
bool Limit(int* value, int min, int max) {
int clamped = rtc::SafeClamp(*value, min, max);
bool res = *value == clamped;
*value = clamped;
return res;
}
bool FloorLimit(size_t* value, size_t min) {
size_t clamped = *value >= min ? *value : min;
bool res = *value == clamped;
*value = clamped;
return res;
}
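// Note (illustrative): each helper above clamps in place and reports whether
// the value was already within range. E.g., with float v = 2.f,
// Limit(&v, 0.f, 1.f) sets v to 1.f and returns false; a non-finite float is
// replaced by `min`.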
} // namespace
EchoCanceller3Config::EchoCanceller3Config() = default;
EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) =
default;
EchoCanceller3Config& EchoCanceller3Config::operator=(
const EchoCanceller3Config& e) = default;
EchoCanceller3Config::Delay::Delay() = default;
EchoCanceller3Config::Delay::Delay(const EchoCanceller3Config::Delay& e) =
default;
EchoCanceller3Config::Delay& EchoCanceller3Config::Delay::operator=(
const Delay& e) = default;
EchoCanceller3Config::EchoModel::EchoModel() = default;
EchoCanceller3Config::EchoModel::EchoModel(
const EchoCanceller3Config::EchoModel& e) = default;
EchoCanceller3Config::EchoModel& EchoCanceller3Config::EchoModel::operator=(
const EchoModel& e) = default;
EchoCanceller3Config::Suppressor::Suppressor() = default;
EchoCanceller3Config::Suppressor::Suppressor(
const EchoCanceller3Config::Suppressor& e) = default;
EchoCanceller3Config::Suppressor& EchoCanceller3Config::Suppressor::operator=(
const Suppressor& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
float enr_transparent,
float enr_suppress,
float emr_transparent)
: enr_transparent(enr_transparent),
enr_suppress(enr_suppress),
emr_transparent(emr_transparent) {}
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
const EchoCanceller3Config::Suppressor::MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds&
EchoCanceller3Config::Suppressor::MaskingThresholds::operator=(
const MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::Tuning::Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf)
: mask_lf(mask_lf),
mask_hf(mask_hf),
max_inc_factor(max_inc_factor),
max_dec_factor_lf(max_dec_factor_lf) {}
EchoCanceller3Config::Suppressor::Tuning::Tuning(
const EchoCanceller3Config::Suppressor::Tuning& e) = default;
EchoCanceller3Config::Suppressor::Tuning&
EchoCanceller3Config::Suppressor::Tuning::operator=(const Tuning& e) = default;
bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
RTC_DCHECK(config);
EchoCanceller3Config* c = config;
bool res = true;
if (c->delay.down_sampling_factor != 4 &&
c->delay.down_sampling_factor != 8) {
c->delay.down_sampling_factor = 4;
res = false;
}
res = res & Limit(&c->delay.default_delay, 0, 5000);
res = res & Limit(&c->delay.num_filters, 0, 5000);
res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000);
res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000);
res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000);
res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f);
res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f);
res = res & Limit(&c->delay.delay_selection_thresholds.initial, 1, 250);
res = res & Limit(&c->delay.delay_selection_thresholds.converged, 1, 250);
res = res & FloorLimit(&c->filter.refined.length_blocks, 1);
res = res & Limit(&c->filter.refined.leakage_converged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.leakage_diverged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.error_floor, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.error_ceil, 0.f, 100000000.f);
res = res & Limit(&c->filter.refined.noise_gate, 0.f, 100000000.f);
res = res & FloorLimit(&c->filter.refined_initial.length_blocks, 1);
res = res & Limit(&c->filter.refined_initial.leakage_converged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.leakage_diverged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.error_floor, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.error_ceil, 0.f, 100000000.f);
res = res & Limit(&c->filter.refined_initial.noise_gate, 0.f, 100000000.f);
if (c->filter.refined.length_blocks <
c->filter.refined_initial.length_blocks) {
c->filter.refined_initial.length_blocks = c->filter.refined.length_blocks;
res = false;
}
res = res & FloorLimit(&c->filter.coarse.length_blocks, 1);
res = res & Limit(&c->filter.coarse.rate, 0.f, 1.f);
res = res & Limit(&c->filter.coarse.noise_gate, 0.f, 100000000.f);
res = res & FloorLimit(&c->filter.coarse_initial.length_blocks, 1);
res = res & Limit(&c->filter.coarse_initial.rate, 0.f, 1.f);
res = res & Limit(&c->filter.coarse_initial.noise_gate, 0.f, 100000000.f);
if (c->filter.coarse.length_blocks < c->filter.coarse_initial.length_blocks) {
c->filter.coarse_initial.length_blocks = c->filter.coarse.length_blocks;
res = false;
}
res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000);
res = res & Limit(&c->erle.min, 1.f, 100000.f);
res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
res = res & Limit(&c->erle.max_h, 1.f, 100000.f);
if (c->erle.min > c->erle.max_l || c->erle.min > c->erle.max_h) {
c->erle.min = std::min(c->erle.max_l, c->erle.max_h);
res = false;
}
res = res & Limit(&c->erle.num_sections, 1, c->filter.refined.length_blocks);
res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);
res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f);
res =
res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
res = res &
Limit(&c->echo_audibility.normal_render_limit, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.floor_power, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_lf, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_mf, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_hf, 0.f,
32768.f * 32768.f);
res = res &
Limit(&c->render_levels.active_render_limit, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->render_levels.poor_excitation_render_limit, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->render_levels.poor_excitation_render_limit_ds8, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_model.noise_floor_hold, 0, 1000);
res = res & Limit(&c->echo_model.min_noise_floor_power, 0, 2000000.f);
res = res & Limit(&c->echo_model.stationary_gate_slope, 0, 1000000.f);
res = res & Limit(&c->echo_model.noise_gate_power, 0, 1000000.f);
res = res & Limit(&c->echo_model.noise_gate_slope, 0, 1000000.f);
res = res & Limit(&c->echo_model.render_pre_window_size, 0, 100);
res = res & Limit(&c->echo_model.render_post_window_size, 0, 100);
res = res & Limit(&c->comfort_noise.noise_floor_dbfs, -200.f, 0.f);
res = res & Limit(&c->suppressor.nearend_average_blocks, 1, 5000);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.enr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.enr_suppress, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.emr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.enr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.enr_suppress, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.emr_transparent, 0.f, 100.f);
res = res & Limit(&c->suppressor.normal_tuning.max_inc_factor, 0.f, 100.f);
res = res & Limit(&c->suppressor.normal_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.enr_transparent, 0.f,
100.f);
res = res &
Limit(&c->suppressor.nearend_tuning.mask_lf.enr_suppress, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.emr_transparent, 0.f,
100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.enr_transparent, 0.f,
100.f);
res = res &
Limit(&c->suppressor.nearend_tuning.mask_hf.enr_suppress, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.emr_transparent, 0.f,
100.f);
res = res & Limit(&c->suppressor.nearend_tuning.max_inc_factor, 0.f, 100.f);
res =
res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_band, 0, 63);
res = res &
Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64);
res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.hold_duration, 0,
10000);
res = res & Limit(&c->suppressor.dominant_nearend_detection.trigger_threshold,
0, 10000);
res = res &
Limit(&c->suppressor.subband_nearend_detection.nearend_average_blocks,
1, 1024);
res =
res & Limit(&c->suppressor.subband_nearend_detection.subband1.low, 0, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.subband1.high,
c->suppressor.subband_nearend_detection.subband1.low, 65);
res =
res & Limit(&c->suppressor.subband_nearend_detection.subband2.low, 0, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.subband2.high,
c->suppressor.subband_nearend_detection.subband2.low, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.nearend_threshold,
0.f, 1.e24f);
res = res & Limit(&c->suppressor.subband_nearend_detection.snr_threshold, 0.f,
1.e24f);
res = res & Limit(&c->suppressor.high_bands_suppression.enr_threshold, 0.f,
1000000.f);
res = res & Limit(&c->suppressor.high_bands_suppression.max_gain_during_echo,
0.f, 1.f);
res = res & Limit(&c->suppressor.high_bands_suppression
.anti_howling_activation_threshold,
0.f, 32768.f * 32768.f);
res = res & Limit(&c->suppressor.high_bands_suppression.anti_howling_gain,
0.f, 1.f);
res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f);
return res;
}
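// Example (illustrative): sanitizing a config before use.
//
//   EchoCanceller3Config config;
//   config.delay.down_sampling_factor = 3;  // Unsupported; only 4 or 8 pass.
//   bool unchanged = EchoCanceller3Config::Validate(&config);
//   // unchanged == false; config.delay.down_sampling_factor is now 4.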
} // namespace webrtc

View File

@ -0,0 +1,250 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#define API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#include <stddef.h> // size_t
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
// Configuration struct for EchoCanceller3
struct RTC_EXPORT EchoCanceller3Config {
// Checks and updates the config parameters to lie within (mostly) reasonable
// ranges. Returns true if and only if the config did not need to be changed.
static bool Validate(EchoCanceller3Config* config);
EchoCanceller3Config();
EchoCanceller3Config(const EchoCanceller3Config& e);
EchoCanceller3Config& operator=(const EchoCanceller3Config& other);
struct Buffering {
size_t excess_render_detection_interval_blocks = 250;
size_t max_allowed_excess_render_blocks = 8;
} buffering;
struct Delay {
Delay();
Delay(const Delay& e);
Delay& operator=(const Delay& e);
size_t default_delay = 5;
size_t down_sampling_factor = 4;
size_t num_filters = 5;
size_t delay_headroom_samples = 32;
size_t hysteresis_limit_blocks = 1;
size_t fixed_capture_delay_samples = 0;
float delay_estimate_smoothing = 0.7f;
float delay_estimate_smoothing_delay_found = 0.7f;
float delay_candidate_detection_threshold = 0.2f;
struct DelaySelectionThresholds {
int initial;
int converged;
} delay_selection_thresholds = {5, 20};
bool use_external_delay_estimator = false;
bool log_warning_on_delay_changes = false;
struct AlignmentMixing {
bool downmix;
bool adaptive_selection;
float activity_power_threshold;
bool prefer_first_two_channels;
};
AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
bool detect_pre_echo = true;
} delay;
struct Filter {
struct RefinedConfiguration {
size_t length_blocks;
float leakage_converged;
float leakage_diverged;
float error_floor;
float error_ceil;
float noise_gate;
};
struct CoarseConfiguration {
size_t length_blocks;
float rate;
float noise_gate;
};
RefinedConfiguration refined = {13, 0.00005f, 0.05f,
0.001f, 2.f, 20075344.f};
CoarseConfiguration coarse = {13, 0.7f, 20075344.f};
RefinedConfiguration refined_initial = {12, 0.005f, 0.5f,
0.001f, 2.f, 20075344.f};
CoarseConfiguration coarse_initial = {12, 0.9f, 20075344.f};
size_t config_change_duration_blocks = 250;
float initial_state_seconds = 2.5f;
int coarse_reset_hangover_blocks = 25;
bool conservative_initial_phase = false;
bool enable_coarse_filter_output_usage = true;
bool use_linear_filter = true;
bool high_pass_filter_echo_reference = false;
bool export_linear_aec_output = false;
} filter;
struct Erle {
float min = 1.f;
float max_l = 4.f;
float max_h = 1.5f;
bool onset_detection = true;
size_t num_sections = 1;
bool clamp_quality_estimate_to_zero = true;
bool clamp_quality_estimate_to_one = true;
} erle;
struct EpStrength {
float default_gain = 1.f;
float default_len = 0.83f;
float nearend_len = 0.83f;
bool echo_can_saturate = true;
bool bounded_erl = false;
bool erle_onset_compensation_in_dominant_nearend = false;
bool use_conservative_tail_frequency_response = true;
} ep_strength;
struct EchoAudibility {
float low_render_limit = 4 * 64.f;
float normal_render_limit = 64.f;
float floor_power = 2 * 64.f;
float audibility_threshold_lf = 10;
float audibility_threshold_mf = 10;
float audibility_threshold_hf = 10;
bool use_stationarity_properties = false;
bool use_stationarity_properties_at_init = false;
} echo_audibility;
struct RenderLevels {
float active_render_limit = 100.f;
float poor_excitation_render_limit = 150.f;
float poor_excitation_render_limit_ds8 = 20.f;
float render_power_gain_db = 0.f;
} render_levels;
struct EchoRemovalControl {
bool has_clock_drift = false;
bool linear_and_stable_echo_path = false;
} echo_removal_control;
struct EchoModel {
EchoModel();
EchoModel(const EchoModel& e);
EchoModel& operator=(const EchoModel& e);
size_t noise_floor_hold = 50;
float min_noise_floor_power = 1638400.f;
float stationary_gate_slope = 10.f;
float noise_gate_power = 27509.42f;
float noise_gate_slope = 0.3f;
size_t render_pre_window_size = 1;
size_t render_post_window_size = 1;
bool model_reverb_in_nonlinear_mode = true;
} echo_model;
struct ComfortNoise {
float noise_floor_dbfs = -96.03406f;
} comfort_noise;
struct Suppressor {
Suppressor();
Suppressor(const Suppressor& e);
Suppressor& operator=(const Suppressor& e);
size_t nearend_average_blocks = 4;
struct MaskingThresholds {
MaskingThresholds(float enr_transparent,
float enr_suppress,
float emr_transparent);
MaskingThresholds(const MaskingThresholds& e);
MaskingThresholds& operator=(const MaskingThresholds& e);
float enr_transparent;
float enr_suppress;
float emr_transparent;
};
struct Tuning {
Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf);
Tuning(const Tuning& e);
Tuning& operator=(const Tuning& e);
MaskingThresholds mask_lf;
MaskingThresholds mask_hf;
float max_inc_factor;
float max_dec_factor_lf;
};
Tuning normal_tuning = Tuning(MaskingThresholds(.3f, .4f, .3f),
MaskingThresholds(.07f, .1f, .3f),
2.0f,
0.25f);
Tuning nearend_tuning = Tuning(MaskingThresholds(1.09f, 1.1f, .3f),
MaskingThresholds(.1f, .3f, .3f),
2.0f,
0.25f);
bool lf_smoothing_during_initial_phase = true;
int last_permanent_lf_smoothing_band = 0;
int last_lf_smoothing_band = 5;
int last_lf_band = 5;
int first_hf_band = 8;
struct DominantNearendDetection {
float enr_threshold = .25f;
float enr_exit_threshold = 10.f;
float snr_threshold = 30.f;
int hold_duration = 50;
int trigger_threshold = 12;
bool use_during_initial_phase = true;
bool use_unbounded_echo_spectrum = true;
} dominant_nearend_detection;
struct SubbandNearendDetection {
size_t nearend_average_blocks = 1;
struct SubbandRegion {
size_t low;
size_t high;
};
SubbandRegion subband1 = {1, 1};
SubbandRegion subband2 = {1, 1};
float nearend_threshold = 1.f;
float snr_threshold = 1.f;
} subband_nearend_detection;
bool use_subband_nearend_detection = false;
struct HighBandsSuppression {
float enr_threshold = 1.f;
float max_gain_during_echo = 1.f;
float anti_howling_activation_threshold = 400.f;
float anti_howling_gain = 1.f;
} high_bands_suppression;
float floor_first_increase = 0.00001f;
bool conservative_hf_suppression = false;
} suppressor;
struct MultiChannel {
bool detect_stereo_content = true;
float stereo_detection_threshold = 0.0f;
int stereo_detection_timeout_threshold_seconds = 300;
float stereo_detection_hysteresis_seconds = 2.0f;
} multi_channel;
};
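// Example (illustrative): adjusting a few fields, then validating. The chosen
// values are arbitrary, not recommendations.
//
//   EchoCanceller3Config cfg;
//   cfg.filter.export_linear_aec_output = true;
//   cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
//   EchoCanceller3Config::Validate(&cfg);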
} // namespace webrtc
#endif // API_AUDIO_ECHO_CANCELLER3_CONFIG_H_

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CONTROL_H_
#define API_AUDIO_ECHO_CONTROL_H_
#include <memory>
#include "rtc_base/checks.h"
namespace webrtc {
class AudioBuffer;
// Interface for an acoustic echo cancellation (AEC) submodule.
class EchoControl {
public:
// Analyzes the render signal without modifying it.
virtual void AnalyzeRender(AudioBuffer* render) = 0;
// Analyzes the capture signal without modifying it.
virtual void AnalyzeCapture(AudioBuffer* capture) = 0;
// Processes the capture signal in order to remove the echo.
virtual void ProcessCapture(AudioBuffer* capture, bool level_change) = 0;
// As above, but also returns the linear filter output.
virtual void ProcessCapture(AudioBuffer* capture,
AudioBuffer* linear_output,
bool level_change) = 0;
struct Metrics {
double echo_return_loss;
double echo_return_loss_enhancement;
int delay_ms;
};
// Collect current metrics from the echo controller.
virtual Metrics GetMetrics() const = 0;
// Provides an optional external estimate of the audio buffer delay.
virtual void SetAudioBufferDelay(int delay_ms) = 0;
// Specifies whether the capture output will be used. This allows the echo
// controller to deactivate some of the processing when the resulting output
// is not going to be used anyway, for instance when the endpoint is muted.
// TODO(b/177830919): Make pure virtual.
virtual void SetCaptureOutputUsage(bool capture_output_used) {}
// Returns whether the signal is altered.
virtual bool ActiveProcessing() const = 0;
virtual ~EchoControl() {}
};
// Interface for a factory that creates EchoControllers.
class EchoControlFactory {
public:
virtual std::unique_ptr<EchoControl> Create(int sample_rate_hz,
int num_render_channels,
int num_capture_channels) = 0;
virtual ~EchoControlFactory() = default;
};
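// Example (illustrative sketch, not part of the original header): plugging in
// a custom AEC via the factory interface. MyEchoControl is a hypothetical
// EchoControl implementation.
//
//   class MyEchoControlFactory : public EchoControlFactory {
//    public:
//     std::unique_ptr<EchoControl> Create(int sample_rate_hz,
//                                         int num_render_channels,
//                                         int num_capture_channels) override {
//       return std::make_unique<MyEchoControl>();
//     }
//   };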
} // namespace webrtc
#endif // API_AUDIO_ECHO_CONTROL_H_