add vad code.

This commit is contained in:
luocai
2024-09-06 18:26:45 +08:00
parent 35bf68338f
commit 2bed1dacf2
93 changed files with 12362 additions and 2 deletions

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/include/aec_dump.h"
namespace webrtc {
// Special member functions of InternalAPMConfig. They are defined out of line
// here and are all compiler-generated (`= default`): default constructor, copy
// constructor, move constructor and copy assignment. Move assignment is not
// defined in this file; the struct declaration marks it as deleted.
InternalAPMConfig::InternalAPMConfig() = default;
InternalAPMConfig::InternalAPMConfig(const InternalAPMConfig&) = default;
InternalAPMConfig::InternalAPMConfig(InternalAPMConfig&&) = default;
InternalAPMConfig& InternalAPMConfig::operator=(const InternalAPMConfig&) =
default;
// Field-wise equality: returns true only when every data member of `*this`
// compares equal to the corresponding member of `other`. Fields are checked in
// declaration order and the comparison stops at the first mismatch.
bool InternalAPMConfig::operator==(const InternalAPMConfig& other) const {
  // aec_* fields.
  if (aec_enabled != other.aec_enabled ||
      aec_delay_agnostic_enabled != other.aec_delay_agnostic_enabled ||
      aec_drift_compensation_enabled != other.aec_drift_compensation_enabled ||
      aec_extended_filter_enabled != other.aec_extended_filter_enabled ||
      aec_suppression_level != other.aec_suppression_level) {
    return false;
  }

  // aecm_* fields.
  if (aecm_enabled != other.aecm_enabled ||
      aecm_comfort_noise_enabled != other.aecm_comfort_noise_enabled ||
      aecm_routing_mode != other.aecm_routing_mode) {
    return false;
  }

  // agc_* fields.
  if (agc_enabled != other.agc_enabled || agc_mode != other.agc_mode ||
      agc_limiter_enabled != other.agc_limiter_enabled) {
    return false;
  }

  // hpf_* and ns_* fields.
  if (hpf_enabled != other.hpf_enabled || ns_enabled != other.ns_enabled ||
      ns_level != other.ns_level) {
    return false;
  }

  // Remaining boolean switches.
  if (transient_suppression_enabled != other.transient_suppression_enabled ||
      noise_robust_agc_enabled != other.noise_robust_agc_enabled) {
    return false;
  }

  // pre_amplifier_* fields. The gain factor is a float and is compared
  // exactly, not within a tolerance.
  if (pre_amplifier_enabled != other.pre_amplifier_enabled ||
      pre_amplifier_fixed_gain_factor !=
          other.pre_amplifier_fixed_gain_factor) {
    return false;
  }

  // The string comparison comes last, matching the declaration order.
  return experiments_description == other.experiments_description;
}
} // namespace webrtc

View File

@ -0,0 +1,116 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
#define MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
#include <stdint.h>
#include <string>
#include "absl/base/attributes.h"
#include "absl/types/optional.h"
#include "api/audio/audio_processing.h"
#include "modules/audio_processing/include/audio_frame_view.h"
namespace webrtc {
// Struct for passing current config from APM without having to
// include protobuf headers.
//
// Every data member has an in-class default, so a default-constructed instance
// is fully initialized. The struct can be default-, copy- and move-constructed
// and copy-assigned; move assignment is explicitly deleted.
struct InternalAPMConfig {
// Special members are only declared here; their definitions are not part of
// this header.
InternalAPMConfig();
InternalAPMConfig(const InternalAPMConfig&);
InternalAPMConfig(InternalAPMConfig&&);
InternalAPMConfig& operator=(const InternalAPMConfig&);
// Deleted rather than omitted: assigning from an rvalue selects this overload
// and fails to compile instead of silently falling back to copy assignment.
InternalAPMConfig& operator=(InternalAPMConfig&&) = delete;
// Equality comparison against `other`.
bool operator==(const InternalAPMConfig& other) const;
// Data members. The *_enabled flags default to false, the integer levels and
// modes default to 0, the pre-amplifier gain factor defaults to 1.f and the
// experiments description defaults to the empty string.
// NOTE(review): the meaning of the integer level/mode values is not defined
// in this header - presumably they mirror enums of the corresponding APM
// sub-modules; confirm before interpreting them.
bool aec_enabled = false;
bool aec_delay_agnostic_enabled = false;
bool aec_drift_compensation_enabled = false;
bool aec_extended_filter_enabled = false;
int aec_suppression_level = 0;
bool aecm_enabled = false;
bool aecm_comfort_noise_enabled = false;
int aecm_routing_mode = 0;
bool agc_enabled = false;
int agc_mode = 0;
bool agc_limiter_enabled = false;
bool hpf_enabled = false;
bool ns_enabled = false;
int ns_level = 0;
bool transient_suppression_enabled = false;
bool noise_robust_agc_enabled = false;
bool pre_amplifier_enabled = false;
float pre_amplifier_fixed_gain_factor = 1.f;
std::string experiments_description = "";
};
// An interface for recording configuration and input/output streams
// of the Audio Processing Module. The recordings are called
// 'aec-dumps' and are stored in a protobuf format defined in
// debug.proto.
// The Write* methods are always safe to call concurrently or
// otherwise for all implementing subclasses. The intended mode of
// operation is to create a protobuf object from the input, and send
// it away to be written to file asynchronously.
//
// All recording methods except the deprecated one-argument WriteInitMessage
// overload are pure virtual and must be provided by the implementing subclass.
class AecDump {
public:
// Per-frame processing state attached to a capture stream message via
// AddAudioProcessingState(). `delay`, `drift` and `keypress` have no default
// initializer, so a default-initialized instance leaves them indeterminate;
// `applied_input_volume` starts out empty.
// NOTE(review): units of `delay` and `drift` are not stated here - confirm
// against debug.proto.
struct AudioProcessingState {
int delay;
int drift;
absl::optional<int> applied_input_volume;
bool keypress;
};
// Virtual so that implementations can be destroyed through an AecDump pointer.
virtual ~AecDump() = default;
// Logs Event::Type INIT message.
// `api_format` is the processing configuration to record and `time_now_ms` is
// a timestamp in milliseconds supplied by the caller.
virtual void WriteInitMessage(const ProcessingConfig& api_format,
int64_t time_now_ms) = 0;
// Deprecated convenience overload: forwards to the two-argument overload with
// `time_now_ms` set to 0.
ABSL_DEPRECATED("")
void WriteInitMessage(const ProcessingConfig& api_format) {
WriteInitMessage(api_format, 0);
}
// Logs Event::Type STREAM message. To log an input/output pair,
// call the AddCapture* and AddAudioProcessingState methods followed
// by a WriteCaptureStreamMessage call.
// Each Add* method comes in two flavours: one taking a float
// AudioFrameView and one taking a raw int16_t buffer described by
// `num_channels` and `samples_per_channel`.
// NOTE(review): the sample layout of the int16_t buffer (interleaved or not)
// is not stated in this header - confirm with the implementation.
virtual void AddCaptureStreamInput(
const AudioFrameView<const float>& src) = 0;
virtual void AddCaptureStreamOutput(
const AudioFrameView<const float>& src) = 0;
virtual void AddCaptureStreamInput(const int16_t* const data,
int num_channels,
int samples_per_channel) = 0;
virtual void AddCaptureStreamOutput(const int16_t* const data,
int num_channels,
int samples_per_channel) = 0;
virtual void AddAudioProcessingState(const AudioProcessingState& state) = 0;
virtual void WriteCaptureStreamMessage() = 0;
// Logs Event::Type REVERSE_STREAM message.
// As with the capture methods, both an int16_t buffer overload and a float
// AudioFrameView overload are provided.
virtual void WriteRenderStreamMessage(const int16_t* const data,
int num_channels,
int samples_per_channel) = 0;
virtual void WriteRenderStreamMessage(
const AudioFrameView<const float>& src) = 0;
// Records a runtime setting passed to the AudioProcessing module.
virtual void WriteRuntimeSetting(
const AudioProcessing::RuntimeSetting& runtime_setting) = 0;
// Logs Event::Type CONFIG message.
virtual void WriteConfig(const InternalAPMConfig& config) = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/include/audio_frame_proxies.h"
#include "api/audio/audio_frame.h"
#include "api/audio/audio_processing.h"
namespace webrtc {
// Runs `frame` through `ap->ProcessStream()` in place and copies the voice
// detection result from the APM statistics into the frame when one is
// reported.
//
// Returns AudioProcessing::Error::kNullPointerError if either pointer is null;
// otherwise returns whatever ProcessStream() returned.
int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
  if (ap == nullptr || frame == nullptr) {
    return AudioProcessing::Error::kNullPointerError;
  }

  // Input and output use the frame's own rate and channel count, because the
  // samples are written back into the same frame.
  const StreamConfig capture_in(frame->sample_rate_hz_, frame->num_channels_);
  const StreamConfig capture_out(frame->sample_rate_hz_, frame->num_channels_);
  RTC_DCHECK_EQ(frame->samples_per_channel(), capture_in.num_frames());

  const int status = ap->ProcessStream(frame->data(), capture_in, capture_out,
                                       frame->mutable_data());

  // The statistics are queried even when ProcessStream() reported an error;
  // the frame's VAD state is only touched if a detection result is present.
  const AudioProcessingStats stats = ap->GetStatistics();
  if (stats.voice_detected) {
    if (*stats.voice_detected) {
      frame->vad_activity_ = AudioFrame::VADActivity::kVadActive;
    } else {
      frame->vad_activity_ = AudioFrame::VADActivity::kVadPassive;
    }
  }

  return status;
}
// Runs `frame` through `ap->ProcessReverseStream()` in place.
//
// Returns, in order of checking:
//   - kNullPointerError       if `ap` or `frame` is null,
//   - kBadSampleRateError     if the frame rate is not one of the native rates
//                             (8, 16, 32 or 48 kHz),
//   - kBadNumberChannelsError if the frame reports no channels,
// and otherwise whatever ProcessReverseStream() returned.
int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
  if (ap == nullptr || frame == nullptr) {
    return AudioProcessing::Error::kNullPointerError;
  }

  // Must be a native rate.
  const auto rate_hz = frame->sample_rate_hz_;
  const bool is_native_rate =
      rate_hz == AudioProcessing::NativeRate::kSampleRate8kHz ||
      rate_hz == AudioProcessing::NativeRate::kSampleRate16kHz ||
      rate_hz == AudioProcessing::NativeRate::kSampleRate32kHz ||
      rate_hz == AudioProcessing::NativeRate::kSampleRate48kHz;
  if (!is_native_rate) {
    return AudioProcessing::Error::kBadSampleRateError;
  }

  if (frame->num_channels_ <= 0) {
    return AudioProcessing::Error::kBadNumberChannelsError;
  }

  // Input and output use the frame's own rate and channel count, because the
  // samples are written back into the same frame.
  const StreamConfig render_in(rate_hz, frame->num_channels_);
  const StreamConfig render_out(rate_hz, frame->num_channels_);
  return ap->ProcessReverseStream(frame->data(), render_in, render_out,
                                  frame->mutable_data());
}
} // namespace webrtc

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
namespace webrtc {
class AudioFrame;
class AudioProcessing;
// Processes a 10 ms `frame` of the primary audio stream using the provided
// AudioProcessing object. On the client-side, this is the near-end (or
// captured) audio. The `sample_rate_hz_`, `num_channels_`, and
// `samples_per_channel_` members of `frame` must be valid. If changed from the
// previous call to this function, it will trigger an initialization of the
// provided AudioProcessing object.
// The frame is processed in place: its samples are overwritten with the
// output, and its `vad_activity_` member is updated whenever the
// AudioProcessing statistics report a voice detection result.
// Returns AudioProcessing::Error::kNullPointerError if `ap` or `frame` is
// null.
// The function returns any error codes passed from the AudioProcessing
// ProcessStream method.
int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame);
// Processes a 10 ms `frame` of the reverse direction audio stream using the
// provided AudioProcessing object. The frame may be modified. On the
// client-side, this is the far-end (or to be rendered) audio. The
// `sample_rate_hz_`, `num_channels_`, and `samples_per_channel_` members of
// `frame` must be valid. If changed from the previous call to this function, it
// will trigger an initialization of the provided AudioProcessing object.
// Before calling into the AudioProcessing object, the function itself returns
// kNullPointerError if `ap` or `frame` is null, kBadSampleRateError if
// `sample_rate_hz_` is not one of the native rates (8, 16, 32 or 48 kHz), and
// kBadNumberChannelsError if `num_channels_` is not positive.
// The function returns any error codes passed from the AudioProcessing
// ProcessReverseStream method.
int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame);
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
#include "api/audio/audio_view.h"
namespace webrtc {
// Class to pass audio data in T** format, where T is a numeric type.
// The class does not own the samples; it only wraps a `DeinterleavedView<T>`
// over memory supplied by the caller.
template <class T>
class AudioFrameView {
public:
// `num_channels` and `channel_size` describe the T**
// `audio_samples`. `audio_samples` is assumed to point to a
// two-dimensional |num_channels * channel_size| array of T.
//
// Note: The implementation now only requires the first channel pointer.
// The previous implementation retained a pointer to externally owned array
// of channel pointers, but since the channel size and count are provided
// and the array is assumed to be a single two-dimensional array, the other
// channel pointers can be calculated based on that (which is what the class
// now uses `DeinterleavedView<>` internally for).
//
// When either `num_channels` or `channel_size` is zero, `audio_samples` is
// not dereferenced and the view is built from a null data pointer.
AudioFrameView(T* const* audio_samples, int num_channels, int channel_size)
: view_(num_channels && channel_size ? audio_samples[0] : nullptr,
channel_size,
num_channels) {
RTC_DCHECK_GE(view_.num_channels(), 0);
RTC_DCHECK_GE(view_.samples_per_channel(), 0);
}
// Implicit cast to allow converting AudioFrameView<float> to
// AudioFrameView<const float>.
// Intentionally not `explicit`, so the conversion can happen at call sites.
template <class U>
AudioFrameView(AudioFrameView<U> other) : view_(other.view()) {}
// Allow constructing AudioFrameView from a DeinterleavedView.
template <class U>
explicit AudioFrameView(DeinterleavedView<U> view) : view_(view) {}
// A view must always be constructed from samples or from another view.
AudioFrameView() = delete;
// Dimensions, as reported by the wrapped view.
int num_channels() const { return view_.num_channels(); }
int samples_per_channel() const { return view_.samples_per_channel(); }
// Per-channel access. `channel(idx)` and `operator[](idx)` are equivalent;
// both forward `idx` to the wrapped view without checking it here. The const
// overloads return views over const samples.
MonoView<T> channel(int idx) { return view_[idx]; }
MonoView<const T> channel(int idx) const { return view_[idx]; }
MonoView<T> operator[](int idx) { return view_[idx]; }
MonoView<const T> operator[](int idx) const { return view_[idx]; }
// Returns the wrapped view (a const-sample view from the const overload).
DeinterleavedView<T> view() { return view_; }
DeinterleavedView<const T> view() const { return view_; }
private:
// Non-owning view over the caller's sample memory.
DeinterleavedView<T> view_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_