FaceAccess/VocieProcess/modules/audio_processing/vad/voice_activity_detector.cc

/*
 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/vad/voice_activity_detector.h"

#include <algorithm>

#include "rtc_base/checks.h"

namespace webrtc {
namespace {

// Channel count passed to the resampler when it is (re)configured.
constexpr size_t kNumChannels = 1;

// Initial value of `last_voice_probability_`, before any chunk is processed.
constexpr double kDefaultVoiceValue = 1.0;
// Per-frame starting probability for a chunk that is not flagged as silent;
// the VAD stages then update it in place.
constexpr double kNeutralProbability = 0.5;
// Per-frame probability assigned when the extracted features report silence.
constexpr double kLowProbability = 0.01;

}  // namespace

// Constructs the detector. `last_voice_probability_` starts at
// `kDefaultVoiceValue`, and `standalone_vad_` is initialised from the object
// returned by StandaloneVad::Create().
VoiceActivityDetector::VoiceActivityDetector()
    : last_voice_probability_(kDefaultVoiceValue),
      standalone_vad_(StandaloneVad::Create()) {}

// Out-of-line defaulted destructor; no custom teardown logic is performed
// here beyond what the members' own destructors do.
VoiceActivityDetector::~VoiceActivityDetector() = default;

// Because ISAC has a different chunk length, it updates
// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data.
// Otherwise it clears them.
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
                                         size_t length,
                                         int sample_rate_hz) {
  RTC_DCHECK_EQ(length, sample_rate_hz / 100);
  // TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.
  // Resample to the required rate.
  const int16_t* resampled_ptr = audio;
  if (sample_rate_hz != kSampleRateHz) {
    RTC_CHECK_EQ(
        resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
        0);
    resampler_.Push(audio, length, resampled_, kLength10Ms, length);
    resampled_ptr = resampled_;
  }
  RTC_DCHECK_EQ(length, kLength10Ms);

  // Each chunk needs to be passed into `standalone_vad_`, because internally it
  // buffers the audio and processes it all at once when GetActivity() is
  // called.
  RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);

  audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);

  chunkwise_voice_probabilities_.resize(features_.num_frames);
  chunkwise_rms_.resize(features_.num_frames);
  std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
            chunkwise_rms_.begin());
  if (features_.num_frames > 0) {
    if (features_.silence) {
      // The other features are invalid, so set the voice probabilities to an
      // arbitrary low value.
      std::fill(chunkwise_voice_probabilities_.begin(),
                chunkwise_voice_probabilities_.end(), kLowProbability);
    } else {
      std::fill(chunkwise_voice_probabilities_.begin(),
                chunkwise_voice_probabilities_.end(), kNeutralProbability);
      RTC_CHECK_GE(
          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
                                       chunkwise_voice_probabilities_.size()),
          0);
      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
                       features_, &chunkwise_voice_probabilities_[0]),
                   0);
    }
    last_voice_probability_ = chunkwise_voice_probabilities_.back();
  }
}

}  // namespace webrtc
add vad code. 2024-09-06 18:26:45 +08:00			`/*`
			`* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.`
			`*`
			`* Use of this source code is governed by a BSD-style license`
			`* that can be found in the LICENSE file in the root of the source`
			`* tree. An additional intellectual property rights grant can be found`
			`* in the file PATENTS. All contributing project authors may`
			`* be found in the AUTHORS file in the root of the source tree.`
			`*/`

			`#include "modules/audio_processing/vad/voice_activity_detector.h"`

			`#include <algorithm>`

			`#include "rtc_base/checks.h"`

			`namespace webrtc {`
			`namespace {`

			`const size_t kNumChannels = 1;`

			`const double kDefaultVoiceValue = 1.0;`
			`const double kNeutralProbability = 0.5;`
			`const double kLowProbability = 0.01;`

			`} // namespace`

			`VoiceActivityDetector::VoiceActivityDetector()`
			`: last_voice_probability_(kDefaultVoiceValue),`
			`standalone_vad_(StandaloneVad::Create()) {}`

			`VoiceActivityDetector::~VoiceActivityDetector() = default;`

			`// Because ISAC has a different chunk length, it updates`
			// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data.
			`// Otherwise it clears them.`
			`void VoiceActivityDetector::ProcessChunk(const int16_t* audio,`
			`size_t length,`
			`int sample_rate_hz) {`
			`RTC_DCHECK_EQ(length, sample_rate_hz / 100);`
			`// TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.`
			`// Resample to the required rate.`
			`const int16_t* resampled_ptr = audio;`
			`if (sample_rate_hz != kSampleRateHz) {`
			`RTC_CHECK_EQ(`
			`resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),`
			`0);`
			`resampler_.Push(audio, length, resampled_, kLength10Ms, length);`
			`resampled_ptr = resampled_;`
			`}`
			`RTC_DCHECK_EQ(length, kLength10Ms);`

			// Each chunk needs to be passed into `standalone_vad_`, because internally it
			`// buffers the audio and processes it all at once when GetActivity() is`
			`// called.`
			`RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);`

			`audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);`

			`chunkwise_voice_probabilities_.resize(features_.num_frames);`
			`chunkwise_rms_.resize(features_.num_frames);`
			`std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),`
			`chunkwise_rms_.begin());`
			`if (features_.num_frames > 0) {`
			`if (features_.silence) {`
			`// The other features are invalid, so set the voice probabilities to an`
			`// arbitrary low value.`
			`std::fill(chunkwise_voice_probabilities_.begin(),`
			`chunkwise_voice_probabilities_.end(), kLowProbability);`
			`} else {`
			`std::fill(chunkwise_voice_probabilities_.begin(),`
			`chunkwise_voice_probabilities_.end(), kNeutralProbability);`
			`RTC_CHECK_GE(`
			`standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],`
			`chunkwise_voice_probabilities_.size()),`
			`0);`
			`RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(`
			`features_, &chunkwise_voice_probabilities_[0]),`
			`0);`
			`}`
			`last_voice_probability_ = chunkwise_voice_probabilities_.back();`
			`}`
			`}`

			`} // namespace webrtc`