update code.

luocai
2024-09-05 09:59:28 +08:00
parent 4f3dc015f7
commit ccf69909d6
223 changed files with 36168 additions and 0 deletions

api/audio/audio_processing.cc

@@ -0,0 +1,211 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/audio_processing.h"
#include <string>
#include "rtc_base/checks.h"
#include "rtc_base/strings/string_builder.h"
namespace webrtc {
namespace {
using Agc1Config = AudioProcessing::Config::GainController1;
using Agc2Config = AudioProcessing::Config::GainController2;
std::string NoiseSuppressionLevelToString(
const AudioProcessing::Config::NoiseSuppression::Level& level) {
switch (level) {
case AudioProcessing::Config::NoiseSuppression::Level::kLow:
return "Low";
case AudioProcessing::Config::NoiseSuppression::Level::kModerate:
return "Moderate";
case AudioProcessing::Config::NoiseSuppression::Level::kHigh:
return "High";
case AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh:
return "VeryHigh";
}
RTC_CHECK_NOTREACHED();
}
std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
switch (mode) {
case Agc1Config::Mode::kAdaptiveAnalog:
return "AdaptiveAnalog";
case Agc1Config::Mode::kAdaptiveDigital:
return "AdaptiveDigital";
case Agc1Config::Mode::kFixedDigital:
return "FixedDigital";
}
RTC_CHECK_NOTREACHED();
}
} // namespace
constexpr int AudioProcessing::kNativeSampleRatesHz[];
void CustomProcessing::SetRuntimeSetting(
AudioProcessing::RuntimeSetting setting) {}
bool Agc1Config::operator==(const Agc1Config& rhs) const {
const auto& analog_lhs = analog_gain_controller;
const auto& analog_rhs = rhs.analog_gain_controller;
return enabled == rhs.enabled && mode == rhs.mode &&
target_level_dbfs == rhs.target_level_dbfs &&
compression_gain_db == rhs.compression_gain_db &&
enable_limiter == rhs.enable_limiter &&
analog_lhs.enabled == analog_rhs.enabled &&
analog_lhs.startup_min_volume == analog_rhs.startup_min_volume &&
analog_lhs.clipped_level_min == analog_rhs.clipped_level_min &&
analog_lhs.enable_digital_adaptive ==
analog_rhs.enable_digital_adaptive &&
analog_lhs.clipped_level_step == analog_rhs.clipped_level_step &&
analog_lhs.clipped_ratio_threshold ==
analog_rhs.clipped_ratio_threshold &&
analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames &&
analog_lhs.clipping_predictor.mode ==
analog_rhs.clipping_predictor.mode &&
analog_lhs.clipping_predictor.window_length ==
analog_rhs.clipping_predictor.window_length &&
analog_lhs.clipping_predictor.reference_window_length ==
analog_rhs.clipping_predictor.reference_window_length &&
analog_lhs.clipping_predictor.reference_window_delay ==
analog_rhs.clipping_predictor.reference_window_delay &&
analog_lhs.clipping_predictor.clipping_threshold ==
analog_rhs.clipping_predictor.clipping_threshold &&
analog_lhs.clipping_predictor.crest_factor_margin ==
analog_rhs.clipping_predictor.crest_factor_margin &&
analog_lhs.clipping_predictor.use_predicted_step ==
analog_rhs.clipping_predictor.use_predicted_step;
}
bool Agc2Config::AdaptiveDigital::operator==(
const Agc2Config::AdaptiveDigital& rhs) const {
return enabled == rhs.enabled && headroom_db == rhs.headroom_db &&
max_gain_db == rhs.max_gain_db &&
initial_gain_db == rhs.initial_gain_db &&
max_gain_change_db_per_second == rhs.max_gain_change_db_per_second &&
max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs;
}
bool Agc2Config::InputVolumeController::operator==(
const Agc2Config::InputVolumeController& rhs) const {
return enabled == rhs.enabled;
}
bool Agc2Config::operator==(const Agc2Config& rhs) const {
return enabled == rhs.enabled &&
fixed_digital.gain_db == rhs.fixed_digital.gain_db &&
adaptive_digital == rhs.adaptive_digital &&
input_volume_controller == rhs.input_volume_controller;
}
bool AudioProcessing::Config::CaptureLevelAdjustment::operator==(
const AudioProcessing::Config::CaptureLevelAdjustment& rhs) const {
return enabled == rhs.enabled && pre_gain_factor == rhs.pre_gain_factor &&
post_gain_factor == rhs.post_gain_factor &&
analog_mic_gain_emulation == rhs.analog_mic_gain_emulation;
}
bool AudioProcessing::Config::CaptureLevelAdjustment::AnalogMicGainEmulation::
operator==(const AudioProcessing::Config::CaptureLevelAdjustment::
AnalogMicGainEmulation& rhs) const {
return enabled == rhs.enabled && initial_level == rhs.initial_level;
}
std::string AudioProcessing::Config::ToString() const {
char buf[2048];
rtc::SimpleStringBuilder builder(buf);
builder << "AudioProcessing::Config{ "
"pipeline: { "
"maximum_internal_processing_rate: "
<< pipeline.maximum_internal_processing_rate
<< ", multi_channel_render: " << pipeline.multi_channel_render
<< ", multi_channel_capture: " << pipeline.multi_channel_capture
<< " }, pre_amplifier: { enabled: " << pre_amplifier.enabled
<< ", fixed_gain_factor: " << pre_amplifier.fixed_gain_factor
<< " },capture_level_adjustment: { enabled: "
<< capture_level_adjustment.enabled
<< ", pre_gain_factor: " << capture_level_adjustment.pre_gain_factor
<< ", post_gain_factor: " << capture_level_adjustment.post_gain_factor
<< ", analog_mic_gain_emulation: { enabled: "
<< capture_level_adjustment.analog_mic_gain_emulation.enabled
<< ", initial_level: "
<< capture_level_adjustment.analog_mic_gain_emulation.initial_level
<< " }}, high_pass_filter: { enabled: " << high_pass_filter.enabled
<< " }, echo_canceller: { enabled: " << echo_canceller.enabled
<< ", mobile_mode: " << echo_canceller.mobile_mode
<< ", enforce_high_pass_filtering: "
<< echo_canceller.enforce_high_pass_filtering
<< " }, noise_suppression: { enabled: " << noise_suppression.enabled
<< ", level: "
<< NoiseSuppressionLevelToString(noise_suppression.level)
<< " }, transient_suppression: { enabled: "
<< transient_suppression.enabled
<< " }, gain_controller1: { enabled: " << gain_controller1.enabled
<< ", mode: " << GainController1ModeToString(gain_controller1.mode)
<< ", target_level_dbfs: " << gain_controller1.target_level_dbfs
<< ", compression_gain_db: " << gain_controller1.compression_gain_db
<< ", enable_limiter: " << gain_controller1.enable_limiter
<< ", analog_gain_controller { enabled: "
<< gain_controller1.analog_gain_controller.enabled
<< ", startup_min_volume: "
<< gain_controller1.analog_gain_controller.startup_min_volume
<< ", clipped_level_min: "
<< gain_controller1.analog_gain_controller.clipped_level_min
<< ", enable_digital_adaptive: "
<< gain_controller1.analog_gain_controller.enable_digital_adaptive
<< ", clipped_level_step: "
<< gain_controller1.analog_gain_controller.clipped_level_step
<< ", clipped_ratio_threshold: "
<< gain_controller1.analog_gain_controller.clipped_ratio_threshold
<< ", clipped_wait_frames: "
<< gain_controller1.analog_gain_controller.clipped_wait_frames
<< ", clipping_predictor: { enabled: "
<< gain_controller1.analog_gain_controller.clipping_predictor.enabled
<< ", mode: "
<< gain_controller1.analog_gain_controller.clipping_predictor.mode
<< ", window_length: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.window_length
<< ", reference_window_length: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.reference_window_length
<< ", reference_window_delay: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.reference_window_delay
<< ", clipping_threshold: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.clipping_threshold
<< ", crest_factor_margin: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.crest_factor_margin
<< ", use_predicted_step: "
<< gain_controller1.analog_gain_controller.clipping_predictor
.use_predicted_step
<< " }}}, gain_controller2: { enabled: " << gain_controller2.enabled
<< ", fixed_digital: { gain_db: "
<< gain_controller2.fixed_digital.gain_db
<< " }, adaptive_digital: { enabled: "
<< gain_controller2.adaptive_digital.enabled
<< ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
<< ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
<< ", initial_gain_db: "
<< gain_controller2.adaptive_digital.initial_gain_db
<< ", max_gain_change_db_per_second: "
<< gain_controller2.adaptive_digital.max_gain_change_db_per_second
<< ", max_output_noise_level_dbfs: "
<< gain_controller2.adaptive_digital.max_output_noise_level_dbfs
<< " }, input_volume_control : { enabled "
<< gain_controller2.input_volume_controller.enabled << "}}";
return builder.str();
}
} // namespace webrtc

api/audio/audio_processing.h

@@ -0,0 +1,944 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_PROCESSING_H_
#define API_AUDIO_AUDIO_PROCESSING_H_
// MSVC++ requires this to be set before any other includes to get M_PI.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif
#include <math.h>
#include <stddef.h> // size_t
#include <stdio.h> // FILE
#include <string.h>
#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include "absl/base/nullability.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/audio_processing_statistics.h"
#include "api/audio/echo_control.h"
#include "api/ref_count.h"
#include "api/scoped_refptr.h"
#include "api/task_queue/task_queue_base.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
class AecDump;
class AudioBuffer;
class StreamConfig;
class ProcessingConfig;
class EchoDetector;
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//
// APM operates on two audio streams on a frame-by-frame basis. Frames of the
// primary stream, on which all processing is applied, are passed to
// `ProcessStream()`. Frames of the reverse direction stream are passed to
// `ProcessReverseStream()`. On the client-side, this will typically be the
// near-end (capture) and far-end (render) streams, respectively. APM should be
// placed in the signal chain as close to the audio hardware abstraction layer
// (HAL) as possible.
//
// On the server-side, the reverse stream will normally not be used, with
// processing occurring on each incoming stream.
//
// Component interfaces follow a similar pattern and are accessed through
// corresponding getters in APM. All components are disabled at create-time,
// with default settings that are recommended for most situations. New settings
// can be applied without enabling a component. Enabling a component triggers
// memory allocation and initialization to allow it to start processing the
// streams.
//
// Thread safety is provided with the following assumptions to reduce locking
// overhead:
// 1. The stream getters and setters are called from the same thread as
// ProcessStream(). More precisely, stream functions are never called
// concurrently with ProcessStream().
// 2. Parameter getters are never called concurrently with the corresponding
// setter.
//
// APM accepts only linear PCM audio data in chunks of ~10 ms (see
// AudioProcessing::GetFrameSize() for details) and sample rates ranging from
// 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
// float interfaces use deinterleaved data.
//
// Usage example, omitting error checking:
// rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
//
// AudioProcessing::Config config;
// config.echo_canceller.enabled = true;
// config.echo_canceller.mobile_mode = false;
//
// config.gain_controller1.enabled = true;
// config.gain_controller1.mode =
// AudioProcessing::Config::GainController1::kAdaptiveAnalog;
// config.gain_controller1.analog_level_minimum = 0;
// config.gain_controller1.analog_level_maximum = 255;
//
// config.gain_controller2.enabled = true;
//
// config.high_pass_filter.enabled = true;
//
// apm->ApplyConfig(config);
//
// // Start a voice call...
//
// // ... Render frame arrives bound for the audio HAL ...
// apm->ProcessReverseStream(render_frame);
//
// // ... Capture frame arrives from the audio HAL ...
// // Call required set_stream_ functions.
// apm->set_stream_delay_ms(delay_ms);
// apm->set_stream_analog_level(analog_level);
//
// apm->ProcessStream(capture_frame);
//
// // Call required stream_ functions.
// analog_level = apm->recommended_stream_analog_level();
// has_voice = apm->stream_has_voice();
//
// // Repeat render and capture processing for the duration of the call...
// // Start a new call...
// apm->Initialize();
//
// // Close the application...
// apm.reset();
//
class RTC_EXPORT AudioProcessing : public RefCountInterface {
public:
// The struct below constitutes the new parameter scheme for the audio
// processing. It is being introduced gradually and until it is fully
// introduced, it is prone to change.
// TODO(peah): Remove this comment once the new config scheme is fully rolled
// out.
//
// The parameters and behavior of the audio processing module are controlled
// by changing the default values in the AudioProcessing::Config struct.
// The config is applied by passing the struct to the ApplyConfig method.
//
// This config is intended to be used during setup, and to enable/disable
// top-level processing effects. Use during processing may cause undesired
// submodule resets, affecting the audio quality. Use the RuntimeSetting
// construct for runtime configuration.
struct RTC_EXPORT Config {
// Sets the properties of the audio processing pipeline.
struct RTC_EXPORT Pipeline {
// Ways to downmix a multi-channel track to mono.
enum class DownmixMethod {
kAverageChannels, // Average across channels.
kUseFirstChannel // Use the first channel.
};
// Maximum allowed processing rate used internally. May only be set to
// 32000 or 48000 and any differing values will be treated as 48000.
int maximum_internal_processing_rate = 48000;
// Allow multi-channel processing of render audio.
bool multi_channel_render = false;
// Allow multi-channel processing of capture audio when AEC3 is active
// or a custom AEC is injected.
bool multi_channel_capture = false;
// Indicates how to downmix multi-channel capture audio to mono (when
// needed).
DownmixMethod capture_downmix_method = DownmixMethod::kAverageChannels;
} pipeline;
// Enables the pre-amplifier. It amplifies the capture signal
// before any other processing is done.
// TODO(webrtc:5298): Deprecate and use the pre-gain functionality in
// capture_level_adjustment instead.
struct PreAmplifier {
bool enabled = false;
float fixed_gain_factor = 1.0f;
} pre_amplifier;
// Functionality for general level adjustment in the capture pipeline. This
// should not be used together with the legacy PreAmplifier functionality.
struct CaptureLevelAdjustment {
bool operator==(const CaptureLevelAdjustment& rhs) const;
bool operator!=(const CaptureLevelAdjustment& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
// The `pre_gain_factor` scales the signal before any processing is done.
float pre_gain_factor = 1.0f;
// The `post_gain_factor` scales the signal after all processing is done.
float post_gain_factor = 1.0f;
struct AnalogMicGainEmulation {
bool operator==(const AnalogMicGainEmulation& rhs) const;
bool operator!=(const AnalogMicGainEmulation& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
// Initial analog gain level to use for the emulated analog gain. Must
// be in the range [0...255].
int initial_level = 255;
} analog_mic_gain_emulation;
} capture_level_adjustment;
struct HighPassFilter {
bool enabled = false;
bool apply_in_full_band = true;
} high_pass_filter;
struct EchoCanceller {
bool enabled = false;
bool mobile_mode = false;
bool export_linear_aec_output = false;
// Enforce the highpass filter to be on (has no effect for the mobile
// mode).
bool enforce_high_pass_filtering = true;
} echo_canceller;
// Enables background noise suppression.
struct NoiseSuppression {
bool enabled = false;
enum Level { kLow, kModerate, kHigh, kVeryHigh };
Level level = kModerate;
bool analyze_linear_aec_output_when_available = false;
} noise_suppression;
// TODO(bugs.webrtc.org/357281131): Deprecated. Stop using and remove.
// Enables transient suppression.
struct TransientSuppression {
bool enabled = false;
} transient_suppression;
// Enables automatic gain control (AGC) functionality.
// The automatic gain control (AGC) component brings the signal to an
// appropriate range. This is done by applying a digital gain directly and,
// in the analog mode, prescribing an analog gain to be applied at the audio
// HAL.
// Recommended to be enabled on the client-side.
struct RTC_EXPORT GainController1 {
bool operator==(const GainController1& rhs) const;
bool operator!=(const GainController1& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
enum Mode {
// Adaptive mode intended for use if an analog volume control is
// available on the capture device. It will require the user to provide
// coupling between the OS mixer controls and AGC through the
// stream_analog_level() functions.
// It consists of an analog gain prescription for the audio device and a
// digital compression stage.
kAdaptiveAnalog,
// Adaptive mode intended for situations in which an analog volume
// control is unavailable. It operates in a similar fashion to the
// adaptive analog mode, but with scaling instead applied in the digital
// domain. As with the analog mode, it additionally uses a digital
// compression stage.
kAdaptiveDigital,
// Fixed mode which enables only the digital compression stage also used
// by the two adaptive modes.
// It is distinguished from the adaptive modes by considering only a
// short time-window of the input signal. It applies a fixed gain
// through most of the input level range, and compresses (gradually
// reduces gain with increasing level) the input signal at higher
// levels. This mode is preferred on embedded devices where the capture
// signal level is predictable, so that a known gain can be applied.
kFixedDigital
};
Mode mode = kAdaptiveAnalog;
// Sets the target peak level (or envelope) of the AGC in dBFS (decibels
// from digital full-scale). The convention is to use positive values. For
// instance, passing in a value of 3 corresponds to -3 dBFS, or a target
// level 3 dB below full-scale. Limited to [0, 31].
int target_level_dbfs = 3;
// Sets the maximum gain the digital compression stage may apply, in dB. A
// higher number corresponds to greater compression, while a value of 0
// will leave the signal uncompressed. Limited to [0, 90].
// For updates after APM setup, use a RuntimeSetting instead.
int compression_gain_db = 9;
// When enabled, the compression stage will hard limit the signal to the
// target level. Otherwise, the signal will be compressed but not limited
// above the target level.
bool enable_limiter = true;
// Enables the analog gain controller functionality.
struct AnalogGainController {
bool enabled = true;
// TODO(bugs.webrtc.org/7494): Deprecated. Stop using and remove.
int startup_min_volume = 0;
// Lowest analog microphone level that will be applied in response to
// clipping.
int clipped_level_min = 70;
// If true, an adaptive digital gain is applied.
bool enable_digital_adaptive = true;
// Amount the microphone level is lowered with every clipping event.
// Limited to (0, 255].
int clipped_level_step = 15;
// Proportion of clipped samples required to declare a clipping event.
// Limited to (0.f, 1.f).
float clipped_ratio_threshold = 0.1f;
// Time in frames to wait after a clipping event before checking again.
// Limited to values higher than 0.
int clipped_wait_frames = 300;
// Enables clipping prediction functionality.
struct ClippingPredictor {
bool enabled = false;
enum Mode {
// Clipping event prediction mode with fixed step estimation.
kClippingEventPrediction,
// Clipped peak estimation mode with adaptive step estimation.
kAdaptiveStepClippingPeakPrediction,
// Clipped peak estimation mode with fixed step estimation.
kFixedStepClippingPeakPrediction,
};
Mode mode = kClippingEventPrediction;
// Number of frames in the sliding analysis window.
int window_length = 5;
// Number of frames in the sliding reference window.
int reference_window_length = 5;
// Reference window delay (unit: number of frames).
int reference_window_delay = 5;
// Clipping prediction threshold (dBFS).
float clipping_threshold = -1.0f;
// Crest factor drop threshold (dB).
float crest_factor_margin = 3.0f;
// If true, the recommended clipped level step is used to modify the
// analog gain. Otherwise, the predictor runs without affecting the
// analog gain.
bool use_predicted_step = true;
} clipping_predictor;
} analog_gain_controller;
} gain_controller1;
// Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
// replaces the AGC sub-module parametrized by `gain_controller1`.
// AGC2 brings the captured audio signal to the desired level by combining
// three different controllers (namely, input volume controller, adaptive
// digital controller and fixed digital controller) and a limiter.
// TODO(bugs.webrtc.org:7494): Name `GainController` when AGC1 removed.
struct RTC_EXPORT GainController2 {
bool operator==(const GainController2& rhs) const;
bool operator!=(const GainController2& rhs) const {
return !(*this == rhs);
}
// AGC2 must be created if and only if `enabled` is true.
bool enabled = false;
// Parameters for the input volume controller, which adjusts the input
// volume applied when the audio is captured (e.g., microphone volume on
// a soundcard, input volume on HAL).
struct InputVolumeController {
bool operator==(const InputVolumeController& rhs) const;
bool operator!=(const InputVolumeController& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
} input_volume_controller;
// Parameters for the adaptive digital controller, which adjusts and
// applies a digital gain after echo cancellation and after noise
// suppression.
struct RTC_EXPORT AdaptiveDigital {
bool operator==(const AdaptiveDigital& rhs) const;
bool operator!=(const AdaptiveDigital& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
float headroom_db = 5.0f;
float max_gain_db = 50.0f;
float initial_gain_db = 15.0f;
float max_gain_change_db_per_second = 6.0f;
float max_output_noise_level_dbfs = -50.0f;
} adaptive_digital;
// Parameters for the fixed digital controller, which applies a fixed
// digital gain after the adaptive digital controller and before the
// limiter.
struct FixedDigital {
// By setting `gain_db` to a value greater than zero, the limiter can be
// turned into a compressor that first applies a fixed gain.
float gain_db = 0.0f;
} fixed_digital;
} gain_controller2;
std::string ToString() const;
};
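// A minimal configuration sketch (illustrative only): enable AGC2 on top of
// the defaults and apply the config to an existing APM instance.
//   AudioProcessing::Config config;
//   config.gain_controller2.enabled = true;
//   config.gain_controller2.adaptive_digital.enabled = true;
//   config.gain_controller2.fixed_digital.gain_db = 6.0f;
//   apm->ApplyConfig(config);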
// Specifies the properties of a setting to be passed to AudioProcessing at
// runtime.
class RuntimeSetting {
public:
enum class Type {
kNotSpecified,
kCapturePreGain,
kCaptureCompressionGain,
kCaptureFixedPostGain,
kPlayoutVolumeChange,
kCustomRenderProcessingRuntimeSetting,
kPlayoutAudioDeviceChange,
kCapturePostGain,
kCaptureOutputUsed
};
// Play-out audio device properties.
struct PlayoutAudioDeviceInfo {
int id; // Identifies the audio device.
int max_volume; // Maximum play-out volume.
};
RuntimeSetting() : type_(Type::kNotSpecified), value_(0.0f) {}
~RuntimeSetting() = default;
static RuntimeSetting CreateCapturePreGain(float gain) {
return {Type::kCapturePreGain, gain};
}
static RuntimeSetting CreateCapturePostGain(float gain) {
return {Type::kCapturePostGain, gain};
}
// Corresponds to Config::GainController1::compression_gain_db, but for
// runtime configuration.
static RuntimeSetting CreateCompressionGainDb(int gain_db) {
RTC_DCHECK_GE(gain_db, 0);
RTC_DCHECK_LE(gain_db, 90);
return {Type::kCaptureCompressionGain, static_cast<float>(gain_db)};
}
// Corresponds to Config::GainController2::fixed_digital::gain_db, but for
// runtime configuration.
static RuntimeSetting CreateCaptureFixedPostGain(float gain_db) {
RTC_DCHECK_GE(gain_db, 0.0f);
RTC_DCHECK_LE(gain_db, 90.0f);
return {Type::kCaptureFixedPostGain, gain_db};
}
// Creates a runtime setting to notify play-out (aka render) audio device
// changes.
static RuntimeSetting CreatePlayoutAudioDeviceChange(
PlayoutAudioDeviceInfo audio_device) {
return {Type::kPlayoutAudioDeviceChange, audio_device};
}
// Creates a runtime setting to notify play-out (aka render) volume changes.
// `volume` is the unnormalized volume, the maximum of which is
// platform-dependent.
static RuntimeSetting CreatePlayoutVolumeChange(int volume) {
return {Type::kPlayoutVolumeChange, volume};
}
static RuntimeSetting CreateCustomRenderSetting(float payload) {
return {Type::kCustomRenderProcessingRuntimeSetting, payload};
}
static RuntimeSetting CreateCaptureOutputUsedSetting(
bool capture_output_used) {
return {Type::kCaptureOutputUsed, capture_output_used};
}
Type type() const { return type_; }
// Getters do not return a value but instead modify the argument to protect
// from implicit casting.
void GetFloat(float* value) const {
RTC_DCHECK(value);
*value = value_.float_value;
}
void GetInt(int* value) const {
RTC_DCHECK(value);
*value = value_.int_value;
}
void GetBool(bool* value) const {
RTC_DCHECK(value);
*value = value_.bool_value;
}
void GetPlayoutAudioDeviceInfo(PlayoutAudioDeviceInfo* value) const {
RTC_DCHECK(value);
*value = value_.playout_audio_device_info;
}
private:
RuntimeSetting(Type id, float value) : type_(id), value_(value) {}
RuntimeSetting(Type id, int value) : type_(id), value_(value) {}
RuntimeSetting(Type id, PlayoutAudioDeviceInfo value)
: type_(id), value_(value) {}
Type type_;
union U {
U() {}
U(int value) : int_value(value) {}
U(float value) : float_value(value) {}
U(PlayoutAudioDeviceInfo value) : playout_audio_device_info(value) {}
float float_value;
int int_value;
bool bool_value;
PlayoutAudioDeviceInfo playout_audio_device_info;
} value_;
};
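// A minimal RuntimeSetting sketch (illustrative only): create a setting,
// read the payload back via the type-checked getters, and enqueue it.
//   auto setting =
//       AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.0f);
//   if (setting.type() ==
//       AudioProcessing::RuntimeSetting::Type::kCapturePreGain) {
//     float gain = 0.0f;
//     setting.GetFloat(&gain);  // gain == 2.0f.
//   }
//   apm->SetRuntimeSetting(setting);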
~AudioProcessing() override {}
// Initializes internal states, while retaining all user settings. This
// should be called before beginning to process a new audio stream. However,
// it is not necessary to call before processing the first stream after
// creation.
//
// It is also not necessary to call if the audio parameters (sample
// rate and number of channels) have changed. Passing updated parameters
// directly to `ProcessStream()` and `ProcessReverseStream()` is permissible.
// If the parameters are known at init-time though, they may be provided.
// TODO(webrtc:5298): Change to return void.
virtual int Initialize() = 0;
// The int16 interfaces require:
// - that only `NativeRate`s be used,
// - that the input, output and reverse rates match, and
// - that `processing_config.output_stream()` matches
// `processing_config.input_stream()`.
//
// The float interfaces accept arbitrary rates and support differing input and
// output layouts, but the output must have either one channel or the same
// number of channels as the input.
virtual int Initialize(const ProcessingConfig& processing_config) = 0;
// TODO(peah): This method is a temporary solution used to take control
// over the parameters in the audio processing module and is likely to change.
virtual void ApplyConfig(const Config& config) = 0;
// TODO(ajm): Only intended for internal use. Make private and friend the
// necessary classes?
virtual int proc_sample_rate_hz() const = 0;
virtual int proc_split_sample_rate_hz() const = 0;
virtual size_t num_input_channels() const = 0;
virtual size_t num_proc_channels() const = 0;
virtual size_t num_output_channels() const = 0;
virtual size_t num_reverse_channels() const = 0;
// Set to true when the output of AudioProcessing will be muted or in some
// other way not used. Ideally, the captured audio would still be processed,
// but some components may change behavior based on this information.
// Default false. This method takes a lock. To achieve this in a lock-less
// manner, use PostRuntimeSetting instead.
virtual void set_output_will_be_muted(bool muted) = 0;
// Enqueues a runtime setting.
virtual void SetRuntimeSetting(RuntimeSetting setting) = 0;
// Enqueues a runtime setting. Returns a bool indicating whether the
// enqueueing was successful.
virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0;
// Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio as
// specified in `input_config` and `output_config`. `src` and `dest` may use
// the same memory, if desired.
virtual int ProcessStream(const int16_t* const src,
const StreamConfig& input_config,
const StreamConfig& output_config,
int16_t* const dest) = 0;
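// A minimal call sketch (illustrative only): process one 10 ms mono int16
// frame at 48 kHz in place, using the same buffer for input and output.
//   StreamConfig config(/*sample_rate_hz=*/48000, /*num_channels=*/1);
//   int16_t frame[480];  // 48000 / 100 samples, see GetFrameSize().
//   int err = apm->ProcessStream(frame, config, config, frame);
//   // err == AudioProcessing::kNoError on success.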
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// `src` points to a channel buffer, arranged according to `input_stream`. At
// output, the channels will be arranged according to `output_stream` in
// `dest`.
//
// The output must have one channel or as many channels as the input. `src`
// and `dest` may use the same memory, if desired.
virtual int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
// Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio for
// the reverse direction audio stream as specified in `input_config` and
// `output_config`. `src` and `dest` may use the same memory, if desired.
virtual int ProcessReverseStream(const int16_t* const src,
const StreamConfig& input_config,
const StreamConfig& output_config,
int16_t* const dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// `data` points to a channel buffer, arranged according to `reverse_config`.
virtual int ProcessReverseStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of `data` points to a channel buffer, arranged according to
// `reverse_config`.
virtual int AnalyzeReverseStream(const float* const* data,
const StreamConfig& reverse_config) = 0;
// Returns the most recently produced ~10 ms of the linear AEC output at a
// rate of 16 kHz. If there is more than one capture channel, a mono
// representation of the input is returned. Returns true/false to indicate
// whether an output was returned.
virtual bool GetLinearAecOutput(
rtc::ArrayView<std::array<float, 160>> linear_output) const = 0;
// This must be called prior to ProcessStream() if and only if adaptive analog
// gain control is enabled, to pass the current analog level from the audio
// HAL. Must be within the range [0, 255].
virtual void set_stream_analog_level(int level) = 0;
// When an analog mode is set, this should be called after
// `set_stream_analog_level()` and `ProcessStream()` to obtain the recommended
// new analog level for the audio HAL. It is the user's responsibility to
// apply this level.
virtual int recommended_stream_analog_level() const = 0;
// This must be called if and only if echo processing is enabled.
//
// Sets the `delay` in ms between ProcessReverseStream() receiving a far-end
// frame and ProcessStream() receiving a near-end frame containing the
// corresponding echo. On the client-side this can be expressed as
// delay = (t_render - t_analyze) + (t_process - t_capture)
// where,
// - t_analyze is the time a frame is passed to ProcessReverseStream() and
// t_render is the time the first sample of the same frame is rendered by
// the audio hardware.
// - t_capture is the time the first sample of a frame is captured by the
// audio hardware and t_process is the time the same frame is passed to
// ProcessStream().
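// For example (illustrative): with 10 ms of render-side buffering
// (t_render - t_analyze) and 20 ms of capture-side buffering
// (t_process - t_capture), call set_stream_delay_ms(30) before each
// ProcessStream() call.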
virtual int set_stream_delay_ms(int delay) = 0;
virtual int stream_delay_ms() const = 0;
// Call to signal that a key press occurred (true) or did not occur (false)
// with this chunk of audio.
virtual void set_stream_key_pressed(bool key_pressed) = 0;
// Creates and attaches a webrtc::AecDump for recording debugging
// information.
// The `worker_queue` may not be null and must outlive the created
// AecDump instance. `max_log_size_bytes == -1` means the log size
// will be unlimited. `handle` may not be null. The AecDump takes
// responsibility for `handle` and closes it in the destructor. A
// return value of true indicates that the file has been
// successfully opened, while a value of false indicates that
// opening the file failed.
virtual bool CreateAndAttachAecDump(
absl::string_view file_name,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
virtual bool CreateAndAttachAecDump(
absl::Nonnull<FILE*> handle,
int64_t max_log_size_bytes,
absl::Nonnull<TaskQueueBase*> worker_queue) = 0;
// TODO(webrtc:5298) Deprecated variant.
// Attaches provided webrtc::AecDump for recording debugging
// information. Log file and maximum file size logic is supposed to
// be handled by implementing instance of AecDump. Calling this
// method when another AecDump is attached resets the active AecDump
// with a new one. This causes the d-tor of the earlier AecDump to
// be called. The d-tor call may block until all pending logging
// tasks are completed.
virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) = 0;
// If no AecDump is attached, this has no effect. If an AecDump is
// attached, its destructor is called. The d-tor may block until
// all pending logging tasks are completed.
virtual void DetachAecDump() = 0;
// Get audio processing statistics.
virtual AudioProcessingStats GetStatistics() = 0;
// TODO(webrtc:5298) Deprecated variant. The `has_remote_tracks` argument
// should be set if there are active remote tracks (this would usually be true
// during a call). If there are no remote tracks some of the stats will not be
// set by AudioProcessing, because they only make sense if there is at least
// one remote track.
virtual AudioProcessingStats GetStatistics(bool has_remote_tracks) = 0;
// Returns the last applied configuration.
virtual AudioProcessing::Config GetConfig() const = 0;
enum Error {
// Fatal errors.
kNoError = 0,
kUnspecifiedError = -1,
kCreationFailedError = -2,
kUnsupportedComponentError = -3,
kUnsupportedFunctionError = -4,
kNullPointerError = -5,
kBadParameterError = -6,
kBadSampleRateError = -7,
kBadDataLengthError = -8,
kBadNumberChannelsError = -9,
kFileError = -10,
kStreamParameterNotSetError = -11,
kNotEnabledError = -12,
// Warnings are non-fatal.
// This results when a set_stream_ parameter is out of range. Processing
// will continue, but the parameter may have been truncated.
kBadStreamParameterWarning = -13
};
// Native rates supported by the integer interfaces.
enum NativeRate {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000,
kSampleRate48kHz = 48000
};
// TODO(kwiberg): We currently need to support a compiler (Visual C++) that
// complains if we don't explicitly state the size of the array here. Remove
// the size when that's no longer the case.
static constexpr int kNativeSampleRatesHz[4] = {
kSampleRate8kHz, kSampleRate16kHz, kSampleRate32kHz, kSampleRate48kHz};
static constexpr size_t kNumNativeSampleRates =
arraysize(kNativeSampleRatesHz);
static constexpr int kMaxNativeSampleRateHz =
kNativeSampleRatesHz[kNumNativeSampleRates - 1];
// APM processes audio in chunks of about 10 ms. See GetFrameSize() for
// details.
static constexpr int kChunkSizeMs = 10;
// Returns floor(sample_rate_hz/100): the number of samples per channel used
// as input and output to the audio processing module in calls to
// ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and
// GetLinearAecOutput.
//
// This is exactly 10 ms for sample rates divisible by 100. For example:
// - 48000 Hz (480 samples per channel),
// - 44100 Hz (441 samples per channel),
// - 16000 Hz (160 samples per channel).
//
// Sample rates not divisible by 100 are received/produced in frames of
// approximately 10 ms. For example:
// - 22050 Hz (220 samples per channel, or ~9.98 ms per frame),
// - 11025 Hz (110 samples per channel, or ~9.98 ms per frame).
// These nondivisible sample rates yield lower audio quality compared to
// multiples of 100. Internal resampling to 10 ms frames causes a simulated
// clock drift effect which impacts the performance of (for example) echo
// cancellation.
static int GetFrameSize(int sample_rate_hz) { return sample_rate_hz / 100; }
};
// Experimental interface for a custom analysis submodule.
class CustomAudioAnalyzer {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
// Analyzes the given capture or render signal.
virtual void Analyze(const AudioBuffer* audio) = 0;
// Returns a string representation of the module state.
virtual std::string ToString() const = 0;
virtual ~CustomAudioAnalyzer() {}
};
// Interface for a custom processing submodule.
class CustomProcessing {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
// Processes the given capture or render signal.
virtual void Process(AudioBuffer* audio) = 0;
// Returns a string representation of the module state.
virtual std::string ToString() const = 0;
// Handles RuntimeSettings. TODO(webrtc:9262): make pure virtual
// after updating dependencies.
virtual void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting);
virtual ~CustomProcessing() {}
};
class RTC_EXPORT AudioProcessingBuilder {
public:
AudioProcessingBuilder();
AudioProcessingBuilder(const AudioProcessingBuilder&) = delete;
AudioProcessingBuilder& operator=(const AudioProcessingBuilder&) = delete;
~AudioProcessingBuilder();
// Sets the APM configuration.
AudioProcessingBuilder& SetConfig(const AudioProcessing::Config& config) {
config_ = config;
return *this;
}
// Sets the echo controller factory to inject when APM is created.
AudioProcessingBuilder& SetEchoControlFactory(
std::unique_ptr<EchoControlFactory> echo_control_factory) {
echo_control_factory_ = std::move(echo_control_factory);
return *this;
}
// Sets the capture post-processing sub-module to inject when APM is created.
AudioProcessingBuilder& SetCapturePostProcessing(
std::unique_ptr<CustomProcessing> capture_post_processing) {
capture_post_processing_ = std::move(capture_post_processing);
return *this;
}
// Sets the render pre-processing sub-module to inject when APM is created.
AudioProcessingBuilder& SetRenderPreProcessing(
std::unique_ptr<CustomProcessing> render_pre_processing) {
render_pre_processing_ = std::move(render_pre_processing);
return *this;
}
// Sets the echo detector to inject when APM is created.
AudioProcessingBuilder& SetEchoDetector(
rtc::scoped_refptr<EchoDetector> echo_detector) {
echo_detector_ = std::move(echo_detector);
return *this;
}
// Sets the capture analyzer sub-module to inject when APM is created.
AudioProcessingBuilder& SetCaptureAnalyzer(
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
capture_analyzer_ = std::move(capture_analyzer);
return *this;
}
// Creates an APM instance with the specified config or the default one if
// unspecified. Injects the specified components transferring the ownership
// to the newly created APM instance - i.e., except for the config, the
// builder is reset to its initial state.
rtc::scoped_refptr<AudioProcessing> Create();
private:
AudioProcessing::Config config_;
std::unique_ptr<EchoControlFactory> echo_control_factory_;
std::unique_ptr<CustomProcessing> capture_post_processing_;
std::unique_ptr<CustomProcessing> render_pre_processing_;
rtc::scoped_refptr<EchoDetector> echo_detector_;
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
};
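// A minimal builder sketch (illustrative only): create an APM with echo
// cancellation enabled via the builder.
//   AudioProcessing::Config config;
//   config.echo_canceller.enabled = true;
//   rtc::scoped_refptr<AudioProcessing> apm =
//       AudioProcessingBuilder().SetConfig(config).Create();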
class StreamConfig {
public:
// sample_rate_hz: The sampling rate of the stream.
// num_channels: The number of audio channels in the stream.
StreamConfig(int sample_rate_hz = 0,
size_t num_channels = 0) // NOLINT(runtime/explicit)
: sample_rate_hz_(sample_rate_hz),
num_channels_(num_channels),
num_frames_(calculate_frames(sample_rate_hz)) {}
void set_sample_rate_hz(int value) {
sample_rate_hz_ = value;
num_frames_ = calculate_frames(value);
}
void set_num_channels(size_t value) { num_channels_ = value; }
int sample_rate_hz() const { return sample_rate_hz_; }
// The number of channels in the stream.
size_t num_channels() const { return num_channels_; }
size_t num_frames() const { return num_frames_; }
size_t num_samples() const { return num_channels_ * num_frames_; }
bool operator==(const StreamConfig& other) const {
return sample_rate_hz_ == other.sample_rate_hz_ &&
num_channels_ == other.num_channels_;
}
bool operator!=(const StreamConfig& other) const { return !(*this == other); }
private:
static size_t calculate_frames(int sample_rate_hz) {
return static_cast<size_t>(AudioProcessing::GetFrameSize(sample_rate_hz));
}
int sample_rate_hz_;
size_t num_channels_;
size_t num_frames_;
};
class ProcessingConfig {
public:
enum StreamName {
kInputStream,
kOutputStream,
kReverseInputStream,
kReverseOutputStream,
kNumStreamNames,
};
const StreamConfig& input_stream() const {
return streams[StreamName::kInputStream];
}
const StreamConfig& output_stream() const {
return streams[StreamName::kOutputStream];
}
const StreamConfig& reverse_input_stream() const {
return streams[StreamName::kReverseInputStream];
}
const StreamConfig& reverse_output_stream() const {
return streams[StreamName::kReverseOutputStream];
}
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
StreamConfig& reverse_input_stream() {
return streams[StreamName::kReverseInputStream];
}
StreamConfig& reverse_output_stream() {
return streams[StreamName::kReverseOutputStream];
}
bool operator==(const ProcessingConfig& other) const {
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
if (this->streams[i] != other.streams[i]) {
return false;
}
}
return true;
}
bool operator!=(const ProcessingConfig& other) const {
return !(*this == other);
}
StreamConfig streams[StreamName::kNumStreamNames];
};
// Interface for an echo detector submodule.
class EchoDetector : public RefCountInterface {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int capture_sample_rate_hz,
int num_capture_channels,
int render_sample_rate_hz,
int num_render_channels) = 0;
// Analysis (non-modifying) of the first channel of the render signal.
virtual void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) = 0;
// Analysis (non-modifying) of the capture signal.
virtual void AnalyzeCaptureAudio(
rtc::ArrayView<const float> capture_audio) = 0;
struct Metrics {
absl::optional<double> echo_likelihood;
absl::optional<double> echo_likelihood_recent_max;
};
// Collect current metrics from the echo detector.
virtual Metrics GetMetrics() const = 0;
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_PROCESSING_H_

api/audio/audio_processing_statistics.cc

@@ -0,0 +1,22 @@
/*
* Copyright 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/audio_processing_statistics.h"
namespace webrtc {
AudioProcessingStats::AudioProcessingStats() = default;
AudioProcessingStats::AudioProcessingStats(const AudioProcessingStats& other) =
default;
AudioProcessingStats::~AudioProcessingStats() = default;
} // namespace webrtc

api/audio/audio_processing_statistics.h

@@ -0,0 +1,67 @@
/*
* Copyright 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
#define API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_
#include <stdint.h>
#include "absl/types/optional.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
// This version of the stats uses Optionals; it will replace the regular
// AudioProcessingStatistics struct.
struct RTC_EXPORT AudioProcessingStats {
AudioProcessingStats();
AudioProcessingStats(const AudioProcessingStats& other);
~AudioProcessingStats();
// Deprecated.
// TODO(bugs.webrtc.org/11226): Remove.
// True if voice is detected in the last capture frame, after processing.
// It is conservative in flagging audio as speech, with low likelihood of
// incorrectly flagging a frame as voice.
// Only reported if voice detection is enabled in AudioProcessing::Config.
absl::optional<bool> voice_detected;
// AEC Statistics.
// ERL = 10log_10(P_far / P_echo)
absl::optional<double> echo_return_loss;
// ERLE = 10log_10(P_echo / P_out)
absl::optional<double> echo_return_loss_enhancement;
// Fraction of time that the AEC linear filter is divergent, in a 1-second
// non-overlapped aggregation window.
absl::optional<double> divergent_filter_fraction;
// The delay metrics consist of the delay median and standard deviation, as
// well as the fraction of delay estimates that can make the echo
// cancellation perform poorly. The values are aggregated until the first
// call to `GetStatistics()` and afterwards aggregated and updated every
// second. Note that if there are several clients pulling metrics from
// `GetStatistics()` during a session, the first call from any of them will
// switch to a one-second aggregation window for all.
absl::optional<int32_t> delay_median_ms;
absl::optional<int32_t> delay_standard_deviation_ms;
// Residual echo detector likelihood.
absl::optional<double> residual_echo_likelihood;
// Maximum residual echo likelihood from the last time period.
absl::optional<double> residual_echo_likelihood_recent_max;
// The instantaneous delay estimate produced in the AEC. The unit is in
// milliseconds and the value is the instantaneous value at the time of the
// call to `GetStatistics()`.
absl::optional<int32_t> delay_ms;
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_PROCESSING_STATISTICS_H_

api/audio/audio_view.h

@@ -0,0 +1,269 @@
/*
* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_VIEW_H_
#define API_AUDIO_AUDIO_VIEW_H_
#include "api/array_view.h"
#include "api/audio/channel_layout.h"
#include "rtc_base/checks.h"
namespace webrtc {
// This file contains 3 types of view classes:
//
// * MonoView<>: A single channel contiguous buffer of samples.
//
// * InterleavedView<>: Channel samples are interleaved (side-by-side) in
// the buffer. A single channel InterleavedView<> is the same thing as a
// MonoView<>.
//
// * DeinterleavedView<>: Each channel's samples are contiguous within the
// buffer. Channels can be enumerated and accessing the individual channel
// data is done via MonoView<>.
//
// The views are comparable to and built on rtc::ArrayView<> but add
// audio specific properties for the dimensions of the buffer and the above
// specialized [de]interleaved support.
//
// There are also a few generic utility functions that can simplify
// generic code for supporting more than one type of view.
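// A short sketch of constructing the three view types (illustrative only;
// buffer sizes assume 10 ms of stereo audio at 48 kHz, i.e. 480
// samples per channel):
//   int16_t interleaved_buf[2 * 480];
//   InterleavedView<int16_t> interleaved(interleaved_buf,
//                                        /*samples_per_channel=*/480,
//                                        /*num_channels=*/2);
//   int16_t deinterleaved_buf[2 * 480];
//   DeinterleavedView<int16_t> deinterleaved(deinterleaved_buf, 480, 2);
//   MonoView<int16_t> left = deinterleaved[0];  // First channel.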
// MonoView<> represents a view over a single contiguous, audio buffer. This
// can be either a single channel (mono) interleaved buffer (e.g. AudioFrame),
// or a de-interleaved channel (e.g. from AudioBuffer).
template <typename T>
using MonoView = rtc::ArrayView<T>;
// InterleavedView<> is a view over an interleaved audio buffer (e.g. from
// AudioFrame).
template <typename T>
class InterleavedView {
public:
using value_type = T;
InterleavedView() = default;
template <typename U>
InterleavedView(U* data, size_t samples_per_channel, size_t num_channels)
: num_channels_(num_channels),
samples_per_channel_(samples_per_channel),
data_(data, num_channels * samples_per_channel) {
RTC_DCHECK_LE(num_channels_, kMaxConcurrentChannels);
RTC_DCHECK(num_channels_ == 0u || samples_per_channel_ != 0u);
}
// Constructs an InterleavedView from a C-style array. Samples per channel
// is calculated based on the array size / num_channels.
template <typename U, size_t N>
InterleavedView(U (&array)[N], // NOLINT
size_t num_channels)
: InterleavedView(array, N / num_channels, num_channels) {
RTC_DCHECK_EQ(N % num_channels, 0u);
}
template <typename U>
InterleavedView(const InterleavedView<U>& other)
: num_channels_(other.num_channels()),
samples_per_channel_(other.samples_per_channel()),
data_(other.data()) {}
size_t num_channels() const { return num_channels_; }
size_t samples_per_channel() const { return samples_per_channel_; }
rtc::ArrayView<T> data() const { return data_; }
bool empty() const { return data_.empty(); }
size_t size() const { return data_.size(); }
MonoView<T> AsMono() const {
RTC_DCHECK_EQ(num_channels(), 1u);
RTC_DCHECK_EQ(data_.size(), samples_per_channel_);
return data_;
}
// A simple wrapper around memcpy that includes checks for properties.
// TODO(tommi): Consider if this can be utility function for both interleaved
// and deinterleaved views.
template <typename U>
void CopyFrom(const InterleavedView<U>& source) {
static_assert(sizeof(T) == sizeof(U), "");
RTC_DCHECK_EQ(num_channels(), source.num_channels());
RTC_DCHECK_EQ(samples_per_channel(), source.samples_per_channel());
RTC_DCHECK_GE(data_.size(), source.data().size());
const auto data = source.data();
memcpy(&data_[0], &data[0], data.size() * sizeof(U));
}
T& operator[](size_t idx) const { return data_[idx]; }
T* begin() const { return data_.begin(); }
T* end() const { return data_.end(); }
const T* cbegin() const { return data_.cbegin(); }
const T* cend() const { return data_.cend(); }
std::reverse_iterator<T*> rbegin() const { return data_.rbegin(); }
std::reverse_iterator<T*> rend() const { return data_.rend(); }
std::reverse_iterator<const T*> crbegin() const { return data_.crbegin(); }
std::reverse_iterator<const T*> crend() const { return data_.crend(); }
private:
// TODO(tommi): Consider having these both be stored as uint16_t to
// save a few bytes per view. Use `dchecked_cast` to support size_t during
// construction.
size_t num_channels_ = 0u;
size_t samples_per_channel_ = 0u;
rtc::ArrayView<T> data_;
};
template <typename T>
class DeinterleavedView {
public:
using value_type = T;
DeinterleavedView() = default;
template <typename U>
DeinterleavedView(U* data, size_t samples_per_channel, size_t num_channels)
: num_channels_(num_channels),
samples_per_channel_(samples_per_channel),
data_(data, num_channels * samples_per_channel_) {}
template <typename U>
DeinterleavedView(const DeinterleavedView<U>& other)
: num_channels_(other.num_channels()),
samples_per_channel_(other.samples_per_channel()),
data_(other.data()) {}
// Returns a deinterleaved channel where `idx` is the zero based index,
// in the range [0 .. num_channels()-1].
MonoView<T> operator[](size_t idx) const {
RTC_DCHECK_LT(idx, num_channels_);
return MonoView<T>(&data_[idx * samples_per_channel_],
samples_per_channel_);
}
size_t num_channels() const { return num_channels_; }
size_t samples_per_channel() const { return samples_per_channel_; }
rtc::ArrayView<T> data() const { return data_; }
bool empty() const { return data_.empty(); }
size_t size() const { return data_.size(); }
// Returns the first (and possibly only) channel.
MonoView<T> AsMono() const {
RTC_DCHECK_GE(num_channels(), 1u);
return (*this)[0];
}
private:
// TODO(tommi): Consider having these be stored as uint16_t to save a few
// bytes per view. Use `dchecked_cast` to support size_t during construction.
size_t num_channels_ = 0u;
size_t samples_per_channel_ = 0u;
rtc::ArrayView<T> data_;
};
template <typename T>
constexpr size_t NumChannels(const MonoView<T>& view) {
return 1u;
}
template <typename T>
size_t NumChannels(const InterleavedView<T>& view) {
return view.num_channels();
}
template <typename T>
size_t NumChannels(const DeinterleavedView<T>& view) {
return view.num_channels();
}
template <typename T>
constexpr bool IsMono(const MonoView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const MonoView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const InterleavedView<T>& view) {
return true;
}
template <typename T>
constexpr bool IsInterleavedView(const DeinterleavedView<const T>& view) {
return false;
}
template <typename T>
bool IsMono(const InterleavedView<T>& view) {
return NumChannels(view) == 1u;
}
template <typename T>
bool IsMono(const DeinterleavedView<T>& view) {
return NumChannels(view) == 1u;
}
template <typename T>
size_t SamplesPerChannel(const MonoView<T>& view) {
return view.size();
}
template <typename T>
size_t SamplesPerChannel(const InterleavedView<T>& view) {
return view.samples_per_channel();
}
template <typename T>
size_t SamplesPerChannel(const DeinterleavedView<T>& view) {
return view.samples_per_channel();
}
// A simple wrapper around memcpy that includes checks for properties.
// The parameter order is the same as for memcpy(), first destination then
// source.
template <typename D, typename S>
void CopySamples(D& destination, const S& source) {
static_assert(
sizeof(typename D::value_type) == sizeof(typename S::value_type), "");
// Here we'd really like to do
// static_assert(IsInterleavedView(destination) == IsInterleavedView(source),
// "");
// but the compiler doesn't like it inside this template function for
// some reason. The following check is an approximation but unfortunately
// means that copying between a MonoView and single channel interleaved or
// deinterleaved views wouldn't work.
// static_assert(sizeof(destination) == sizeof(source),
// "Incompatible view types");
RTC_DCHECK_EQ(NumChannels(destination), NumChannels(source));
RTC_DCHECK_EQ(SamplesPerChannel(destination), SamplesPerChannel(source));
RTC_DCHECK_GE(destination.size(), source.size());
memcpy(&destination[0], &source[0],
source.size() * sizeof(typename S::value_type));
}
// Sets all the samples in a view to 0. This template function is a simple
// wrapper around `memset()` but adds the benefit of automatically calculating
// the byte size from the number of samples and sample type.
template <typename T>
void ClearSamples(T& view) {
memset(&view[0], 0, view.size() * sizeof(typename T::value_type));
}
// Same as `ClearSamples()` above but allows for clearing only the first
// `sample_count` number of samples.
template <typename T>
void ClearSamples(T& view, size_t sample_count) {
RTC_DCHECK_LE(sample_count, view.size());
memset(&view[0], 0, sample_count * sizeof(typename T::value_type));
}
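// A usage sketch for the utilities above (illustrative only):
//   int16_t src_buf[480] = {0};
//   int16_t dst_buf[480];
//   MonoView<int16_t> src(src_buf, 480);
//   MonoView<int16_t> dst(dst_buf, 480);
//   CopySamples(dst, src);  // dst now holds the contents of src.
//   ClearSamples(src);      // src is zeroed again.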
} // namespace webrtc
#endif // API_AUDIO_AUDIO_VIEW_H_

api/audio/channel_layout.cc

@@ -0,0 +1,282 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/channel_layout.h"
#include <stddef.h>
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
static const int kLayoutToChannels[] = {
0, // CHANNEL_LAYOUT_NONE
0, // CHANNEL_LAYOUT_UNSUPPORTED
1, // CHANNEL_LAYOUT_MONO
2, // CHANNEL_LAYOUT_STEREO
3, // CHANNEL_LAYOUT_2_1
3, // CHANNEL_LAYOUT_SURROUND
4, // CHANNEL_LAYOUT_4_0
4, // CHANNEL_LAYOUT_2_2
4, // CHANNEL_LAYOUT_QUAD
5, // CHANNEL_LAYOUT_5_0
6, // CHANNEL_LAYOUT_5_1
5, // CHANNEL_LAYOUT_5_0_BACK
6, // CHANNEL_LAYOUT_5_1_BACK
7, // CHANNEL_LAYOUT_7_0
8, // CHANNEL_LAYOUT_7_1
8, // CHANNEL_LAYOUT_7_1_WIDE
2, // CHANNEL_LAYOUT_STEREO_DOWNMIX
3, // CHANNEL_LAYOUT_2POINT1
4, // CHANNEL_LAYOUT_3_1
5, // CHANNEL_LAYOUT_4_1
6, // CHANNEL_LAYOUT_6_0
6, // CHANNEL_LAYOUT_6_0_FRONT
6, // CHANNEL_LAYOUT_HEXAGONAL
7, // CHANNEL_LAYOUT_6_1
7, // CHANNEL_LAYOUT_6_1_BACK
7, // CHANNEL_LAYOUT_6_1_FRONT
7, // CHANNEL_LAYOUT_7_0_FRONT
8, // CHANNEL_LAYOUT_7_1_WIDE_BACK
8, // CHANNEL_LAYOUT_OCTAGONAL
0, // CHANNEL_LAYOUT_DISCRETE
3, // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
5, // CHANNEL_LAYOUT_4_1_QUAD_SIDE
0, // CHANNEL_LAYOUT_BITSTREAM
};
// The channel orderings for each layout as specified by FFmpeg. Each value
// represents the index of each channel in each layout. Values of -1 mean the
// channel at that index is not used for that layout. For example, the left side
// surround sound channel in FFmpeg's 5.1 layout is in the 5th position (because
// the order is L, R, C, LFE, LS, RS), so
// kChannelOrderings[CHANNEL_LAYOUT_5_1][SIDE_LEFT] = 4;
static const int kChannelOrderings[CHANNEL_LAYOUT_MAX + 1][CHANNELS_MAX + 1] = {
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_NONE
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_UNSUPPORTED
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_MONO
{-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_STEREO
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_2_1
{0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1},
// CHANNEL_LAYOUT_SURROUND
{0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_0
{0, 1, 2, -1, -1, -1, -1, -1, 3, -1, -1},
// CHANNEL_LAYOUT_2_2
{0, 1, -1, -1, -1, -1, -1, -1, -1, 2, 3},
// CHANNEL_LAYOUT_QUAD
{0, 1, -1, -1, 2, 3, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_5_0
{0, 1, 2, -1, -1, -1, -1, -1, -1, 3, 4},
// CHANNEL_LAYOUT_5_1
{0, 1, 2, 3, -1, -1, -1, -1, -1, 4, 5},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_5_0_BACK
{0, 1, 2, -1, 3, 4, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_5_1_BACK
{0, 1, 2, 3, 4, 5, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_7_0
{0, 1, 2, -1, 5, 6, -1, -1, -1, 3, 4},
// CHANNEL_LAYOUT_7_1
{0, 1, 2, 3, 6, 7, -1, -1, -1, 4, 5},
// CHANNEL_LAYOUT_7_1_WIDE
{0, 1, 2, 3, -1, -1, 6, 7, -1, 4, 5},
// CHANNEL_LAYOUT_STEREO_DOWNMIX
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_2POINT1
{0, 1, -1, 2, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_3_1
{0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_1
{0, 1, 2, 4, -1, -1, -1, -1, 3, -1, -1},
// CHANNEL_LAYOUT_6_0
{0, 1, 2, -1, -1, -1, -1, -1, 5, 3, 4},
// CHANNEL_LAYOUT_6_0_FRONT
{0, 1, -1, -1, -1, -1, 4, 5, -1, 2, 3},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_HEXAGONAL
{0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1},
// CHANNEL_LAYOUT_6_1
{0, 1, 2, 3, -1, -1, -1, -1, 6, 4, 5},
// CHANNEL_LAYOUT_6_1_BACK
{0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1},
// CHANNEL_LAYOUT_6_1_FRONT
{0, 1, -1, 6, -1, -1, 4, 5, -1, 2, 3},
// CHANNEL_LAYOUT_7_0_FRONT
{0, 1, 2, -1, -1, -1, 5, 6, -1, 3, 4},
// CHANNEL_LAYOUT_7_1_WIDE_BACK
{0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1},
// CHANNEL_LAYOUT_OCTAGONAL
{0, 1, 2, -1, 5, 6, -1, -1, 7, 3, 4},
// CHANNEL_LAYOUT_DISCRETE
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
{0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_1_QUAD_SIDE
{0, 1, -1, 4, -1, -1, -1, -1, -1, 2, 3},
// CHANNEL_LAYOUT_BITSTREAM
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
};
int ChannelLayoutToChannelCount(ChannelLayout layout) {
RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kLayoutToChannels));
RTC_DCHECK_LE(kLayoutToChannels[layout], kMaxConcurrentChannels);
return kLayoutToChannels[layout];
}
// Converts a channel count into a channel layout.
ChannelLayout GuessChannelLayout(int channels) {
switch (channels) {
case 1:
return CHANNEL_LAYOUT_MONO;
case 2:
return CHANNEL_LAYOUT_STEREO;
case 3:
return CHANNEL_LAYOUT_SURROUND;
case 4:
return CHANNEL_LAYOUT_QUAD;
case 5:
return CHANNEL_LAYOUT_5_0;
case 6:
return CHANNEL_LAYOUT_5_1;
case 7:
return CHANNEL_LAYOUT_6_1;
case 8:
return CHANNEL_LAYOUT_7_1;
default:
RTC_DLOG(LS_WARNING) << "Unsupported channel count: " << channels;
}
return CHANNEL_LAYOUT_UNSUPPORTED;
}
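// Note on GuessChannelLayout() above (illustrative): GuessChannelLayout(6)
// returns CHANNEL_LAYOUT_5_1, while GuessChannelLayout(9) logs a warning and
// returns CHANNEL_LAYOUT_UNSUPPORTED.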
int ChannelOrder(ChannelLayout layout, Channels channel) {
RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kChannelOrderings));
RTC_DCHECK_LT(static_cast<size_t>(channel), arraysize(kChannelOrderings[0]));
return kChannelOrderings[layout][channel];
}
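// Note on ChannelOrder() above (illustrative): per the kChannelOrderings
// table, ChannelOrder(CHANNEL_LAYOUT_5_1, SIDE_LEFT) returns 4 and
// ChannelOrder(CHANNEL_LAYOUT_MONO, LFE) returns -1.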
const char* ChannelLayoutToString(ChannelLayout layout) {
switch (layout) {
case CHANNEL_LAYOUT_NONE:
return "NONE";
case CHANNEL_LAYOUT_UNSUPPORTED:
return "UNSUPPORTED";
case CHANNEL_LAYOUT_MONO:
return "MONO";
case CHANNEL_LAYOUT_STEREO:
return "STEREO";
case CHANNEL_LAYOUT_2_1:
return "2.1";
case CHANNEL_LAYOUT_SURROUND:
return "SURROUND";
case CHANNEL_LAYOUT_4_0:
return "4.0";
case CHANNEL_LAYOUT_2_2:
return "QUAD_SIDE";
case CHANNEL_LAYOUT_QUAD:
return "QUAD";
case CHANNEL_LAYOUT_5_0:
return "5.0";
case CHANNEL_LAYOUT_5_1:
return "5.1";
case CHANNEL_LAYOUT_5_0_BACK:
return "5.0_BACK";
case CHANNEL_LAYOUT_5_1_BACK:
return "5.1_BACK";
case CHANNEL_LAYOUT_7_0:
return "7.0";
case CHANNEL_LAYOUT_7_1:
return "7.1";
case CHANNEL_LAYOUT_7_1_WIDE:
return "7.1_WIDE";
case CHANNEL_LAYOUT_STEREO_DOWNMIX:
return "STEREO_DOWNMIX";
case CHANNEL_LAYOUT_2POINT1:
return "2POINT1";
case CHANNEL_LAYOUT_3_1:
return "3.1";
case CHANNEL_LAYOUT_4_1:
return "4.1";
case CHANNEL_LAYOUT_6_0:
return "6.0";
case CHANNEL_LAYOUT_6_0_FRONT:
return "6.0_FRONT";
case CHANNEL_LAYOUT_HEXAGONAL:
return "HEXAGONAL";
case CHANNEL_LAYOUT_6_1:
return "6.1";
case CHANNEL_LAYOUT_6_1_BACK:
return "6.1_BACK";
case CHANNEL_LAYOUT_6_1_FRONT:
return "6.1_FRONT";
case CHANNEL_LAYOUT_7_0_FRONT:
return "7.0_FRONT";
case CHANNEL_LAYOUT_7_1_WIDE_BACK:
return "7.1_WIDE_BACK";
case CHANNEL_LAYOUT_OCTAGONAL:
return "OCTAGONAL";
case CHANNEL_LAYOUT_DISCRETE:
return "DISCRETE";
case CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
return "STEREO_AND_KEYBOARD_MIC";
case CHANNEL_LAYOUT_4_1_QUAD_SIDE:
return "4.1_QUAD_SIDE";
case CHANNEL_LAYOUT_BITSTREAM:
return "BITSTREAM";
}
RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout;
return "";
}
} // namespace webrtc

View File

@ -0,0 +1,165 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_CHANNEL_LAYOUT_H_
#define API_AUDIO_CHANNEL_LAYOUT_H_
namespace webrtc {
// This file is derived from Chromium's base/channel_layout.h.
// Enumerates the various representations of the ordering of audio channels.
// Logged to UMA, so never reuse a value, always add new/greater ones!
enum ChannelLayout {
CHANNEL_LAYOUT_NONE = 0,
CHANNEL_LAYOUT_UNSUPPORTED = 1,
// Front C
CHANNEL_LAYOUT_MONO = 2,
// Front L, Front R
CHANNEL_LAYOUT_STEREO = 3,
// Front L, Front R, Back C
CHANNEL_LAYOUT_2_1 = 4,
// Front L, Front R, Front C
CHANNEL_LAYOUT_SURROUND = 5,
// Front L, Front R, Front C, Back C
CHANNEL_LAYOUT_4_0 = 6,
// Front L, Front R, Side L, Side R
CHANNEL_LAYOUT_2_2 = 7,
// Front L, Front R, Back L, Back R
CHANNEL_LAYOUT_QUAD = 8,
// Front L, Front R, Front C, Side L, Side R
CHANNEL_LAYOUT_5_0 = 9,
// Front L, Front R, Front C, LFE, Side L, Side R
CHANNEL_LAYOUT_5_1 = 10,
// Front L, Front R, Front C, Back L, Back R
CHANNEL_LAYOUT_5_0_BACK = 11,
// Front L, Front R, Front C, LFE, Back L, Back R
CHANNEL_LAYOUT_5_1_BACK = 12,
// Front L, Front R, Front C, Side L, Side R, Back L, Back R
CHANNEL_LAYOUT_7_0 = 13,
// Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R
CHANNEL_LAYOUT_7_1 = 14,
// Front L, Front R, Front C, LFE, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_1_WIDE = 15,
// Stereo L, Stereo R
CHANNEL_LAYOUT_STEREO_DOWNMIX = 16,
// Stereo L, Stereo R, LFE
CHANNEL_LAYOUT_2POINT1 = 17,
// Stereo L, Stereo R, Front C, LFE
CHANNEL_LAYOUT_3_1 = 18,
// Stereo L, Stereo R, Front C, Rear C, LFE
CHANNEL_LAYOUT_4_1 = 19,
// Stereo L, Stereo R, Front C, Side L, Side R, Back C
CHANNEL_LAYOUT_6_0 = 20,
// Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_6_0_FRONT = 21,
// Stereo L, Stereo R, Front C, Rear L, Rear R, Rear C
CHANNEL_LAYOUT_HEXAGONAL = 22,
// Stereo L, Stereo R, Front C, LFE, Side L, Side R, Rear Center
CHANNEL_LAYOUT_6_1 = 23,
// Stereo L, Stereo R, Front C, LFE, Back L, Back R, Rear Center
CHANNEL_LAYOUT_6_1_BACK = 24,
// Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC, LFE
CHANNEL_LAYOUT_6_1_FRONT = 25,
// Front L, Front R, Front C, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_0_FRONT = 26,
// Front L, Front R, Front C, LFE, Back L, Back R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_1_WIDE_BACK = 27,
// Front L, Front R, Front C, Side L, Side R, Back L, Back R, Back C.
CHANNEL_LAYOUT_OCTAGONAL = 28,
// Channels are not explicitly mapped to speakers.
CHANNEL_LAYOUT_DISCRETE = 29,
// Front L, Front R, Front C. Front C contains the keyboard mic audio. This
// layout is only intended for input for WebRTC. The Front C channel
// is stripped away in the WebRTC audio input pipeline and never seen outside
// of that.
CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC = 30,
// Front L, Front R, Side L, Side R, LFE
CHANNEL_LAYOUT_4_1_QUAD_SIDE = 31,
// Actual channel layout is specified in the bitstream and the actual channel
// count is unknown at Chromium media pipeline level (useful for audio
// pass-through mode).
CHANNEL_LAYOUT_BITSTREAM = 32,
// Max value, must always equal the largest entry ever logged.
CHANNEL_LAYOUT_MAX = CHANNEL_LAYOUT_BITSTREAM
};
// Note: Do not reorder or reassign these values; other code depends on their
// ordering to operate correctly. E.g., CoreAudio channel layout computations.
enum Channels {
LEFT = 0,
RIGHT,
CENTER,
LFE,
BACK_LEFT,
BACK_RIGHT,
LEFT_OF_CENTER,
RIGHT_OF_CENTER,
BACK_CENTER,
SIDE_LEFT,
SIDE_RIGHT,
CHANNELS_MAX =
SIDE_RIGHT, // Must always equal the largest value ever logged.
};
// The maximum number of concurrently active channels for all possible layouts.
// ChannelLayoutToChannelCount() will never return a value higher than this.
constexpr int kMaxConcurrentChannels = 8;
// Returns the expected channel position in an interleaved stream. Values of -1
// mean the channel at that index is not used for that layout. Values range
// from 0 to ChannelLayoutToChannelCount(layout) - 1.
int ChannelOrder(ChannelLayout layout, Channels channel);
// Returns the number of channels in a given ChannelLayout.
int ChannelLayoutToChannelCount(ChannelLayout layout);
// Returns the best guess at a layout for the given number of channels,
// or CHANNEL_LAYOUT_UNSUPPORTED if there is no good match.
ChannelLayout GuessChannelLayout(int channels);
// Returns a string representation of the channel layout.
const char* ChannelLayoutToString(ChannelLayout layout);
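// Example (illustrative, not part of the original header):
//
//   ChannelLayout layout = GuessChannelLayout(6);        // CHANNEL_LAYOUT_5_1
//   int channels = ChannelLayoutToChannelCount(layout);  // 6
//   int side_left = ChannelOrder(layout, SIDE_LEFT);     // 4
//   const char* name = ChannelLayoutToString(layout);    // "5.1"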
} // namespace webrtc
#endif // API_AUDIO_CHANNEL_LAYOUT_H_

View File

@ -0,0 +1,278 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/echo_canceller3_config.h"
#include <algorithm>
#include <cmath>
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
bool Limit(float* value, float min, float max) {
float clamped = rtc::SafeClamp(*value, min, max);
clamped = std::isfinite(clamped) ? clamped : min;
bool res = *value == clamped;
*value = clamped;
return res;
}
bool Limit(size_t* value, size_t min, size_t max) {
size_t clamped = rtc::SafeClamp(*value, min, max);
bool res = *value == clamped;
*value = clamped;
return res;
}
bool Limit(int* value, int min, int max) {
int clamped = rtc::SafeClamp(*value, min, max);
bool res = *value == clamped;
*value = clamped;
return res;
}
bool FloorLimit(size_t* value, size_t min) {
size_t clamped = *value >= min ? *value : min;
bool res = *value == clamped;
*value = clamped;
return res;
}
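// Note (illustrative): each helper above clamps in place and reports whether
// the value was already within range. E.g., with float v = 2.f,
// Limit(&v, 0.f, 1.f) sets v to 1.f and returns false; a non-finite float is
// replaced by `min`.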
} // namespace
EchoCanceller3Config::EchoCanceller3Config() = default;
EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) =
default;
EchoCanceller3Config& EchoCanceller3Config::operator=(
const EchoCanceller3Config& e) = default;
EchoCanceller3Config::Delay::Delay() = default;
EchoCanceller3Config::Delay::Delay(const EchoCanceller3Config::Delay& e) =
default;
EchoCanceller3Config::Delay& EchoCanceller3Config::Delay::operator=(
const Delay& e) = default;
EchoCanceller3Config::EchoModel::EchoModel() = default;
EchoCanceller3Config::EchoModel::EchoModel(
const EchoCanceller3Config::EchoModel& e) = default;
EchoCanceller3Config::EchoModel& EchoCanceller3Config::EchoModel::operator=(
const EchoModel& e) = default;
EchoCanceller3Config::Suppressor::Suppressor() = default;
EchoCanceller3Config::Suppressor::Suppressor(
const EchoCanceller3Config::Suppressor& e) = default;
EchoCanceller3Config::Suppressor& EchoCanceller3Config::Suppressor::operator=(
const Suppressor& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
float enr_transparent,
float enr_suppress,
float emr_transparent)
: enr_transparent(enr_transparent),
enr_suppress(enr_suppress),
emr_transparent(emr_transparent) {}
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
const EchoCanceller3Config::Suppressor::MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds&
EchoCanceller3Config::Suppressor::MaskingThresholds::operator=(
const MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::Tuning::Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf)
: mask_lf(mask_lf),
mask_hf(mask_hf),
max_inc_factor(max_inc_factor),
max_dec_factor_lf(max_dec_factor_lf) {}
EchoCanceller3Config::Suppressor::Tuning::Tuning(
const EchoCanceller3Config::Suppressor::Tuning& e) = default;
EchoCanceller3Config::Suppressor::Tuning&
EchoCanceller3Config::Suppressor::Tuning::operator=(const Tuning& e) = default;
bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
RTC_DCHECK(config);
EchoCanceller3Config* c = config;
bool res = true;
if (c->delay.down_sampling_factor != 4 &&
c->delay.down_sampling_factor != 8) {
c->delay.down_sampling_factor = 4;
res = false;
}
res = res & Limit(&c->delay.default_delay, 0, 5000);
res = res & Limit(&c->delay.num_filters, 0, 5000);
res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000);
res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000);
res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000);
res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f);
res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f);
res = res & Limit(&c->delay.delay_selection_thresholds.initial, 1, 250);
res = res & Limit(&c->delay.delay_selection_thresholds.converged, 1, 250);
res = res & FloorLimit(&c->filter.refined.length_blocks, 1);
res = res & Limit(&c->filter.refined.leakage_converged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.leakage_diverged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.error_floor, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.error_ceil, 0.f, 100000000.f);
res = res & Limit(&c->filter.refined.noise_gate, 0.f, 100000000.f);
res = res & FloorLimit(&c->filter.refined_initial.length_blocks, 1);
res = res & Limit(&c->filter.refined_initial.leakage_converged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.leakage_diverged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.error_floor, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.error_ceil, 0.f, 100000000.f);
res = res & Limit(&c->filter.refined_initial.noise_gate, 0.f, 100000000.f);
if (c->filter.refined.length_blocks <
c->filter.refined_initial.length_blocks) {
c->filter.refined_initial.length_blocks = c->filter.refined.length_blocks;
res = false;
}
res = res & FloorLimit(&c->filter.coarse.length_blocks, 1);
res = res & Limit(&c->filter.coarse.rate, 0.f, 1.f);
res = res & Limit(&c->filter.coarse.noise_gate, 0.f, 100000000.f);
res = res & FloorLimit(&c->filter.coarse_initial.length_blocks, 1);
res = res & Limit(&c->filter.coarse_initial.rate, 0.f, 1.f);
res = res & Limit(&c->filter.coarse_initial.noise_gate, 0.f, 100000000.f);
if (c->filter.coarse.length_blocks < c->filter.coarse_initial.length_blocks) {
c->filter.coarse_initial.length_blocks = c->filter.coarse.length_blocks;
res = false;
}
res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000);
res = res & Limit(&c->erle.min, 1.f, 100000.f);
res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
res = res & Limit(&c->erle.max_h, 1.f, 100000.f);
if (c->erle.min > c->erle.max_l || c->erle.min > c->erle.max_h) {
c->erle.min = std::min(c->erle.max_l, c->erle.max_h);
res = false;
}
res = res & Limit(&c->erle.num_sections, 1, c->filter.refined.length_blocks);
res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);
res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f);
res =
res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
res = res &
Limit(&c->echo_audibility.normal_render_limit, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.floor_power, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_lf, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_mf, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_hf, 0.f,
32768.f * 32768.f);
res = res &
Limit(&c->render_levels.active_render_limit, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->render_levels.poor_excitation_render_limit, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->render_levels.poor_excitation_render_limit_ds8, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_model.noise_floor_hold, 0, 1000);
res = res & Limit(&c->echo_model.min_noise_floor_power, 0, 2000000.f);
res = res & Limit(&c->echo_model.stationary_gate_slope, 0, 1000000.f);
res = res & Limit(&c->echo_model.noise_gate_power, 0, 1000000.f);
res = res & Limit(&c->echo_model.noise_gate_slope, 0, 1000000.f);
res = res & Limit(&c->echo_model.render_pre_window_size, 0, 100);
res = res & Limit(&c->echo_model.render_post_window_size, 0, 100);
res = res & Limit(&c->comfort_noise.noise_floor_dbfs, -200.f, 0.f);
res = res & Limit(&c->suppressor.nearend_average_blocks, 1, 5000);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.enr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.enr_suppress, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.emr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.enr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.enr_suppress, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.emr_transparent, 0.f, 100.f);
res = res & Limit(&c->suppressor.normal_tuning.max_inc_factor, 0.f, 100.f);
res = res & Limit(&c->suppressor.normal_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.enr_transparent, 0.f,
100.f);
res = res &
Limit(&c->suppressor.nearend_tuning.mask_lf.enr_suppress, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.emr_transparent, 0.f,
100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.enr_transparent, 0.f,
100.f);
res = res &
Limit(&c->suppressor.nearend_tuning.mask_hf.enr_suppress, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.emr_transparent, 0.f,
100.f);
res = res & Limit(&c->suppressor.nearend_tuning.max_inc_factor, 0.f, 100.f);
res =
res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_band, 0, 63);
res = res &
Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64);
res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.hold_duration, 0,
10000);
res = res & Limit(&c->suppressor.dominant_nearend_detection.trigger_threshold,
0, 10000);
res = res &
Limit(&c->suppressor.subband_nearend_detection.nearend_average_blocks,
1, 1024);
res =
res & Limit(&c->suppressor.subband_nearend_detection.subband1.low, 0, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.subband1.high,
c->suppressor.subband_nearend_detection.subband1.low, 65);
res =
res & Limit(&c->suppressor.subband_nearend_detection.subband2.low, 0, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.subband2.high,
c->suppressor.subband_nearend_detection.subband2.low, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.nearend_threshold,
0.f, 1.e24f);
res = res & Limit(&c->suppressor.subband_nearend_detection.snr_threshold, 0.f,
1.e24f);
res = res & Limit(&c->suppressor.high_bands_suppression.enr_threshold, 0.f,
1000000.f);
res = res & Limit(&c->suppressor.high_bands_suppression.max_gain_during_echo,
0.f, 1.f);
res = res & Limit(&c->suppressor.high_bands_suppression
.anti_howling_activation_threshold,
0.f, 32768.f * 32768.f);
res = res & Limit(&c->suppressor.high_bands_suppression.anti_howling_gain,
0.f, 1.f);
res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f);
return res;
}
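// Example (illustrative): sanitizing a config before use.
//
//   EchoCanceller3Config config;
//   config.delay.down_sampling_factor = 3;  // Unsupported; only 4 or 8 pass.
//   bool unchanged = EchoCanceller3Config::Validate(&config);
//   // unchanged == false; config.delay.down_sampling_factor is now 4.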
} // namespace webrtc

View File

@ -0,0 +1,250 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#define API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#include <stddef.h> // size_t
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
// Configuration struct for EchoCanceller3
struct RTC_EXPORT EchoCanceller3Config {
// Checks and updates the config parameters to lie within (mostly) reasonable
// ranges. Returns true if and only if the config did not need to be changed.
static bool Validate(EchoCanceller3Config* config);
EchoCanceller3Config();
EchoCanceller3Config(const EchoCanceller3Config& e);
EchoCanceller3Config& operator=(const EchoCanceller3Config& other);
struct Buffering {
size_t excess_render_detection_interval_blocks = 250;
size_t max_allowed_excess_render_blocks = 8;
} buffering;
struct Delay {
Delay();
Delay(const Delay& e);
Delay& operator=(const Delay& e);
size_t default_delay = 5;
size_t down_sampling_factor = 4;
size_t num_filters = 5;
size_t delay_headroom_samples = 32;
size_t hysteresis_limit_blocks = 1;
size_t fixed_capture_delay_samples = 0;
float delay_estimate_smoothing = 0.7f;
float delay_estimate_smoothing_delay_found = 0.7f;
float delay_candidate_detection_threshold = 0.2f;
struct DelaySelectionThresholds {
int initial;
int converged;
} delay_selection_thresholds = {5, 20};
bool use_external_delay_estimator = false;
bool log_warning_on_delay_changes = false;
struct AlignmentMixing {
bool downmix;
bool adaptive_selection;
float activity_power_threshold;
bool prefer_first_two_channels;
};
AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
bool detect_pre_echo = true;
} delay;
struct Filter {
struct RefinedConfiguration {
size_t length_blocks;
float leakage_converged;
float leakage_diverged;
float error_floor;
float error_ceil;
float noise_gate;
};
struct CoarseConfiguration {
size_t length_blocks;
float rate;
float noise_gate;
};
RefinedConfiguration refined = {13, 0.00005f, 0.05f,
0.001f, 2.f, 20075344.f};
CoarseConfiguration coarse = {13, 0.7f, 20075344.f};
RefinedConfiguration refined_initial = {12, 0.005f, 0.5f,
0.001f, 2.f, 20075344.f};
CoarseConfiguration coarse_initial = {12, 0.9f, 20075344.f};
size_t config_change_duration_blocks = 250;
float initial_state_seconds = 2.5f;
int coarse_reset_hangover_blocks = 25;
bool conservative_initial_phase = false;
bool enable_coarse_filter_output_usage = true;
bool use_linear_filter = true;
bool high_pass_filter_echo_reference = false;
bool export_linear_aec_output = false;
} filter;
struct Erle {
float min = 1.f;
float max_l = 4.f;
float max_h = 1.5f;
bool onset_detection = true;
size_t num_sections = 1;
bool clamp_quality_estimate_to_zero = true;
bool clamp_quality_estimate_to_one = true;
} erle;
struct EpStrength {
float default_gain = 1.f;
float default_len = 0.83f;
float nearend_len = 0.83f;
bool echo_can_saturate = true;
bool bounded_erl = false;
bool erle_onset_compensation_in_dominant_nearend = false;
bool use_conservative_tail_frequency_response = true;
} ep_strength;
struct EchoAudibility {
float low_render_limit = 4 * 64.f;
float normal_render_limit = 64.f;
float floor_power = 2 * 64.f;
float audibility_threshold_lf = 10;
float audibility_threshold_mf = 10;
float audibility_threshold_hf = 10;
bool use_stationarity_properties = false;
bool use_stationarity_properties_at_init = false;
} echo_audibility;
struct RenderLevels {
float active_render_limit = 100.f;
float poor_excitation_render_limit = 150.f;
float poor_excitation_render_limit_ds8 = 20.f;
float render_power_gain_db = 0.f;
} render_levels;
struct EchoRemovalControl {
bool has_clock_drift = false;
bool linear_and_stable_echo_path = false;
} echo_removal_control;
struct EchoModel {
EchoModel();
EchoModel(const EchoModel& e);
EchoModel& operator=(const EchoModel& e);
size_t noise_floor_hold = 50;
float min_noise_floor_power = 1638400.f;
float stationary_gate_slope = 10.f;
float noise_gate_power = 27509.42f;
float noise_gate_slope = 0.3f;
size_t render_pre_window_size = 1;
size_t render_post_window_size = 1;
bool model_reverb_in_nonlinear_mode = true;
} echo_model;
struct ComfortNoise {
float noise_floor_dbfs = -96.03406f;
} comfort_noise;
struct Suppressor {
Suppressor();
Suppressor(const Suppressor& e);
Suppressor& operator=(const Suppressor& e);
size_t nearend_average_blocks = 4;
struct MaskingThresholds {
MaskingThresholds(float enr_transparent,
float enr_suppress,
float emr_transparent);
MaskingThresholds(const MaskingThresholds& e);
MaskingThresholds& operator=(const MaskingThresholds& e);
float enr_transparent;
float enr_suppress;
float emr_transparent;
};
struct Tuning {
Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf);
Tuning(const Tuning& e);
Tuning& operator=(const Tuning& e);
MaskingThresholds mask_lf;
MaskingThresholds mask_hf;
float max_inc_factor;
float max_dec_factor_lf;
};
Tuning normal_tuning = Tuning(MaskingThresholds(.3f, .4f, .3f),
MaskingThresholds(.07f, .1f, .3f),
2.0f,
0.25f);
Tuning nearend_tuning = Tuning(MaskingThresholds(1.09f, 1.1f, .3f),
MaskingThresholds(.1f, .3f, .3f),
2.0f,
0.25f);
bool lf_smoothing_during_initial_phase = true;
int last_permanent_lf_smoothing_band = 0;
int last_lf_smoothing_band = 5;
int last_lf_band = 5;
int first_hf_band = 8;
struct DominantNearendDetection {
float enr_threshold = .25f;
float enr_exit_threshold = 10.f;
float snr_threshold = 30.f;
int hold_duration = 50;
int trigger_threshold = 12;
bool use_during_initial_phase = true;
bool use_unbounded_echo_spectrum = true;
} dominant_nearend_detection;
struct SubbandNearendDetection {
size_t nearend_average_blocks = 1;
struct SubbandRegion {
size_t low;
size_t high;
};
SubbandRegion subband1 = {1, 1};
SubbandRegion subband2 = {1, 1};
float nearend_threshold = 1.f;
float snr_threshold = 1.f;
} subband_nearend_detection;
bool use_subband_nearend_detection = false;
struct HighBandsSuppression {
float enr_threshold = 1.f;
float max_gain_during_echo = 1.f;
float anti_howling_activation_threshold = 400.f;
float anti_howling_gain = 1.f;
} high_bands_suppression;
float floor_first_increase = 0.00001f;
bool conservative_hf_suppression = false;
} suppressor;
struct MultiChannel {
bool detect_stereo_content = true;
float stereo_detection_threshold = 0.0f;
int stereo_detection_timeout_threshold_seconds = 300;
float stereo_detection_hysteresis_seconds = 2.0f;
} multi_channel;
};
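// Example (illustrative): adjusting a few fields, then validating. The chosen
// values are arbitrary, not recommendations.
//
//   EchoCanceller3Config cfg;
//   cfg.filter.export_linear_aec_output = true;
//   cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
//   EchoCanceller3Config::Validate(&cfg);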
} // namespace webrtc
#endif // API_AUDIO_ECHO_CANCELLER3_CONFIG_H_

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CONTROL_H_
#define API_AUDIO_ECHO_CONTROL_H_
#include <memory>
#include "rtc_base/checks.h"
namespace webrtc {
class AudioBuffer;
// Interface for an acoustic echo cancellation (AEC) submodule.
class EchoControl {
public:
// Analyzes the render signal without modifying it.
virtual void AnalyzeRender(AudioBuffer* render) = 0;
// Analyzes the capture signal without modifying it.
virtual void AnalyzeCapture(AudioBuffer* capture) = 0;
// Processes the capture signal in order to remove the echo.
virtual void ProcessCapture(AudioBuffer* capture, bool level_change) = 0;
// As above, but also returns the linear filter output.
virtual void ProcessCapture(AudioBuffer* capture,
AudioBuffer* linear_output,
bool level_change) = 0;
struct Metrics {
double echo_return_loss;
double echo_return_loss_enhancement;
int delay_ms;
};
// Collect current metrics from the echo controller.
virtual Metrics GetMetrics() const = 0;
// Provides an optional external estimate of the audio buffer delay.
virtual void SetAudioBufferDelay(int delay_ms) = 0;
// Specifies whether the capture output will be used. This allows the echo
// controller to deactivate some of the processing when the resulting output
// is not going to be used anyway, for instance when the endpoint is muted.
// TODO(b/177830919): Make pure virtual.
virtual void SetCaptureOutputUsage(bool capture_output_used) {}
// Returns whether the signal is altered.
virtual bool ActiveProcessing() const = 0;
virtual ~EchoControl() {}
};
// Interface for a factory that creates EchoControllers.
class EchoControlFactory {
public:
virtual std::unique_ptr<EchoControl> Create(int sample_rate_hz,
int num_render_channels,
int num_capture_channels) = 0;
virtual ~EchoControlFactory() = default;
};
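// Example (illustrative sketch, not part of the original header): plugging in
// a custom AEC via the factory interface. MyEchoControl is a hypothetical
// EchoControl implementation.
//
//   class MyEchoControlFactory : public EchoControlFactory {
//    public:
//     std::unique_ptr<EchoControl> Create(int sample_rate_hz,
//                                         int num_render_channels,
//                                         int num_capture_channels) override {
//       return std::make_unique<MyEchoControl>();
//     }
//   };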
} // namespace webrtc
#endif // API_AUDIO_ECHO_CONTROL_H_