update code.

This commit is contained in:
luocai
2024-09-05 09:59:28 +08:00
parent 4f3dc015f7
commit ccf69909d6
223 changed files with 36168 additions and 0 deletions


@ -0,0 +1,80 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/channel_buffer.h"
#include <cstdint>
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
namespace webrtc {
IFChannelBuffer::IFChannelBuffer(size_t num_frames,
size_t num_channels,
size_t num_bands)
: ivalid_(true),
ibuf_(num_frames, num_channels, num_bands),
fvalid_(true),
fbuf_(num_frames, num_channels, num_bands) {}
IFChannelBuffer::~IFChannelBuffer() = default;
ChannelBuffer<int16_t>* IFChannelBuffer::ibuf() {
RefreshI();
fvalid_ = false;
return &ibuf_;
}
ChannelBuffer<float>* IFChannelBuffer::fbuf() {
RefreshF();
ivalid_ = false;
return &fbuf_;
}
const ChannelBuffer<int16_t>* IFChannelBuffer::ibuf_const() const {
RefreshI();
return &ibuf_;
}
const ChannelBuffer<float>* IFChannelBuffer::fbuf_const() const {
RefreshF();
return &fbuf_;
}
void IFChannelBuffer::RefreshF() const {
if (!fvalid_) {
RTC_DCHECK(ivalid_);
fbuf_.set_num_channels(ibuf_.num_channels());
const int16_t* const* int_channels = ibuf_.channels();
float* const* float_channels = fbuf_.channels();
for (size_t i = 0; i < ibuf_.num_channels(); ++i) {
for (size_t j = 0; j < ibuf_.num_frames(); ++j) {
float_channels[i][j] = int_channels[i][j];
}
}
fvalid_ = true;
}
}
void IFChannelBuffer::RefreshI() const {
if (!ivalid_) {
RTC_DCHECK(fvalid_);
int16_t* const* int_channels = ibuf_.channels();
ibuf_.set_num_channels(fbuf_.num_channels());
const float* const* float_channels = fbuf_.channels();
for (size_t i = 0; i < fbuf_.num_channels(); ++i) {
FloatS16ToS16(float_channels[i], ibuf_.num_frames(), int_channels[i]);
}
ivalid_ = true;
}
}
} // namespace webrtc


@ -0,0 +1,253 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_CHANNEL_BUFFER_H_
#define COMMON_AUDIO_CHANNEL_BUFFER_H_
#include <string.h>
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/gtest_prod_util.h"
namespace webrtc {
// TODO: b/335805780 - Remove this method. Instead, use Deinterleave() from
// audio_util.h which requires size checked buffer views.
template <typename T>
void Deinterleave(const T* interleaved,
size_t samples_per_channel,
size_t num_channels,
T* const* deinterleaved) {
for (size_t i = 0; i < num_channels; ++i) {
T* channel = deinterleaved[i];
size_t interleaved_idx = i;
for (size_t j = 0; j < samples_per_channel; ++j) {
channel[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels;
}
}
}
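// Editorial usage sketch (not part of the original source): deinterleaving a
// short stereo block with the raw-pointer overload above. Buffer sizes and
// sample values are made up for illustration.
//
//   int16_t interleaved[] = {10, 20, 11, 21, 12, 22};  // L0 R0 L1 R1 L2 R2
//   int16_t left[3];
//   int16_t right[3];
//   int16_t* channels[] = {left, right};
//   Deinterleave(interleaved, /*samples_per_channel=*/3, /*num_channels=*/2,
//                channels);
//   // left == {10, 11, 12}, right == {20, 21, 22}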
// `Interleave()` variant for cases where the deinterleaved channels aren't
// represented by a `DeinterleavedView`.
// TODO: b/335805780 - Remove this method. Instead, use Interleave() from
// audio_util.h which requires size checked buffer views.
template <typename T>
void Interleave(const T* const* deinterleaved,
size_t samples_per_channel,
size_t num_channels,
InterleavedView<T>& interleaved) {
RTC_DCHECK_EQ(NumChannels(interleaved), num_channels);
RTC_DCHECK_EQ(SamplesPerChannel(interleaved), samples_per_channel);
for (size_t i = 0; i < num_channels; ++i) {
const T* channel = deinterleaved[i];
size_t interleaved_idx = i;
for (size_t j = 0; j < samples_per_channel; ++j) {
interleaved[interleaved_idx] = channel[j];
interleaved_idx += num_channels;
}
}
}
// Helper to encapsulate a contiguous data buffer, full or split into frequency
// bands, with access to pointer arrays of the deinterleaved channels and
// bands. The buffer is zero initialized at creation.
//
// The buffer structure is shown below for a 2-channel, 2-band case:
//
// `data_`:
// { [ --- b1ch1 --- ] [ --- b2ch1 --- ] [ --- b1ch2 --- ] [ --- b2ch2 --- ] }
//
// The pointer arrays for the same example are as follows:
//
// `channels_`:
// { [ b1ch1* ] [ b1ch2* ] [ b2ch1* ] [ b2ch2* ] }
//
// `bands_`:
// { [ b1ch1* ] [ b2ch1* ] [ b1ch2* ] [ b2ch2* ] }
template <typename T>
class ChannelBuffer {
public:
ChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1)
: data_(new T[num_frames * num_channels]()),
channels_(new T*[num_channels * num_bands]),
bands_(new T*[num_channels * num_bands]),
num_frames_(num_frames),
num_frames_per_band_(num_frames / num_bands),
num_allocated_channels_(num_channels),
num_channels_(num_channels),
num_bands_(num_bands),
bands_view_(num_allocated_channels_,
std::vector<rtc::ArrayView<T>>(num_bands_)),
channels_view_(
num_bands_,
std::vector<rtc::ArrayView<T>>(num_allocated_channels_)) {
// Temporarily cast away const_ness to allow populating the array views.
auto* bands_view =
const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(&bands_view_);
auto* channels_view =
const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(
&channels_view_);
for (size_t ch = 0; ch < num_allocated_channels_; ++ch) {
for (size_t band = 0; band < num_bands_; ++band) {
(*channels_view)[band][ch] = rtc::ArrayView<T>(
&data_[ch * num_frames_ + band * num_frames_per_band_],
num_frames_per_band_);
(*bands_view)[ch][band] = channels_view_[band][ch];
channels_[band * num_allocated_channels_ + ch] =
channels_view_[band][ch].data();
bands_[ch * num_bands_ + band] =
channels_[band * num_allocated_channels_ + ch];
}
}
}
// Returns a pointer array to the channels.
// If band is explicitly specified, the channels for a specific band are
// returned and the usage becomes: channels(band)[channel][sample].
// Where:
// 0 <= band < `num_bands_`
// 0 <= channel < `num_allocated_channels_`
// 0 <= sample < `num_frames_per_band_`
// If band is not explicitly specified, the full-band channels (or lower band
// channels) are returned and the usage becomes: channels()[channel][sample].
// Where:
// 0 <= channel < `num_allocated_channels_`
// 0 <= sample < `num_frames_`
const T* const* channels(size_t band = 0) const {
RTC_DCHECK_LT(band, num_bands_);
return &channels_[band * num_allocated_channels_];
}
T* const* channels(size_t band = 0) {
const ChannelBuffer<T>* t = this;
return const_cast<T* const*>(t->channels(band));
}
rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) {
return channels_view_[band];
}
rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) const {
return channels_view_[band];
}
// Returns a pointer array to the bands for a specific channel.
// Usage:
// bands(channel)[band][sample].
// Where:
// 0 <= channel < `num_channels_`
// 0 <= band < `num_bands_`
// 0 <= sample < `num_frames_per_band_`
const T* const* bands(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
RTC_DCHECK_GE(channel, 0);
return &bands_[channel * num_bands_];
}
T* const* bands(size_t channel) {
const ChannelBuffer<T>* t = this;
return const_cast<T* const*>(t->bands(channel));
}
rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) {
return bands_view_[channel];
}
rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) const {
return bands_view_[channel];
}
// Sets the `slice` pointers to the `start_frame` position for each channel.
// Returns `slice` for convenience.
const T* const* Slice(T** slice, size_t start_frame) const {
RTC_DCHECK_LT(start_frame, num_frames_);
for (size_t i = 0; i < num_channels_; ++i)
slice[i] = &channels_[i][start_frame];
return slice;
}
T** Slice(T** slice, size_t start_frame) {
const ChannelBuffer<T>* t = this;
return const_cast<T**>(t->Slice(slice, start_frame));
}
size_t num_frames() const { return num_frames_; }
size_t num_frames_per_band() const { return num_frames_per_band_; }
size_t num_channels() const { return num_channels_; }
size_t num_bands() const { return num_bands_; }
size_t size() const { return num_frames_ * num_allocated_channels_; }
void set_num_channels(size_t num_channels) {
RTC_DCHECK_LE(num_channels, num_allocated_channels_);
num_channels_ = num_channels;
}
void SetDataForTesting(const T* data, size_t size) {
RTC_CHECK_EQ(size, this->size());
memcpy(data_.get(), data, size * sizeof(*data));
}
private:
std::unique_ptr<T[]> data_;
std::unique_ptr<T*[]> channels_;
std::unique_ptr<T*[]> bands_;
const size_t num_frames_;
const size_t num_frames_per_band_;
// Number of channels the internal buffer holds.
const size_t num_allocated_channels_;
// Number of channels the user sees.
size_t num_channels_;
const size_t num_bands_;
const std::vector<std::vector<rtc::ArrayView<T>>> bands_view_;
const std::vector<std::vector<rtc::ArrayView<T>>> channels_view_;
};
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf_const() and fbuf_const()
// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
// fbuf() until the next call to any of the other functions.
class IFChannelBuffer {
public:
IFChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1);
~IFChannelBuffer();
ChannelBuffer<int16_t>* ibuf();
ChannelBuffer<float>* fbuf();
const ChannelBuffer<int16_t>* ibuf_const() const;
const ChannelBuffer<float>* fbuf_const() const;
size_t num_frames() const { return ibuf_.num_frames(); }
size_t num_frames_per_band() const { return ibuf_.num_frames_per_band(); }
size_t num_channels() const {
return ivalid_ ? ibuf_.num_channels() : fbuf_.num_channels();
}
void set_num_channels(size_t num_channels) {
ibuf_.set_num_channels(num_channels);
fbuf_.set_num_channels(num_channels);
}
size_t num_bands() const { return ibuf_.num_bands(); }
private:
void RefreshF() const;
void RefreshI() const;
mutable bool ivalid_;
mutable ChannelBuffer<int16_t> ibuf_;
mutable bool fvalid_;
mutable ChannelBuffer<float> fbuf_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_CHANNEL_BUFFER_H_
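Editorial note: the sketch below is added for illustration only (it is not part of the commit). It shows how the channel and band accessors described above address the same storage, and how IFChannelBuffer lazily re-synchronizes its int16_t and float copies.

#include <cstdint>

#include "common_audio/channel_buffer.h"

void ChannelBufferSketch() {
  // 160 frames, 2 channels, 2 bands -> 80 frames per band.
  webrtc::ChannelBuffer<float> buf(/*num_frames=*/160, /*num_channels=*/2,
                                   /*num_bands=*/2);
  buf.channels(/*band=*/1)[0][0] = 1.f;         // band 1, channel 0, sample 0
  float same = buf.bands(/*channel=*/0)[1][0];  // same element via bands()

  // Writing through fbuf() invalidates the int16_t copy; the next
  // ibuf_const() call converts the float (FloatS16 range) data back.
  webrtc::IFChannelBuffer ifbuf(/*num_frames=*/160, /*num_channels=*/2);
  ifbuf.fbuf()->channels()[0][0] = 12345.f;
  int16_t v = ifbuf.ibuf_const()->channels()[0][0];  // 12345 after refresh
  (void)same;
  (void)v;
}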


@ -0,0 +1,204 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
#define COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
#include "api/audio/audio_view.h"
#include "rtc_base/checks.h"
namespace webrtc {
typedef std::numeric_limits<int16_t> limits_int16;
// TODO(tommi, peah): Move these constants to their own header, e.g.
// `audio_constants.h`. Also consider if they should be in api/.
// Absolute highest acceptable sample rate supported for audio processing,
// capture and codecs. Note that for some components a lower limit applies,
// typically 48000, and in some cases lower.
constexpr int kMaxSampleRateHz = 384000;
// Number of samples per channel for 10ms of audio at the highest sample rate.
constexpr size_t kMaxSamplesPerChannel10ms = kMaxSampleRateHz / 100u;
// The conversion functions use the following naming convention:
// S16: int16_t [-32768, 32767]
// Float: float [-1.0, 1.0]
// FloatS16: float [-32768.0, 32768.0]
// Dbfs: float [-20.0*log(10, 32768), 0] = [-90.3, 0]
// The ratio conversion functions use this naming convention:
// Ratio: float (0, +inf)
// Db: float (-inf, +inf)
static inline float S16ToFloat(int16_t v) {
constexpr float kScaling = 1.f / 32768.f;
return v * kScaling;
}
static inline int16_t FloatS16ToS16(float v) {
v = std::min(v, 32767.f);
v = std::max(v, -32768.f);
return static_cast<int16_t>(v + std::copysign(0.5f, v));
}
static inline int16_t FloatToS16(float v) {
v *= 32768.f;
v = std::min(v, 32767.f);
v = std::max(v, -32768.f);
return static_cast<int16_t>(v + std::copysign(0.5f, v));
}
static inline float FloatToFloatS16(float v) {
v = std::min(v, 1.f);
v = std::max(v, -1.f);
return v * 32768.f;
}
static inline float FloatS16ToFloat(float v) {
v = std::min(v, 32768.f);
v = std::max(v, -32768.f);
constexpr float kScaling = 1.f / 32768.f;
return v * kScaling;
}
void FloatToS16(const float* src, size_t size, int16_t* dest);
void S16ToFloat(const int16_t* src, size_t size, float* dest);
void S16ToFloatS16(const int16_t* src, size_t size, float* dest);
void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
void FloatToFloatS16(const float* src, size_t size, float* dest);
void FloatS16ToFloat(const float* src, size_t size, float* dest);
inline float DbToRatio(float v) {
return std::pow(10.0f, v / 20.0f);
}
inline float DbfsToFloatS16(float v) {
static constexpr float kMaximumAbsFloatS16 = -limits_int16::min();
return DbToRatio(v) * kMaximumAbsFloatS16;
}
inline float FloatS16ToDbfs(float v) {
RTC_DCHECK_GE(v, 0);
// kMinDbfs is equal to -20.0 * log10(-limits_int16::min())
static constexpr float kMinDbfs = -90.30899869919436f;
if (v <= 1.0f) {
return kMinDbfs;
}
// Equal to 20 * log10(v / (-limits_int16::min()))
return 20.0f * std::log10(v) + kMinDbfs;
}
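// Editorial worked example (not part of the original source): with the
// definitions above, a full-scale FloatS16 value maps to 0 dBFS and anything
// at or below 1.0f saturates at the floor of roughly -90.3 dBFS.
//
//   FloatS16ToDbfs(32768.f);  // ~0.0f
//   FloatS16ToDbfs(1.f);      // kMinDbfs, about -90.31f
//   DbfsToFloatS16(-6.02f);   // ~16384.f, i.e. roughly half of full scale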
// Copy audio from `src` channels to `dest` channels unless `src` and `dest`
// point to the same address. `src` and `dest` must have the same number of
// channels, and there must be sufficient space allocated in `dest`.
// TODO: b/335805780 - Accept ArrayView.
template <typename T>
void CopyAudioIfNeeded(const T* const* src,
int num_frames,
int num_channels,
T* const* dest) {
for (int i = 0; i < num_channels; ++i) {
if (src[i] != dest[i]) {
std::copy(src[i], src[i] + num_frames, dest[i]);
}
}
}
// Deinterleave audio from `interleaved` to the channel buffers pointed to
// by `deinterleaved`. There must be sufficient space allocated in the
// `deinterleaved` buffers (`num_channel` buffers with `samples_per_channel`
// per buffer).
template <typename T>
void Deinterleave(const InterleavedView<const T>& interleaved,
const DeinterleavedView<T>& deinterleaved) {
RTC_DCHECK_EQ(NumChannels(interleaved), NumChannels(deinterleaved));
RTC_DCHECK_EQ(SamplesPerChannel(interleaved),
SamplesPerChannel(deinterleaved));
const auto num_channels = NumChannels(interleaved);
const auto samples_per_channel = SamplesPerChannel(interleaved);
for (size_t i = 0; i < num_channels; ++i) {
MonoView<T> channel = deinterleaved[i];
size_t interleaved_idx = i;
for (size_t j = 0; j < samples_per_channel; ++j) {
channel[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels;
}
}
}
// Interleave audio from the channel buffers pointed to by `deinterleaved` to
// `interleaved`. There must be sufficient space allocated in `interleaved`
// (`samples_per_channel` * `num_channels`).
template <typename T>
void Interleave(const DeinterleavedView<const T>& deinterleaved,
const InterleavedView<T>& interleaved) {
RTC_DCHECK_EQ(NumChannels(interleaved), NumChannels(deinterleaved));
RTC_DCHECK_EQ(SamplesPerChannel(interleaved),
SamplesPerChannel(deinterleaved));
for (size_t i = 0; i < deinterleaved.num_channels(); ++i) {
const auto channel = deinterleaved[i];
size_t interleaved_idx = i;
for (size_t j = 0; j < deinterleaved.samples_per_channel(); ++j) {
interleaved[interleaved_idx] = channel[j];
interleaved_idx += deinterleaved.num_channels();
}
}
}
// Downmixes an interleaved multichannel signal to a single channel by averaging
// all channels.
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
template <typename T, typename Intermediate>
void DownmixInterleavedToMonoImpl(const T* interleaved,
size_t num_frames,
int num_channels,
T* deinterleaved) {
RTC_DCHECK_GT(num_channels, 0);
RTC_DCHECK_GT(num_frames, 0);
const T* const end = interleaved + num_frames * num_channels;
while (interleaved < end) {
const T* const frame_end = interleaved + num_channels;
Intermediate value = *interleaved++;
while (interleaved < frame_end) {
value += *interleaved++;
}
*deinterleaved++ = value / num_channels;
}
}
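// Editorial example (not part of the original source): averaging an
// interleaved stereo block into mono with the generic implementation above,
// using a wider intermediate type so the per-frame sum cannot overflow.
//
//   int16_t interleaved[] = {1000, 3000, -2000, -4000};  // L0 R0 L1 R1
//   int16_t mono[2];
//   DownmixInterleavedToMonoImpl<int16_t, int32_t>(
//       interleaved, /*num_frames=*/2, /*num_channels=*/2, mono);
//   // mono == {2000, -3000}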
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
template <typename T>
void DownmixInterleavedToMono(const T* interleaved,
size_t num_frames,
int num_channels,
T* deinterleaved);
// TODO: b/335805780 - Accept InterleavedView and DeinterleavedView.
template <>
void DownmixInterleavedToMono<int16_t>(const int16_t* interleaved,
size_t num_frames,
int num_channels,
int16_t* deinterleaved);
} // namespace webrtc
#endif // COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_


@ -0,0 +1,102 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/resampler/push_sinc_resampler.h"
#include <cstring>
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
namespace webrtc {
PushSincResampler::PushSincResampler(size_t source_frames,
size_t destination_frames)
: resampler_(new SincResampler(source_frames * 1.0 / destination_frames,
source_frames,
this)),
source_ptr_(nullptr),
source_ptr_int_(nullptr),
destination_frames_(destination_frames),
first_pass_(true),
source_available_(0) {}
PushSincResampler::~PushSincResampler() {}
size_t PushSincResampler::Resample(const int16_t* source,
size_t source_length,
int16_t* destination,
size_t destination_capacity) {
if (!float_buffer_.get())
float_buffer_.reset(new float[destination_frames_]);
source_ptr_int_ = source;
// Pass nullptr as the float source to have Run() read from the int16 source.
Resample(nullptr, source_length, float_buffer_.get(), destination_frames_);
FloatS16ToS16(float_buffer_.get(), destination_frames_, destination);
source_ptr_int_ = nullptr;
return destination_frames_;
}
size_t PushSincResampler::Resample(const float* source,
size_t source_length,
float* destination,
size_t destination_capacity) {
RTC_CHECK_EQ(source_length, resampler_->request_frames());
RTC_CHECK_GE(destination_capacity, destination_frames_);
// Cache the source pointer. Calling Resample() will immediately trigger
// the Run() callback whereupon we provide the cached value.
source_ptr_ = source;
source_available_ = source_length;
// On the first pass, we call Resample() twice. During the first call, we
// provide dummy input and discard the output. This is done to prime the
// SincResampler buffer with the correct delay (half the kernel size), thereby
// ensuring that all later Resample() calls will only result in one input
// request through Run().
//
// If this wasn't done, SincResampler would call Run() twice on the first
// pass, and we'd have to introduce an entire `source_frames` of delay, rather
// than the minimum half kernel.
//
// It works out that ChunkSize() is exactly the amount of output we need to
// request in order to prime the buffer with a single Run() request for
// `source_frames`.
if (first_pass_)
resampler_->Resample(resampler_->ChunkSize(), destination);
resampler_->Resample(destination_frames_, destination);
source_ptr_ = nullptr;
return destination_frames_;
}
void PushSincResampler::Run(size_t frames, float* destination) {
// Ensure we are only asked for the available samples. This would fail if
// Run() was triggered more than once per Resample() call.
RTC_CHECK_EQ(source_available_, frames);
if (first_pass_) {
// Provide dummy input on the first pass, the output of which will be
// discarded, as described in Resample().
std::memset(destination, 0, frames * sizeof(*destination));
first_pass_ = false;
return;
}
if (source_ptr_) {
std::memcpy(destination, source_ptr_, frames * sizeof(*destination));
} else {
for (size_t i = 0; i < frames; ++i)
destination[i] = static_cast<float>(source_ptr_int_[i]);
}
source_available_ -= frames;
}
} // namespace webrtc


@ -0,0 +1,88 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
#define COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "api/audio/audio_view.h"
#include "common_audio/resampler/sinc_resampler.h"
namespace webrtc {
// A thin wrapper over SincResampler to provide a push-based interface as
// required by WebRTC. SincResampler uses a pull-based interface, and will
// use SincResamplerCallback::Run() to request data upon a call to Resample().
// These Run() calls will happen on the same thread Resample() is called on.
class PushSincResampler : public SincResamplerCallback {
public:
// Provide the size of the source and destination blocks in samples. These
// must correspond to the same time duration (typically 10 ms) as the sample
// ratio is inferred from them.
PushSincResampler(size_t source_frames, size_t destination_frames);
~PushSincResampler() override;
PushSincResampler(const PushSincResampler&) = delete;
PushSincResampler& operator=(const PushSincResampler&) = delete;
// Perform the resampling. `source_frames` must always equal the
// `source_frames` provided at construction. `destination_capacity` must be
// at least as large as `destination_frames`. Returns the number of samples
// provided in destination (for convenience, since this will always be equal
// to `destination_frames`).
template <typename S, typename D>
size_t Resample(const MonoView<S>& source, const MonoView<D>& destination) {
return Resample(&source[0], SamplesPerChannel(source), &destination[0],
SamplesPerChannel(destination));
}
size_t Resample(const int16_t* source,
size_t source_frames,
int16_t* destination,
size_t destination_capacity);
size_t Resample(const float* source,
size_t source_frames,
float* destination,
size_t destination_capacity);
// Delay due to the filter kernel. Essentially, the time after which an input
// sample will appear in the resampled output.
static float AlgorithmicDelaySeconds(int source_rate_hz) {
return 1.f / source_rate_hz * SincResampler::kKernelSize / 2;
}
protected:
// Implements SincResamplerCallback.
void Run(size_t frames, float* destination) override;
private:
friend class PushSincResamplerTest;
SincResampler* get_resampler_for_testing() { return resampler_.get(); }
std::unique_ptr<SincResampler> resampler_;
std::unique_ptr<float[]> float_buffer_;
const float* source_ptr_;
const int16_t* source_ptr_int_;
const size_t destination_frames_;
// True on the first call to Resample(), to prime the SincResampler buffer.
bool first_pass_;
// Used to assert that we are only asked for as much data as is available.
size_t source_available_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
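Editorial usage sketch (not part of the commit): resampling one 10 ms block from 48 kHz to 16 kHz through the push interface declared above. The frame counts follow the header's requirement that the source and destination blocks cover the same duration; the resampler is constructed once and reused for every block.

#include <cstdint>

#include "common_audio/resampler/push_sinc_resampler.h"

void Resample10msBlock(const int16_t* in_48k, int16_t* out_16k) {
  // 480 input frames and 160 output frames both correspond to 10 ms.
  static webrtc::PushSincResampler resampler(/*source_frames=*/480,
                                             /*destination_frames=*/160);
  resampler.Resample(in_48k, /*source_frames=*/480, out_16k,
                     /*destination_capacity=*/160);
}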


@ -0,0 +1,366 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original:
// src/media/base/sinc_resampler.cc
// Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_
// and r4_ will move after the first load):
//
// |----------------|-----------------------------------------|----------------|
//
// request_frames_
// <--------------------------------------------------------->
// r0_ (during first load)
//
// kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 kKernelSize / 2
// <---------------> <---------------> <---------------> <--------------->
// r1_ r2_ r3_ r4_
//
// block_size_ == r4_ - r2_
// <--------------------------------------->
//
// request_frames_
// <------------------ ... ----------------->
// r0_ (during second load)
//
// On the second request r0_ slides to the right by kKernelSize / 2 and r3_, r4_
// and block_size_ are reinitialized via step (3) in the algorithm below.
//
// These new regions remain constant until a Flush() occurs. While complicated,
// this allows us to reduce jitter by always requesting the same amount from the
// provided callback.
//
// The algorithm:
//
// 1) Allocate input_buffer of size: request_frames_ + kKernelSize; this ensures
// there's enough room to read request_frames_ from the callback into region
// r0_ (which will move between the first and subsequent passes).
//
// 2) Let r1_, r2_ each represent half the kernel centered around r0_:
//
// r0_ = input_buffer_ + kKernelSize / 2
// r1_ = input_buffer_
// r2_ = r0_
//
// r0_ is always request_frames_ in size. r1_, r2_ are kKernelSize / 2 in
// size. r1_ must be zero initialized to avoid convolution with garbage (see
// step (5) for why).
//
// 3) Let r3_, r4_ each represent half the kernel right aligned with the end of
// r0_ and choose block_size_ as the distance in frames between r4_ and r2_:
//
// r3_ = r0_ + request_frames_ - kKernelSize
// r4_ = r0_ + request_frames_ - kKernelSize / 2
// block_size_ = r4_ - r2_ = request_frames_ - kKernelSize / 2
//
// 4) Consume request_frames_ frames into r0_.
//
// 5) Position kernel centered at start of r2_ and generate output frames until
// the kernel is centered at the start of r4_ or we've finished generating
// all the output frames.
//
// 6) Wrap left over data from the r3_ to r1_ and r4_ to r2_.
//
// 7) If we're on the second load, in order to avoid overwriting the frames we
// just wrapped from r4_ we need to slide r0_ to the right by the size of
// r4_, which is kKernelSize / 2:
//
// r0_ = r0_ + kKernelSize / 2 = input_buffer_ + kKernelSize
//
// r3_, r4_, and block_size_ then need to be reinitialized, so goto (3).
//
// 8) Else, if we're not on the second load, goto (4).
//
// Note: we're glossing over how the sub-sample handling works with
// `virtual_source_idx_`, etc.
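//
// Editorial worked example (added for clarity, not in the original comment):
// with kKernelSize = 32 and request_frames_ = 512 (the defaults declared in
// sinc_resampler.h), the regions work out to
//
//   r1_ = input_buffer_          r2_ = input_buffer_ + 16
//   First load:  r0_ = input_buffer_ + 16, r3_ = r0_ + 480, r4_ = r0_ + 496,
//                block_size_ = 496
//   Later loads: r0_ = input_buffer_ + 32, r3_ = r0_ + 480, r4_ = r0_ + 496,
//                block_size_ = 512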
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include "common_audio/resampler/sinc_resampler.h"
#include <math.h>
#include <stdint.h>
#include <string.h>
#include <limits>
#include "rtc_base/checks.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h" // kSSE2, WebRtc_G...
namespace webrtc {
namespace {
double SincScaleFactor(double io_ratio) {
// `sinc_scale_factor` is basically the normalized cutoff frequency of the
// low-pass filter.
double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0;
// The sinc function is an idealized brick-wall filter, but since we're
// windowing it the transition from pass to stop does not happen right away.
// So we should adjust the low pass filter cutoff slightly downward to avoid
// some aliasing at the very high-end.
// TODO(crogers): this value is empirical and to be more exact should vary
// depending on kKernelSize.
sinc_scale_factor *= 0.9;
return sinc_scale_factor;
}
} // namespace
const size_t SincResampler::kKernelSize;
// If we know the minimum architecture at compile time, avoid CPU detection.
void SincResampler::InitializeCPUSpecificFeatures() {
#if defined(WEBRTC_HAS_NEON)
convolve_proc_ = Convolve_NEON;
#elif defined(WEBRTC_ARCH_X86_FAMILY)
// Using AVX2 instead of SSE2 when AVX2/FMA3 supported.
if (GetCPUInfo(kAVX2) && GetCPUInfo(kFMA3))
convolve_proc_ = Convolve_AVX2;
else if (GetCPUInfo(kSSE2))
convolve_proc_ = Convolve_SSE;
else
convolve_proc_ = Convolve_C;
#else
// Unknown architecture.
convolve_proc_ = Convolve_C;
#endif
}
SincResampler::SincResampler(double io_sample_rate_ratio,
size_t request_frames,
SincResamplerCallback* read_cb)
: io_sample_rate_ratio_(io_sample_rate_ratio),
read_cb_(read_cb),
request_frames_(request_frames),
input_buffer_size_(request_frames_ + kKernelSize),
// Create input buffers with a 32-byte alignment for SIMD optimizations.
kernel_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
kernel_pre_sinc_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
kernel_window_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
input_buffer_(static_cast<float*>(
AlignedMalloc(sizeof(float) * input_buffer_size_, 32))),
convolve_proc_(nullptr),
r1_(input_buffer_.get()),
r2_(input_buffer_.get() + kKernelSize / 2) {
InitializeCPUSpecificFeatures();
RTC_DCHECK(convolve_proc_);
RTC_DCHECK_GT(request_frames_, 0);
Flush();
RTC_DCHECK_GT(block_size_, kKernelSize);
memset(kernel_storage_.get(), 0,
sizeof(*kernel_storage_.get()) * kKernelStorageSize);
memset(kernel_pre_sinc_storage_.get(), 0,
sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
memset(kernel_window_storage_.get(), 0,
sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);
InitializeKernel();
}
SincResampler::~SincResampler() {}
void SincResampler::UpdateRegions(bool second_load) {
// Setup various region pointers in the buffer (see diagram above). If we're
// on the second load we need to slide r0_ to the right by kKernelSize / 2.
r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2);
r3_ = r0_ + request_frames_ - kKernelSize;
r4_ = r0_ + request_frames_ - kKernelSize / 2;
block_size_ = r4_ - r2_;
// r1_ at the beginning of the buffer.
RTC_DCHECK_EQ(r1_, input_buffer_.get());
// r1_ left of r2_, r3_ left of r4_, and the two gaps are the same size.
RTC_DCHECK_EQ(r2_ - r1_, r4_ - r3_);
// r2_ left of r3.
RTC_DCHECK_LT(r2_, r3_);
}
void SincResampler::InitializeKernel() {
// Blackman window parameters.
static const double kAlpha = 0.16;
static const double kA0 = 0.5 * (1.0 - kAlpha);
static const double kA1 = 0.5;
static const double kA2 = 0.5 * kAlpha;
// Generates a set of windowed sinc() kernels.
// We generate a range of sub-sample offsets from 0.0 to 1.0.
const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
const float subsample_offset =
static_cast<float>(offset_idx) / kKernelOffsetCount;
for (size_t i = 0; i < kKernelSize; ++i) {
const size_t idx = i + offset_idx * kKernelSize;
const float pre_sinc = static_cast<float>(
M_PI * (static_cast<int>(i) - static_cast<int>(kKernelSize / 2) -
subsample_offset));
kernel_pre_sinc_storage_[idx] = pre_sinc;
// Compute Blackman window, matching the offset of the sinc().
const float x = (i - subsample_offset) / kKernelSize;
const float window = static_cast<float>(kA0 - kA1 * cos(2.0 * M_PI * x) +
kA2 * cos(4.0 * M_PI * x));
kernel_window_storage_[idx] = window;
// Compute the sinc with offset, then window the sinc() function and store
// at the correct offset.
kernel_storage_[idx] = static_cast<float>(
window * ((pre_sinc == 0)
? sinc_scale_factor
: (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
}
}
}
void SincResampler::SetRatio(double io_sample_rate_ratio) {
if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <
std::numeric_limits<double>::epsilon()) {
return;
}
io_sample_rate_ratio_ = io_sample_rate_ratio;
// Optimize reinitialization by reusing values which are independent of
// `sinc_scale_factor`. Provides a 3x speedup.
const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
for (size_t i = 0; i < kKernelSize; ++i) {
const size_t idx = i + offset_idx * kKernelSize;
const float window = kernel_window_storage_[idx];
const float pre_sinc = kernel_pre_sinc_storage_[idx];
kernel_storage_[idx] = static_cast<float>(
window * ((pre_sinc == 0)
? sinc_scale_factor
: (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
}
}
}
void SincResampler::Resample(size_t frames, float* destination) {
size_t remaining_frames = frames;
// Step (1) -- Prime the input buffer at the start of the input stream.
if (!buffer_primed_ && remaining_frames) {
read_cb_->Run(request_frames_, r0_);
buffer_primed_ = true;
}
// Step (2) -- Resample! Hoist what we can outside of the loop as const for
// speed; it actually has an impact on ARM performance. See the inner loop
// comment below.
const double current_io_ratio = io_sample_rate_ratio_;
const float* const kernel_ptr = kernel_storage_.get();
while (remaining_frames) {
// `i` may be negative if the last Resample() call ended on an iteration
// that put `virtual_source_idx_` over the limit.
//
// Note: The loop construct here can severely impact performance on ARM
// or when built with clang. See https://codereview.chromium.org/18566009/
for (int i = static_cast<int>(
ceil((block_size_ - virtual_source_idx_) / current_io_ratio));
i > 0; --i) {
RTC_DCHECK_LT(virtual_source_idx_, block_size_);
// `virtual_source_idx_` lies in between two kernel offsets so figure out
// what they are.
const int source_idx = static_cast<int>(virtual_source_idx_);
const double subsample_remainder = virtual_source_idx_ - source_idx;
const double virtual_offset_idx =
subsample_remainder * kKernelOffsetCount;
const int offset_idx = static_cast<int>(virtual_offset_idx);
// We'll compute "convolutions" for the two kernels which straddle
// `virtual_source_idx_`.
const float* const k1 = kernel_ptr + offset_idx * kKernelSize;
const float* const k2 = k1 + kKernelSize;
// Ensure `k1`, `k2` are 32-byte aligned for SIMD usage. Should always be
// true so long as kKernelSize is a multiple of 32.
RTC_DCHECK_EQ(0, reinterpret_cast<uintptr_t>(k1) % 32);
RTC_DCHECK_EQ(0, reinterpret_cast<uintptr_t>(k2) % 32);
// Initialize input pointer based on quantized `virtual_source_idx_`.
const float* const input_ptr = r1_ + source_idx;
// Figure out how much to weight each kernel's "convolution".
const double kernel_interpolation_factor =
virtual_offset_idx - offset_idx;
*destination++ =
convolve_proc_(input_ptr, k1, k2, kernel_interpolation_factor);
// Advance the virtual index.
virtual_source_idx_ += current_io_ratio;
if (!--remaining_frames)
return;
}
// Wrap back around to the start.
virtual_source_idx_ -= block_size_;
// Step (3) -- Copy r3_, r4_ to r1_, r2_.
// This wraps the last input frames back to the start of the buffer.
memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);
// Step (4) -- Reinitialize regions if necessary.
if (r0_ == r2_)
UpdateRegions(true);
// Step (5) -- Refresh the buffer with more input.
read_cb_->Run(request_frames_, r0_);
}
}
#undef CONVOLVE_FUNC
size_t SincResampler::ChunkSize() const {
return static_cast<size_t>(block_size_ / io_sample_rate_ratio_);
}
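// Editorial example (not part of the original source): immediately after
// Flush(), block_size_ == request_frames_ - kKernelSize / 2. With the default
// request size of 512 and kKernelSize = 32 that is 496, so for a 48 kHz ->
// 16 kHz ratio of 3.0, ChunkSize() == 496 / 3 == 165 output frames per single
// Run() request.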
void SincResampler::Flush() {
virtual_source_idx_ = 0;
buffer_primed_ = false;
memset(input_buffer_.get(), 0,
sizeof(*input_buffer_.get()) * input_buffer_size_);
UpdateRegions(false);
}
float SincResampler::Convolve_C(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor) {
float sum1 = 0;
float sum2 = 0;
// Generate a single output sample. Unrolling this loop hurt performance in
// local testing.
size_t n = kKernelSize;
while (n--) {
sum1 += *input_ptr * *k1++;
sum2 += *input_ptr++ * *k2++;
}
// Linearly interpolate the two "convolutions".
return static_cast<float>((1.0 - kernel_interpolation_factor) * sum1 +
kernel_interpolation_factor * sum2);
}
} // namespace webrtc


@ -0,0 +1,181 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original here:
// src/media/base/sinc_resampler.h
#ifndef COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
#define COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
#include <stddef.h>
#include <memory>
#include "rtc_base/gtest_prod_util.h"
#include "rtc_base/memory/aligned_malloc.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
// Callback class for providing more data into the resampler. Expects `frames`
// of data to be rendered into `destination`; zero padded if not enough frames
// are available to satisfy the request.
class SincResamplerCallback {
public:
virtual ~SincResamplerCallback() {}
virtual void Run(size_t frames, float* destination) = 0;
};
// SincResampler is a high-quality single-channel sample-rate converter.
class SincResampler {
public:
// The kernel size can be adjusted for quality (higher is better) at the
// expense of performance. Must be a multiple of 32.
// TODO(dalecurtis): Test performance to see if we can jack this up to 64+.
static const size_t kKernelSize = 32;
// Default request size. Affects how often and for how much SincResampler
// calls back for input. Must be greater than kKernelSize.
static const size_t kDefaultRequestSize = 512;
// The kernel offset count is used for interpolation and is the number of
// sub-sample kernel shifts. Can be adjusted for quality (higher is better)
// at the expense of allocating more memory.
static const size_t kKernelOffsetCount = 32;
static const size_t kKernelStorageSize =
kKernelSize * (kKernelOffsetCount + 1);
// Constructs a SincResampler with the specified `read_cb`, which is used to
// acquire audio data for resampling. `io_sample_rate_ratio` is the ratio
// of input / output sample rates. `request_frames` controls the size in
// frames of the buffer requested by each `read_cb` call. The value must be
// greater than kKernelSize. Specify kDefaultRequestSize if there are no
// request size constraints.
SincResampler(double io_sample_rate_ratio,
size_t request_frames,
SincResamplerCallback* read_cb);
virtual ~SincResampler();
SincResampler(const SincResampler&) = delete;
SincResampler& operator=(const SincResampler&) = delete;
// Resample `frames` of data from `read_cb_` into `destination`.
void Resample(size_t frames, float* destination);
// The maximum size in frames that guarantees Resample() will only make a
// single call to `read_cb_` for more data.
size_t ChunkSize() const;
size_t request_frames() const { return request_frames_; }
// Flush all buffered data and reset internal indices. Not thread safe, do
// not call while Resample() is in progress.
void Flush();
// Update `io_sample_rate_ratio_`. SetRatio() will cause a reconstruction of
// the kernels used for resampling. Not thread safe, do not call while
// Resample() is in progress.
//
// TODO(ajm): Use this in PushSincResampler rather than reconstructing
// SincResampler. We would also need a way to update `request_frames_`.
void SetRatio(double io_sample_rate_ratio);
float* get_kernel_for_testing() { return kernel_storage_.get(); }
private:
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve);
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark);
void InitializeKernel();
void UpdateRegions(bool second_load);
// Selects runtime specific CPU features like SSE. Must be called before
// using SincResampler.
// TODO(ajm): Currently managed by the class internally. See the note with
// `convolve_proc_` below.
void InitializeCPUSpecificFeatures();
// Compute convolution of `k1` and `k2` over `input_ptr`, resultant sums are
// linearly interpolated using `kernel_interpolation_factor`. On x86 and ARM
// the underlying implementation is chosen at run time.
static float Convolve_C(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#if defined(WEBRTC_ARCH_X86_FAMILY)
static float Convolve_SSE(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
static float Convolve_AVX2(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#elif defined(WEBRTC_HAS_NEON)
static float Convolve_NEON(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#endif
// The ratio of input / output sample rates.
double io_sample_rate_ratio_;
// An index on the source input buffer with sub-sample precision. It must be
// double precision to avoid drift.
double virtual_source_idx_;
// The buffer is primed once at the very beginning of processing.
bool buffer_primed_;
// Source of data for resampling.
SincResamplerCallback* read_cb_;
// The size (in samples) to request from each `read_cb_` execution.
const size_t request_frames_;
// The number of source frames processed per pass.
size_t block_size_;
// The size (in samples) of the internal buffer used by the resampler.
const size_t input_buffer_size_;
// Contains kKernelOffsetCount + 1 kernels back-to-back, each of size
// kKernelSize. The kernel offsets are sub-sample shifts of a windowed sinc
// shifted from 0.0 to 1.0 sample.
std::unique_ptr<float[], AlignedFreeDeleter> kernel_storage_;
std::unique_ptr<float[], AlignedFreeDeleter> kernel_pre_sinc_storage_;
std::unique_ptr<float[], AlignedFreeDeleter> kernel_window_storage_;
// Data from the source is copied into this buffer for each processing pass.
std::unique_ptr<float[], AlignedFreeDeleter> input_buffer_;
// Stores the runtime selection of which Convolve function to use.
// TODO(ajm): Move to using a global static which must only be initialized
// once by the user. We're not doing this initially, because we don't have
// e.g. a LazyInstance helper in webrtc.
typedef float (*ConvolveProc)(const float*,
const float*,
const float*,
double);
ConvolveProc convolve_proc_;
// Pointers to the various regions inside `input_buffer_`. See the diagram at
// the top of the .cc file for more information.
float* r0_;
float* const r1_;
float* const r2_;
float* r3_;
float* r4_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
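Editorial usage sketch (not part of the commit): a minimal callback implementation driving the pull-based interface described above. The silence source is only an illustration; a real callback would render actual input audio.

#include <cstring>

#include "common_audio/resampler/sinc_resampler.h"

namespace {

// Fills every request from the resampler with silence.
class SilenceSource : public webrtc::SincResamplerCallback {
 public:
  void Run(size_t frames, float* destination) override {
    std::memset(destination, 0, frames * sizeof(*destination));
  }
};

}  // namespace

void PullResampledAudio(float* out, size_t out_frames) {
  SilenceSource source;
  // 48 kHz -> 16 kHz; kDefaultRequestSize works when there is no constraint
  // on how much the callback is asked for at a time.
  webrtc::SincResampler resampler(/*io_sample_rate_ratio=*/48000.0 / 16000.0,
                                  webrtc::SincResampler::kDefaultRequestSize,
                                  &source);
  resampler.Resample(out_frames, out);
}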


@ -0,0 +1,34 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/dot_product_with_scale.h"
#include "rtc_base/numerics/safe_conversions.h"
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
const int16_t* vector2,
size_t length,
int scaling) {
int64_t sum = 0;
size_t i = 0;
/* Unroll the loop to improve performance. */
for (i = 0; i + 3 < length; i += 4) {
sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
}
for (; i < length; i++) {
sum += (vector1[i] * vector2[i]) >> scaling;
}
return rtc::saturated_cast<int32_t>(sum);
}


@ -0,0 +1,40 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
// Calculates the dot product between two (int16_t) vectors.
//
// Input:
// - vector1 : Vector 1
// - vector2 : Vector 2
// - length : Number of samples used in the dot product
// - scaling : The number of right bit shifts to apply on each term
// during calculation to avoid overflow, i.e., the
// output will be in Q(-`scaling`)
//
// Return value : The dot product in Q(-scaling)
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
const int16_t* vector2,
size_t length,
int scaling);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
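Editorial usage example (not part of the commit): each product is right-shifted by `scaling` before accumulation, so the returned value is in Q(-scaling).

#include <cstdint>

#include "common_audio/signal_processing/dot_product_with_scale.h"

int32_t ExampleDotProduct() {
  const int16_t a[] = {1000, -2000, 3000, 4000};
  const int16_t b[] = {2, 2, 2, 2};
  // Products 2000, -4000, 6000, 8000 are each shifted right by 1 before being
  // summed, giving 1000 - 2000 + 3000 + 4000 = 6000.
  return WebRtcSpl_DotProductWithScale(a, b, /*length=*/4, /*scaling=*/1);
}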

File diff suppressed because it is too large.


@ -0,0 +1,155 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file includes the inline functions in
// the fix point signal processing library.
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#include <stdint.h>
#include "rtc_base/compile_assert_c.h"
extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
// Normalize n by rounding up to the nearest number that is a sequence of 0
// bits followed by a sequence of 1 bits. This number has the same number of
// leading zeros as the original n. There are exactly 33 such values.
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
n |= n >> 8;
n |= n >> 16;
// Multiply the modified n with a constant selected (by exhaustive search)
// such that each of the 33 possible values of n give a product whose 6 most
// significant bits are unique. Then look up the answer in the table.
return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26];
}
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
const int leading_zeros = n >> 32 == 0 ? 32 : 0;
return leading_zeros + WebRtcSpl_CountLeadingZeros32_NotBuiltin(
(uint32_t)(n >> (32 - leading_zeros)));
}
// Returns the number of leading zero bits in the argument.
static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
#ifdef __GNUC__
RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
return n == 0 ? 32 : __builtin_clz(n);
#else
return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
#endif
}
// Returns the number of leading zero bits in the argument.
static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
#ifdef __GNUC__
RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t)); // NOLINT
return n == 0 ? 64 : __builtin_clzll(n);
#else
return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
#endif
}
#ifdef WEBRTC_ARCH_ARM_V7
#include "common_audio/signal_processing/include/spl_inl_armv7.h"
#else
#if defined(MIPS32_LE)
#include "common_audio/signal_processing/include/spl_inl_mips.h"
#endif
#if !defined(MIPS_DSP_R1_LE)
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
int16_t out16 = (int16_t)value32;
if (value32 > 32767)
out16 = 32767;
else if (value32 < -32768)
out16 = -32768;
return out16;
}
static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
// Do the addition in unsigned numbers, since signed overflow is undefined
// behavior.
const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b);
// a + b can't overflow if a and b have different signs. If they have the
// same sign, a + b also has the same sign iff it didn't overflow.
if ((a < 0) == (b < 0) && (a < 0) != (sum < 0)) {
// The direction of the overflow is obvious from the sign of a + b.
return sum < 0 ? INT32_MAX : INT32_MIN;
}
return sum;
}
static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
// Do the subtraction in unsigned numbers, since signed overflow is undefined
// behavior.
const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b);
// a - b can't overflow if a and b have the same sign. If they have different
// signs, a - b has the same sign as a iff it didn't overflow.
if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) {
// The direction of the overflow is obvious from the sign of a - b.
return diff < 0 ? INT32_MAX : INT32_MIN;
}
return diff;
}
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
return WebRtcSpl_SatW32ToW16((int32_t)a + (int32_t)b);
}
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
return WebRtcSpl_SatW32ToW16((int32_t)var1 - (int32_t)var2);
}
#endif // #if !defined(MIPS_DSP_R1_LE)
#if !defined(MIPS32_LE)
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
return 32 - WebRtcSpl_CountLeadingZeros32(n);
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1;
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a);
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
const int32_t a32 = a;
return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a32 : a32) - 17;
}
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
return (a * b + c);
}
#endif // #if !defined(MIPS32_LE)
#endif // WEBRTC_ARCH_ARM_V7
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
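Editorial sketch (not part of the commit): a few of the generic C helpers above in use, assuming a platform that falls through to these implementations rather than the ARMv7 or MIPS specializations.

#include <cstdint>

#include "common_audio/signal_processing/include/spl_inl.h"

void SplInlineExamples() {
  // Saturating arithmetic clamps instead of wrapping on overflow.
  int32_t sat_sum = WebRtcSpl_AddSatW32(INT32_MAX, 1);  // INT32_MAX
  int16_t clipped = WebRtcSpl_SatW32ToW16(40000);       // 32767
  // Number of left shifts possible without overflow: 0x4000 can be shifted
  // left 16 times before the sign bit would be hit.
  int16_t norm = WebRtcSpl_NormW32(0x4000);             // 16
  (void)sat_sum;
  (void)clipped;
  (void)norm;
}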


@ -0,0 +1,548 @@
/*
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
* Copyright Takuya OOURA, 1996-2001
*
* You may use, copy, modify and distribute this code for any purpose (include
* commercial use) and without fee. Please refer to this package when you modify
* this code.
*
* Changes by the WebRTC authors:
* - Trivial type modifications.
* - Minimal code subset to do rdft of length 128.
* - Optimizations because of known length.
* - Removed the global variables by moving the code in to a class in order
* to make it thread safe.
*
* All changes are covered by the WebRTC license and IP grant:
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft_tables_common.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
namespace webrtc {
namespace {
#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
static void cft1st_128_C(float* a) {
const int n = 128;
int j, k1, k2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
// The processing of the first set of elements was simplified in C to avoid
// some operations (multiplication by zero or one, addition of two elements
// multiplied by the same weight, ...).
x0r = a[0] + a[2];
x0i = a[1] + a[3];
x1r = a[0] - a[2];
x1i = a[1] - a[3];
x2r = a[4] + a[6];
x2i = a[5] + a[7];
x3r = a[4] - a[6];
x3i = a[5] - a[7];
a[0] = x0r + x2r;
a[1] = x0i + x2i;
a[4] = x0r - x2r;
a[5] = x0i - x2i;
a[2] = x1r - x3i;
a[3] = x1i + x3r;
a[6] = x1r + x3i;
a[7] = x1i - x3r;
wk1r = rdft_w[2];
x0r = a[8] + a[10];
x0i = a[9] + a[11];
x1r = a[8] - a[10];
x1i = a[9] - a[11];
x2r = a[12] + a[14];
x2i = a[13] + a[15];
x3r = a[12] - a[14];
x3i = a[13] - a[15];
a[8] = x0r + x2r;
a[9] = x0i + x2i;
a[12] = x2i - x0i;
a[13] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[10] = wk1r * (x0r - x0i);
a[11] = wk1r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[14] = wk1r * (x0i - x0r);
a[15] = wk1r * (x0i + x0r);
k1 = 0;
for (j = 16; j < n; j += 16) {
k1 += 2;
k2 = 2 * k1;
wk2r = rdft_w[k1 + 0];
wk2i = rdft_w[k1 + 1];
wk1r = rdft_w[k2 + 0];
wk1i = rdft_w[k2 + 1];
wk3r = rdft_wk3ri_first[k1 + 0];
wk3i = rdft_wk3ri_first[k1 + 1];
x0r = a[j + 0] + a[j + 2];
x0i = a[j + 1] + a[j + 3];
x1r = a[j + 0] - a[j + 2];
x1i = a[j + 1] - a[j + 3];
x2r = a[j + 4] + a[j + 6];
x2i = a[j + 5] + a[j + 7];
x3r = a[j + 4] - a[j + 6];
x3i = a[j + 5] - a[j + 7];
a[j + 0] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 4] = wk2r * x0r - wk2i * x0i;
a[j + 5] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 2] = wk1r * x0r - wk1i * x0i;
a[j + 3] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 6] = wk3r * x0r - wk3i * x0i;
a[j + 7] = wk3r * x0i + wk3i * x0r;
wk1r = rdft_w[k2 + 2];
wk1i = rdft_w[k2 + 3];
wk3r = rdft_wk3ri_second[k1 + 0];
wk3i = rdft_wk3ri_second[k1 + 1];
x0r = a[j + 8] + a[j + 10];
x0i = a[j + 9] + a[j + 11];
x1r = a[j + 8] - a[j + 10];
x1i = a[j + 9] - a[j + 11];
x2r = a[j + 12] + a[j + 14];
x2i = a[j + 13] + a[j + 15];
x3r = a[j + 12] - a[j + 14];
x3i = a[j + 13] - a[j + 15];
a[j + 8] = x0r + x2r;
a[j + 9] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 12] = -wk2i * x0r - wk2r * x0i;
a[j + 13] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 10] = wk1r * x0r - wk1i * x0i;
a[j + 11] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 14] = wk3r * x0r - wk3i * x0i;
a[j + 15] = wk3r * x0i + wk3i * x0r;
}
}
static void cftmdl_128_C(float* a) {
const int l = 8;
const int n = 128;
const int m = 32;
int j0, j1, j2, j3, k, k1, k2, m2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
for (j0 = 0; j0 < l; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
a[j2 + 0] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1 + 0] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3 + 0] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
wk1r = rdft_w[2];
for (j0 = m; j0 < l + m; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
a[j2 + 0] = x2i - x0i;
a[j2 + 1] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * (x0r - x0i);
a[j1 + 1] = wk1r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[j3 + 0] = wk1r * (x0i - x0r);
a[j3 + 1] = wk1r * (x0i + x0r);
}
k1 = 0;
m2 = 2 * m;
for (k = m2; k < n; k += m2) {
k1 += 2;
k2 = 2 * k1;
wk2r = rdft_w[k1 + 0];
wk2i = rdft_w[k1 + 1];
wk1r = rdft_w[k2 + 0];
wk1i = rdft_w[k2 + 1];
wk3r = rdft_wk3ri_first[k1 + 0];
wk3i = rdft_wk3ri_first[k1 + 1];
for (j0 = k; j0 < l + k; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2 + 0] = wk2r * x0r - wk2i * x0i;
a[j2 + 1] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3 + 0] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
wk1r = rdft_w[k2 + 2];
wk1i = rdft_w[k2 + 3];
wk3r = rdft_wk3ri_second[k1 + 0];
wk3i = rdft_wk3ri_second[k1 + 1];
for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3 + 0] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
}
}
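// Forward real-FFT post-processing: folds the half-length complex FFT into the
// 128-point real spectrum using the cosine/sine table at rdft_w + 32.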
static void rftfsub_128_C(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
xi = a[j2 + 1] + a[k2 + 1];
yr = wkr * xr - wki * xi;
yi = wkr * xi + wki * xr;
a[j2 + 0] -= yr;
a[j2 + 1] -= yi;
a[k2 + 0] += yr;
a[k2 + 1] -= yi;
}
}
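// Inverse counterpart of rftfsub_128_C: spectrum pre-processing applied before
// the inverse complex FFT.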
static void rftbsub_128_C(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
a[1] = -a[1];
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
xi = a[j2 + 1] + a[k2 + 1];
yr = wkr * xr + wki * xi;
yi = wkr * xi - wki * xr;
a[j2 + 0] = a[j2 + 0] - yr;
a[j2 + 1] = yi - a[j2 + 1];
a[k2 + 0] = yr + a[k2 + 0];
a[k2 + 1] = yi - a[k2 + 1];
}
a[65] = -a[65];
}
#endif
} // namespace
OouraFft::OouraFft(bool sse2_available) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
use_sse2_ = sse2_available;
#else
use_sse2_ = false;
#endif
}
OouraFft::OouraFft() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
use_sse2_ = (GetCPUInfo(kSSE2) != 0);
#else
use_sse2_ = false;
#endif
}
OouraFft::~OouraFft() = default;
void OouraFft::Fft(float* a) const {
float xi;
bitrv2_128(a);
cftfsub_128(a);
rftfsub_128(a);
xi = a[0] - a[1];
a[0] += a[1];
a[1] = xi;
}
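// Illustration only (not part of the original file): a hypothetical helper
// showing how the in-place result of Fft() is laid out, following Ooura's rdft
// packing from which this code is derived. a[0] holds the DC term, a[1] the
// Nyquist (bin 64) term, and bins 1..63 are the interleaved pairs a[2k],
// a[2k + 1]. Ooura defines the second element of each pair with a +sin kernel,
// i.e. the conjugate of the usual engineering DFT sign convention.
void UnpackOouraSpectrum(const float a[128], float real[65], float imag[65]) {
  real[0] = a[0];
  imag[0] = 0.0f;
  real[64] = a[1];
  imag[64] = 0.0f;
  for (int k = 1; k < 64; ++k) {
    real[k] = a[2 * k];
    imag[k] = a[2 * k + 1];
  }
}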
void OouraFft::InverseFft(float* a) const {
a[1] = 0.5f * (a[0] - a[1]);
a[0] -= a[1];
rftbsub_128(a);
bitrv2_128(a);
cftbsub_128(a);
}
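// The cft1st_128/cftmdl_128/rftfsub_128/rftbsub_128 members below only
// dispatch: they pick the MIPS, NEON, or SSE2 kernel when the build (and, for
// SSE2, the runtime flag) allows it, and fall back to the portable *_C
// implementations otherwise.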
void OouraFft::cft1st_128(float* a) const {
#if defined(MIPS_FPU_LE)
cft1st_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
cft1st_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
if (use_sse2_) {
cft1st_128_SSE2(a);
} else {
cft1st_128_C(a);
}
#else
cft1st_128_C(a);
#endif
}
void OouraFft::cftmdl_128(float* a) const {
#if defined(MIPS_FPU_LE)
cftmdl_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
cftmdl_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
if (use_sse2_) {
cftmdl_128_SSE2(a);
} else {
cftmdl_128_C(a);
}
#else
cftmdl_128_C(a);
#endif
}
void OouraFft::rftfsub_128(float* a) const {
#if defined(MIPS_FPU_LE)
rftfsub_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
rftfsub_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
if (use_sse2_) {
rftfsub_128_SSE2(a);
} else {
rftfsub_128_C(a);
}
#else
rftfsub_128_C(a);
#endif
}
void OouraFft::rftbsub_128(float* a) const {
#if defined(MIPS_FPU_LE)
rftbsub_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
rftbsub_128_neon(a);
#elif defined(WEBRTC_ARCH_X86_FAMILY)
if (use_sse2_) {
rftbsub_128_SSE2(a);
} else {
rftbsub_128_C(a);
}
#else
rftbsub_128_C(a);
#endif
}
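// Inverse (conjugate) complex FFT: runs the shared first and middle stages,
// then the final radix-4 stage with negated imaginary parts.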
void OouraFft::cftbsub_128(float* a) const {
int j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
cft1st_128(a);
cftmdl_128(a);
l = 32;
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = -a[j + 1] - a[j1 + 1];
x1r = a[j] - a[j1];
x1i = -a[j + 1] + a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i - x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i + x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i - x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i + x3r;
}
}
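// Forward complex FFT: shared first and middle stages followed by the final
// radix-4 butterfly stage.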
void OouraFft::cftfsub_128(float* a) const {
int j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
cft1st_128(a);
cftmdl_128(a);
l = 32;
for (j = 0; j < l; j += 2) {
j1 = j + l;
j2 = j1 + l;
j3 = j2 + l;
x0r = a[j] + a[j1];
x0i = a[j + 1] + a[j1 + 1];
x1r = a[j] - a[j1];
x1i = a[j + 1] - a[j1 + 1];
x2r = a[j2] + a[j3];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2] - a[j3];
x3i = a[j2 + 1] - a[j3 + 1];
a[j] = x0r + x2r;
a[j + 1] = x0i + x2i;
a[j2] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
}
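// In-place bit-reversal reordering of the 64 complex (128 float) values.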
void OouraFft::bitrv2_128(float* a) const {
/*
The following things have been attempted but are no faster:
(a) Storing the swap indexes in a LUT (index calculations are done
for 'free' while waiting on memory/L1).
(b) Consolidate the load/store of two consecutive floats by a 64 bit
integer (execution is memory/L1 bound).
(c) Do a mix of floats and 64 bit integer to maximize register
utilization (execution is memory/L1 bound).
(d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
(e) Hard-coding of the offsets to completely eliminate index
    calculations.
*/
unsigned int j, j1, k, k1;
float xr, xi, yr, yi;
const int ip[4] = {0, 64, 32, 96};
for (k = 0; k < 4; k++) {
for (j = 0; j < k; j++) {
j1 = 2 * j + ip[k];
k1 = 2 * k + ip[j];
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += 8;
k1 += 16;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += 8;
k1 -= 8;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += 8;
k1 += 16;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
}
j1 = 2 * k + 8 + ip[k];
k1 = j1 + 8;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
}
}
} // namespace webrtc


@ -0,0 +1,64 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
#include "rtc_base/system/arch.h"
namespace webrtc {
#if defined(WEBRTC_ARCH_X86_FAMILY)
void cft1st_128_SSE2(float* a);
void cftmdl_128_SSE2(float* a);
void rftfsub_128_SSE2(float* a);
void rftbsub_128_SSE2(float* a);
#endif
#if defined(MIPS_FPU_LE)
void cft1st_128_mips(float* a);
void cftmdl_128_mips(float* a);
void rftfsub_128_mips(float* a);
void rftbsub_128_mips(float* a);
#endif
#if defined(WEBRTC_HAS_NEON)
void cft1st_128_neon(float* a);
void cftmdl_128_neon(float* a);
void rftfsub_128_neon(float* a);
void rftbsub_128_neon(float* a);
#endif
class OouraFft {
public:
// Ctor allowing the availability of SSE2 support to be specified.
explicit OouraFft(bool sse2_available);
// Deprecated: This Ctor will soon be removed.
OouraFft();
~OouraFft();
void Fft(float* a) const;
void InverseFft(float* a) const;
private:
void cft1st_128(float* a) const;
void cftmdl_128(float* a) const;
void rftfsub_128(float* a) const;
void rftbsub_128(float* a) const;
void cftfsub_128(float* a) const;
void cftbsub_128(float* a) const;
void bitrv2_128(float* a) const;
bool use_sse2_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
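// Minimal usage sketch (not from the source tree): one forward/inverse round
// trip on a 128-sample block. The include path is the one used by the table
// header below; the final rescale by 2/N follows Ooura's rdft convention.
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"

void RoundTrip128(float block[128]) {
  webrtc::OouraFft fft(/*sse2_available=*/false);  // Or pass a runtime CPU probe.
  fft.Fft(block);         // In-place forward transform (packed real format).
  fft.InverseFft(block);  // In-place inverse; result is still scaled by N/2.
  for (int i = 0; i < 128; ++i) {
    block[i] *= 2.0f / 128.0f;  // Undo the scaling to recover the input.
  }
}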


@ -0,0 +1,54 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
namespace webrtc {
// These tables used to be computed at run-time. For example, refer to:
// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564
// to see the initialization code.
// Constants shared by all paths (C, SSE2, NEON).
const float rdft_w[64] = {
1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, 0.9238795638f,
0.3826834559f, 0.3826834559f, 0.9238795638f, 0.9807852507f, 0.1950903237f,
0.5555702448f, 0.8314695954f, 0.8314695954f, 0.5555702448f, 0.1950903237f,
0.9807852507f, 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, 0.9569403529f,
0.2902846634f, 0.4713967443f, 0.8819212914f, 0.7730104327f, 0.6343933344f,
0.0980171412f, 0.9951847196f, 0.7071067691f, 0.4993977249f, 0.4975923598f,
0.4945882559f, 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, 0.4157347977f,
0.4016037583f, 0.3865052164f, 0.3704755902f, 0.3535533845f, 0.3357794881f,
0.3171966672f, 0.2978496552f, 0.2777851224f, 0.2570513785f, 0.2356983721f,
0.2137775421f, 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};
// Constants used by the C and MIPS paths.
const float rdft_wk3ri_first[16] = {
1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
};
const float rdft_wk3ri_second[16] = {
-0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
-0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
-0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
-0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
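// Offline sketch (an assumption, mirroring Ooura's makect()): how the second
// half of rdft_w -- the 32-entry cosine/sine table consumed by rftfsub_128 and
// rftbsub_128 via `const float* c = rdft_w + 32;` -- could be regenerated.
// The first 32 entries (the complex FFT twiddles) and the rdft_wk3ri tables
// are derived differently (bit-reversed twiddle ordering) and are not
// reproduced here.
#include <cmath>

void MakeRftCosSinTable(float c[32]) {
  const double delta = std::atan(1.0) / 16.0;          // pi/64.
  c[0] = static_cast<float>(std::cos(delta * 16.0));   // cos(pi/4).
  c[16] = 0.5f * c[0];
  for (int j = 1; j < 16; ++j) {
    c[j] = static_cast<float>(0.5 * std::cos(delta * j));
    c[32 - j] = static_cast<float>(0.5 * std::sin(delta * j));
  }
}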


@ -0,0 +1,77 @@
/*
* Written by Wilco Dijkstra, 1996. The following email exchange establishes the
* license.
*
* From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
* Date: Fri, Jun 24, 2011 at 3:20 AM
* Subject: Re: sqrt routine
* To: Kevin Ma <kma@google.com>
* Hi Kevin,
* Thanks for asking. Those routines are public domain (originally posted to
* comp.sys.arm a long time ago), so you can use them freely for any purpose.
* Cheers,
* Wilco
*
* ----- Original Message -----
* From: "Kevin Ma" <kma@google.com>
* To: <Wilco.Dijkstra@ntlworld.com>
* Sent: Thursday, June 23, 2011 11:44 PM
* Subject: Fwd: sqrt routine
* Hi Wilco,
* I saw your sqrt routine from several web sites, including
* http://www.finesse.demon.co.uk/steven/sqrt.html.
* Just wonder if there's any copyright information with your Successive
* approximation routines, or if I can freely use it for any purpose.
* Thanks.
* Kevin
*/
// Minor modifications in code style for WebRTC, 2012.
#include "common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
/*
* Algorithm:
* Successive approximation of the equation (root + delta) ^ 2 = N
* until delta < 1. If delta < 1 we have the integer part of SQRT (N).
* Use delta = 2^i for i = 15 .. 0.
*
* Output precision is 16 bits. Note for large input values (close to
* 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
* contains the MSB information (a non-sign value). Do with caution
* if you need to cast the output to int16_t type.
*
* If the input value is negative, it returns 0.
*/
#define WEBRTC_SPL_SQRT_ITER(N) \
try1 = root + (1 << (N)); \
if (value >= try1 << (N)) \
{ \
value -= try1 << (N); \
root |= 2 << (N); \
}
int32_t WebRtcSpl_SqrtFloor(int32_t value)
{
int32_t root = 0, try1;
WEBRTC_SPL_SQRT_ITER (15);
WEBRTC_SPL_SQRT_ITER (14);
WEBRTC_SPL_SQRT_ITER (13);
WEBRTC_SPL_SQRT_ITER (12);
WEBRTC_SPL_SQRT_ITER (11);
WEBRTC_SPL_SQRT_ITER (10);
WEBRTC_SPL_SQRT_ITER ( 9);
WEBRTC_SPL_SQRT_ITER ( 8);
WEBRTC_SPL_SQRT_ITER ( 7);
WEBRTC_SPL_SQRT_ITER ( 6);
WEBRTC_SPL_SQRT_ITER ( 5);
WEBRTC_SPL_SQRT_ITER ( 4);
WEBRTC_SPL_SQRT_ITER ( 3);
WEBRTC_SPL_SQRT_ITER ( 2);
WEBRTC_SPL_SQRT_ITER ( 1);
WEBRTC_SPL_SQRT_ITER ( 0);
return root >> 1;
}
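// Sanity-check sketch (not part of the original sources): exercises the
// floor-sqrt contract described above on a small range and on a negative
// input. C linkage for WebRtcSpl_SqrtFloor is an assumption here.
#include <cassert>
#include <cstdint>

extern "C" int32_t WebRtcSpl_SqrtFloor(int32_t value);

void CheckSqrtFloor() {
  for (int32_t v = 0; v < 100000; ++v) {
    const int32_t r = WebRtcSpl_SqrtFloor(v);
    assert(r * r <= v && (r + 1) * (r + 1) > v);
  }
  assert(WebRtcSpl_SqrtFloor(8) == 2);   // Rounds down: sqrt(8) -> 2.
  assert(WebRtcSpl_SqrtFloor(-5) == 0);  // Negative input -> 0.
}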


@ -0,0 +1,29 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdint.h>
//
// WebRtcSpl_SqrtFloor(...)
//
// Returns the square root of the input value `value`. The result is rounded
// down to integer precision, i.e., sqrt(8) gives 2 as the answer.
// If `value` is a negative number then 0 is returned.
//
// Algorithm:
//
// An iterative 4 cycles/bit routine
//
// Input:
// - value : Value to calculate sqrt of
//
// Return value : Result of the sqrt calculation
//
int32_t WebRtcSpl_SqrtFloor(int32_t value);