about | summary | refs | log | tree | commit | diff | stats
path: root/core/mixer
diff options
context:
space:
mode:
author Sven Göthel <[email protected]> 2024-01-05 13:52:12 +0100
committer Sven Göthel <[email protected]> 2024-01-05 13:52:12 +0100
commit ec98cdacc85ff0202852472c7756586437912f22 (patch)
tree 42414746a27ab35cb8cdbc95af521d74821e57f4 /core/mixer
parent fd5269bec9a5fe4815974b1786a037e6a247bfd2 (diff)
parent b82cd2e60edb8fbe5fdd3567105ae76a016a554c (diff)
Merge remote-tracking branch 'upstream/master' (HEAD, master)
Diffstat (limited to 'core/mixer')
-rw-r--r-- core/mixer/defs.h 21
-rw-r--r-- core/mixer/mixer_c.cpp 68
-rw-r--r-- core/mixer/mixer_neon.cpp 41
-rw-r--r-- core/mixer/mixer_sse.cpp 28
-rw-r--r-- core/mixer/mixer_sse2.cpp 4
-rw-r--r-- core/mixer/mixer_sse41.cpp 4
6 files changed, 94 insertions, 72 deletions
diff --git a/core/mixer/defs.h b/core/mixer/defs.h
index 48daca9b..4d0d19bf 100644
--- a/core/mixer/defs.h
+++ b/core/mixer/defs.h
@@ -2,7 +2,8 @@
#define CORE_MIXER_DEFS_H
#include <array>
-#include <stdlib.h>
+#include <cstdlib>
+#include <variant>
#include "alspan.h"
#include "core/bufferline.h"
@@ -17,12 +18,12 @@ using uint = unsigned int;
using float2 = std::array<float,2>;
-constexpr int MixerFracBits{16};
-constexpr int MixerFracOne{1 << MixerFracBits};
-constexpr int MixerFracMask{MixerFracOne - 1};
-constexpr int MixerFracHalf{MixerFracOne >> 1};
+inline constexpr int MixerFracBits{16};
+inline constexpr int MixerFracOne{1 << MixerFracBits};
+inline constexpr int MixerFracMask{MixerFracOne - 1};
+inline constexpr int MixerFracHalf{MixerFracOne >> 1};
-constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */
+inline constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */
enum class Resampler : uint8_t {
@@ -59,10 +60,7 @@ struct CubicState {
const CubicCoefficients *filter;
};
-union InterpState {
- CubicState cubic;
- BsincState bsinc;
-};
+using InterpState = std::variant<CubicState,BsincState>;
using ResamplerFunc = void(*)(const InterpState *state, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst);
@@ -94,7 +92,8 @@ void MixDirectHrtf_(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOu
/* Vectorized resampler helpers */
template<size_t N>
-inline void InitPosArrays(uint frac, uint increment, uint (&frac_arr)[N], uint (&pos_arr)[N])
+inline void InitPosArrays(uint frac, uint increment, const al::span<uint,N> frac_arr,
+ const al::span<uint,N> pos_arr)
{
pos_arr[0] = 0;
frac_arr[0] = frac;
diff --git a/core/mixer/mixer_c.cpp b/core/mixer/mixer_c.cpp
index 28a92ef7..93306bba 100644
--- a/core/mixer/mixer_c.cpp
+++ b/core/mixer/mixer_c.cpp
@@ -28,53 +28,53 @@ constexpr uint CubicPhaseDiffBits{MixerFracBits - CubicPhaseBits};
constexpr uint CubicPhaseDiffOne{1 << CubicPhaseDiffBits};
constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
-inline float do_point(const InterpState&, const float *RESTRICT vals, const uint)
+inline float do_point(const float *RESTRICT vals, const uint)
{ return vals[0]; }
-inline float do_lerp(const InterpState&, const float *RESTRICT vals, const uint frac)
+inline float do_lerp(const float *RESTRICT vals, const uint frac)
{ return lerpf(vals[0], vals[1], static_cast<float>(frac)*(1.0f/MixerFracOne)); }
-inline float do_cubic(const InterpState &istate, const float *RESTRICT vals, const uint frac)
+inline float do_cubic(const CubicState &istate, const float *RESTRICT vals, const uint frac)
{
/* Calculate the phase index and factor. */
const uint pi{frac >> CubicPhaseDiffBits};
const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
- const float *RESTRICT fil{al::assume_aligned<16>(istate.cubic.filter[pi].mCoeffs)};
- const float *RESTRICT phd{al::assume_aligned<16>(istate.cubic.filter[pi].mDeltas)};
+ const float *RESTRICT fil{al::assume_aligned<16>(istate.filter[pi].mCoeffs.data())};
+ const float *RESTRICT phd{al::assume_aligned<16>(istate.filter[pi].mDeltas.data())};
/* Apply the phase interpolated filter. */
return (fil[0] + pf*phd[0])*vals[0] + (fil[1] + pf*phd[1])*vals[1]
+ (fil[2] + pf*phd[2])*vals[2] + (fil[3] + pf*phd[3])*vals[3];
}
-inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
+inline float do_bsinc(const BsincState &istate, const float *RESTRICT vals, const uint frac)
{
- const size_t m{istate.bsinc.m};
+ const size_t m{istate.m};
ASSUME(m > 0);
/* Calculate the phase index and factor. */
const uint pi{frac >> BsincPhaseDiffBits};
const float pf{static_cast<float>(frac&BsincPhaseDiffMask) * (1.0f/BsincPhaseDiffOne)};
- const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
+ const float *RESTRICT fil{istate.filter + m*pi*2_uz};
const float *RESTRICT phd{fil + m};
- const float *RESTRICT scd{fil + BSincPhaseCount*2*m};
+ const float *RESTRICT scd{fil + BSincPhaseCount*2_uz*m};
const float *RESTRICT spd{scd + m};
/* Apply the scale and phase interpolated filter. */
float r{0.0f};
for(size_t j_f{0};j_f < m;j_f++)
- r += (fil[j_f] + istate.bsinc.sf*scd[j_f] + pf*(phd[j_f] + istate.bsinc.sf*spd[j_f])) * vals[j_f];
+ r += (fil[j_f] + istate.sf*scd[j_f] + pf*(phd[j_f] + istate.sf*spd[j_f])) * vals[j_f];
return r;
}
-inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
+inline float do_fastbsinc(const BsincState &istate, const float *RESTRICT vals, const uint frac)
{
- const size_t m{istate.bsinc.m};
+ const size_t m{istate.m};
ASSUME(m > 0);
/* Calculate the phase index and factor. */
const uint pi{frac >> BsincPhaseDiffBits};
const float pf{static_cast<float>(frac&BsincPhaseDiffMask) * (1.0f/BsincPhaseDiffOne)};
- const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
+ const float *RESTRICT fil{istate.filter + m*pi*2_uz};
const float *RESTRICT phd{fil + m};
/* Apply the phase interpolated filter. */
@@ -84,16 +84,30 @@ inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals,
return r;
}
-using SamplerT = float(&)(const InterpState&, const float*RESTRICT, const uint);
+using SamplerT = float(&)(const float*RESTRICT, const uint);
template<SamplerT Sampler>
-void DoResample(const InterpState *state, const float *RESTRICT src, uint frac,
+void DoResample(const float *RESTRICT src, uint frac, const uint increment,
+ const al::span<float> dst)
+{
+ ASSUME(frac < MixerFracOne);
+ for(float &out : dst)
+ {
+ out = Sampler(src, frac);
+
+ frac += increment;
+ src += frac>>MixerFracBits;
+ frac &= MixerFracMask;
+ }
+}
+
+template<typename T, typename U>
+void DoResample(T sampler, const U istate, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
{
- const InterpState istate{*state};
ASSUME(frac < MixerFracOne);
for(float &out : dst)
{
- out = Sampler(istate, src, frac);
+ out = sampler(istate, src, frac);
frac += increment;
src += frac>>MixerFracBits;
@@ -146,29 +160,35 @@ force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT
} // namespace
template<>
-void Resample_<PointTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+void Resample_<PointTag,CTag>(const InterpState*, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
-{ DoResample<do_point>(state, src, frac, increment, dst); }
+{ DoResample<do_point>(src, frac, increment, dst); }
template<>
-void Resample_<LerpTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
+void Resample_<LerpTag,CTag>(const InterpState*, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
-{ DoResample<do_lerp>(state, src, frac, increment, dst); }
+{ DoResample<do_lerp>(src, frac, increment, dst); }
template<>
void Resample_<CubicTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
-{ DoResample<do_cubic>(state, src-1, frac, increment, dst); }
+{ DoResample(do_cubic, std::get<CubicState>(*state), src-1, frac, increment, dst); }
template<>
void Resample_<BSincTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
-{ DoResample<do_bsinc>(state, src-state->bsinc.l, frac, increment, dst); }
+{
+ const auto istate = std::get<BsincState>(*state);
+ DoResample(do_bsinc, istate, src-istate.l, frac, increment, dst);
+}
template<>
void Resample_<FastBSincTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
-{ DoResample<do_fastbsinc>(state, src-state->bsinc.l, frac, increment, dst); }
+{
+ const auto istate = std::get<BsincState>(*state);
+ DoResample(do_fastbsinc, istate, src-istate.l, frac, increment, dst);
+}
template<>
diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp
index ead775af..9fa2425f 100644
--- a/core/mixer/mixer_neon.cpp
+++ b/core/mixer/mixer_neon.cpp
@@ -146,12 +146,11 @@ void Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRICT src, u
const int32x4_t increment4 = vdupq_n_s32(static_cast<int>(increment*4));
const float32x4_t fracOne4 = vdupq_n_f32(1.0f/MixerFracOne);
const int32x4_t fracMask4 = vdupq_n_s32(MixerFracMask);
- alignas(16) uint pos_[4], frac_[4];
- int32x4_t pos4, frac4;
- InitPosArrays(frac, increment, frac_, pos_);
- frac4 = vld1q_s32(reinterpret_cast<int*>(frac_));
- pos4 = vld1q_s32(reinterpret_cast<int*>(pos_));
+ alignas(16) std::array<uint,4> pos_, frac_;
+ InitPosArrays(frac, increment, al::span{frac_}, al::span{pos_});
+ int32x4_t frac4 = vld1q_s32(reinterpret_cast<int*>(frac_.data()));
+ int32x4_t pos4 = vld1q_s32(reinterpret_cast<int*>(pos_.data()));
auto dst_iter = dst.begin();
for(size_t todo{dst.size()>>2};todo;--todo)
@@ -197,7 +196,7 @@ void Resample_<CubicTag,NEONTag>(const InterpState *state, const float *RESTRICT
{
ASSUME(frac < MixerFracOne);
- const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
+ const auto *RESTRICT filter = al::assume_aligned<16>(std::get<CubicState>(*state).filter);
src -= 1;
for(float &out_sample : dst)
@@ -209,8 +208,8 @@ void Resample_<CubicTag,NEONTag>(const InterpState *state, const float *RESTRICT
/* Apply the phase interpolated filter. */
/* f = fil + pf*phd */
- const float32x4_t f4 = vmlaq_f32(vld1q_f32(filter[pi].mCoeffs), pf4,
- vld1q_f32(filter[pi].mDeltas));
+ const float32x4_t f4 = vmlaq_f32(vld1q_f32(filter[pi].mCoeffs.data()), pf4,
+ vld1q_f32(filter[pi].mDeltas.data()));
/* r = f*src */
float32x4_t r4{vmulq_f32(f4, vld1q_f32(src))};
@@ -227,13 +226,14 @@ template<>
void Resample_<BSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
{
- const float *const filter{state->bsinc.filter};
- const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)};
- const size_t m{state->bsinc.m};
+ const auto &bsinc = std::get<BsincState>(*state);
+ const float *const filter{bsinc.filter};
+ const float32x4_t sf4{vdupq_n_f32(bsinc.sf)};
+ const size_t m{bsinc.m};
ASSUME(m > 0);
ASSUME(frac < MixerFracOne);
- src -= state->bsinc.l;
+ src -= bsinc.l;
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
@@ -244,9 +244,9 @@ void Resample_<BSincTag,NEONTag>(const InterpState *state, const float *RESTRICT
float32x4_t r4{vdupq_n_f32(0.0f)};
{
const float32x4_t pf4{vdupq_n_f32(pf)};
- const float *RESTRICT fil{filter + m*pi*2};
+ const float *RESTRICT fil{filter + m*pi*2_uz};
const float *RESTRICT phd{fil + m};
- const float *RESTRICT scd{fil + BSincPhaseCount*2*m};
+ const float *RESTRICT scd{fil + BSincPhaseCount*2_uz*m};
const float *RESTRICT spd{scd + m};
size_t td{m >> 2};
size_t j{0u};
@@ -271,15 +271,16 @@ void Resample_<BSincTag,NEONTag>(const InterpState *state, const float *RESTRICT
}
template<>
-void Resample_<FastBSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac,
- const uint increment, const al::span<float> dst)
+void Resample_<FastBSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src,
+ uint frac, const uint increment, const al::span<float> dst)
{
- const float *const filter{state->bsinc.filter};
- const size_t m{state->bsinc.m};
+ const auto &bsinc = std::get<BsincState>(*state);
+ const float *const filter{bsinc.filter};
+ const size_t m{bsinc.m};
ASSUME(m > 0);
ASSUME(frac < MixerFracOne);
- src -= state->bsinc.l;
+ src -= bsinc.l;
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
@@ -290,7 +291,7 @@ void Resample_<FastBSincTag,NEONTag>(const InterpState *state, const float *REST
float32x4_t r4{vdupq_n_f32(0.0f)};
{
const float32x4_t pf4{vdupq_n_f32(pf)};
- const float *RESTRICT fil{filter + m*pi*2};
+ const float *RESTRICT fil{filter + m*pi*2_uz};
const float *RESTRICT phd{fil + m};
size_t td{m >> 2};
size_t j{0u};
diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp
index 70f77c14..809d585d 100644
--- a/core/mixer/mixer_sse.cpp
+++ b/core/mixer/mixer_sse.cpp
@@ -159,7 +159,7 @@ void Resample_<CubicTag,SSETag>(const InterpState *state, const float *RESTRICT
{
ASSUME(frac < MixerFracOne);
- const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
+ const auto *RESTRICT filter = al::assume_aligned<16>(std::get<CubicState>(*state).filter);
src -= 1;
for(float &out_sample : dst)
@@ -171,8 +171,8 @@ void Resample_<CubicTag,SSETag>(const InterpState *state, const float *RESTRICT
/* Apply the phase interpolated filter. */
/* f = fil + pf*phd */
- const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs), pf4,
- _mm_load_ps(filter[pi].mDeltas));
+ const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs.data()), pf4,
+ _mm_load_ps(filter[pi].mDeltas.data()));
/* r = f*src */
__m128 r4{_mm_mul_ps(f4, _mm_loadu_ps(src))};
@@ -190,13 +190,14 @@ template<>
void Resample_<BSincTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
{
- const float *const filter{state->bsinc.filter};
- const __m128 sf4{_mm_set1_ps(state->bsinc.sf)};
- const size_t m{state->bsinc.m};
+ const auto &bsinc = std::get<BsincState>(*state);
+ const float *const filter{bsinc.filter};
+ const __m128 sf4{_mm_set1_ps(bsinc.sf)};
+ const size_t m{bsinc.m};
ASSUME(m > 0);
ASSUME(frac < MixerFracOne);
- src -= state->bsinc.l;
+ src -= bsinc.l;
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
@@ -207,9 +208,9 @@ void Resample_<BSincTag,SSETag>(const InterpState *state, const float *RESTRICT
__m128 r4{_mm_setzero_ps()};
{
const __m128 pf4{_mm_set1_ps(pf)};
- const float *RESTRICT fil{filter + m*pi*2};
+ const float *RESTRICT fil{filter + m*pi*2_uz};
const float *RESTRICT phd{fil + m};
- const float *RESTRICT scd{fil + BSincPhaseCount*2*m};
+ const float *RESTRICT scd{fil + BSincPhaseCount*2_uz*m};
const float *RESTRICT spd{scd + m};
size_t td{m >> 2};
size_t j{0u};
@@ -238,12 +239,13 @@ template<>
void Resample_<FastBSincTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac,
const uint increment, const al::span<float> dst)
{
- const float *const filter{state->bsinc.filter};
- const size_t m{state->bsinc.m};
+ const auto &bsinc = std::get<BsincState>(*state);
+ const float *const filter{bsinc.filter};
+ const size_t m{bsinc.m};
ASSUME(m > 0);
ASSUME(frac < MixerFracOne);
- src -= state->bsinc.l;
+ src -= bsinc.l;
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
@@ -254,7 +256,7 @@ void Resample_<FastBSincTag,SSETag>(const InterpState *state, const float *RESTR
__m128 r4{_mm_setzero_ps()};
{
const __m128 pf4{_mm_set1_ps(pf)};
- const float *RESTRICT fil{filter + m*pi*2};
+ const float *RESTRICT fil{filter + m*pi*2_uz};
const float *RESTRICT phd{fil + m};
size_t td{m >> 2};
size_t j{0u};
diff --git a/core/mixer/mixer_sse2.cpp b/core/mixer/mixer_sse2.cpp
index edaaf7a1..aa08b7ed 100644
--- a/core/mixer/mixer_sse2.cpp
+++ b/core/mixer/mixer_sse2.cpp
@@ -44,8 +44,8 @@ void Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRICT src, u
const __m128 fracOne4{_mm_set1_ps(1.0f/MixerFracOne)};
const __m128i fracMask4{_mm_set1_epi32(MixerFracMask)};
- alignas(16) uint pos_[4], frac_[4];
- InitPosArrays(frac, increment, frac_, pos_);
+ alignas(16) std::array<uint,4> pos_, frac_;
+ InitPosArrays(frac, increment, al::span{frac_}, al::span{pos_});
__m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]),
static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))};
__m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]),
diff --git a/core/mixer/mixer_sse41.cpp b/core/mixer/mixer_sse41.cpp
index 8ccd9fd3..d66f9ce5 100644
--- a/core/mixer/mixer_sse41.cpp
+++ b/core/mixer/mixer_sse41.cpp
@@ -45,8 +45,8 @@ void Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRICT src, u
const __m128 fracOne4{_mm_set1_ps(1.0f/MixerFracOne)};
const __m128i fracMask4{_mm_set1_epi32(MixerFracMask)};
- alignas(16) uint pos_[4], frac_[4];
- InitPosArrays(frac, increment, frac_, pos_);
+ alignas(16) std::array<uint,4> pos_, frac_;
+ InitPosArrays(frac, increment, al::span{frac_}, al::span{pos_});
__m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]),
static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))};
__m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]),