diff options
author | Sven Göthel <[email protected]> | 2024-01-05 13:52:12 +0100 |
---|---|---|
committer | Sven Göthel <[email protected]> | 2024-01-05 13:52:12 +0100 |
commit | ec98cdacc85ff0202852472c7756586437912f22 (patch) | |
tree | 42414746a27ab35cb8cdbc95af521d74821e57f4 /core/mixer | |
parent | fd5269bec9a5fe4815974b1786a037e6a247bfd2 (diff) | |
parent | b82cd2e60edb8fbe5fdd3567105ae76a016a554c (diff) |
Diffstat (limited to 'core/mixer')
-rw-r--r-- | core/mixer/defs.h | 21 | ||||
-rw-r--r-- | core/mixer/mixer_c.cpp | 68 | ||||
-rw-r--r-- | core/mixer/mixer_neon.cpp | 41 | ||||
-rw-r--r-- | core/mixer/mixer_sse.cpp | 28 | ||||
-rw-r--r-- | core/mixer/mixer_sse2.cpp | 4 | ||||
-rw-r--r-- | core/mixer/mixer_sse41.cpp | 4 |
6 files changed, 94 insertions, 72 deletions
diff --git a/core/mixer/defs.h b/core/mixer/defs.h index 48daca9b..4d0d19bf 100644 --- a/core/mixer/defs.h +++ b/core/mixer/defs.h @@ -2,7 +2,8 @@ #define CORE_MIXER_DEFS_H #include <array> -#include <stdlib.h> +#include <cstdlib> +#include <variant> #include "alspan.h" #include "core/bufferline.h" @@ -17,12 +18,12 @@ using uint = unsigned int; using float2 = std::array<float,2>; -constexpr int MixerFracBits{16}; -constexpr int MixerFracOne{1 << MixerFracBits}; -constexpr int MixerFracMask{MixerFracOne - 1}; -constexpr int MixerFracHalf{MixerFracOne >> 1}; +inline constexpr int MixerFracBits{16}; +inline constexpr int MixerFracOne{1 << MixerFracBits}; +inline constexpr int MixerFracMask{MixerFracOne - 1}; +inline constexpr int MixerFracHalf{MixerFracOne >> 1}; -constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */ +inline constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */ enum class Resampler : uint8_t { @@ -59,10 +60,7 @@ struct CubicState { const CubicCoefficients *filter; }; -union InterpState { - CubicState cubic; - BsincState bsinc; -}; +using InterpState = std::variant<CubicState,BsincState>; using ResamplerFunc = void(*)(const InterpState *state, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst); @@ -94,7 +92,8 @@ void MixDirectHrtf_(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOu /* Vectorized resampler helpers */ template<size_t N> -inline void InitPosArrays(uint frac, uint increment, uint (&frac_arr)[N], uint (&pos_arr)[N]) +inline void InitPosArrays(uint frac, uint increment, const al::span<uint,N> frac_arr, + const al::span<uint,N> pos_arr) { pos_arr[0] = 0; frac_arr[0] = frac; diff --git a/core/mixer/mixer_c.cpp b/core/mixer/mixer_c.cpp index 28a92ef7..93306bba 100644 --- a/core/mixer/mixer_c.cpp +++ b/core/mixer/mixer_c.cpp @@ -28,53 +28,53 @@ constexpr uint CubicPhaseDiffBits{MixerFracBits - CubicPhaseBits}; constexpr uint CubicPhaseDiffOne{1 << CubicPhaseDiffBits}; constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u}; -inline float do_point(const InterpState&, const float *RESTRICT vals, const uint) +inline float do_point(const float *RESTRICT vals, const uint) { return vals[0]; } -inline float do_lerp(const InterpState&, const float *RESTRICT vals, const uint frac) +inline float do_lerp(const float *RESTRICT vals, const uint frac) { return lerpf(vals[0], vals[1], static_cast<float>(frac)*(1.0f/MixerFracOne)); } -inline float do_cubic(const InterpState &istate, const float *RESTRICT vals, const uint frac) +inline float do_cubic(const CubicState &istate, const float *RESTRICT vals, const uint frac) { /* Calculate the phase index and factor. */ const uint pi{frac >> CubicPhaseDiffBits}; const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)}; - const float *RESTRICT fil{al::assume_aligned<16>(istate.cubic.filter[pi].mCoeffs)}; - const float *RESTRICT phd{al::assume_aligned<16>(istate.cubic.filter[pi].mDeltas)}; + const float *RESTRICT fil{al::assume_aligned<16>(istate.filter[pi].mCoeffs.data())}; + const float *RESTRICT phd{al::assume_aligned<16>(istate.filter[pi].mDeltas.data())}; /* Apply the phase interpolated filter. */ return (fil[0] + pf*phd[0])*vals[0] + (fil[1] + pf*phd[1])*vals[1] + (fil[2] + pf*phd[2])*vals[2] + (fil[3] + pf*phd[3])*vals[3]; } -inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac) +inline float do_bsinc(const BsincState &istate, const float *RESTRICT vals, const uint frac) { - const size_t m{istate.bsinc.m}; + const size_t m{istate.m}; ASSUME(m > 0); /* Calculate the phase index and factor. */ const uint pi{frac >> BsincPhaseDiffBits}; const float pf{static_cast<float>(frac&BsincPhaseDiffMask) * (1.0f/BsincPhaseDiffOne)}; - const float *RESTRICT fil{istate.bsinc.filter + m*pi*2}; + const float *RESTRICT fil{istate.filter + m*pi*2_uz}; const float *RESTRICT phd{fil + m}; - const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2_uz*m}; const float *RESTRICT spd{scd + m}; /* Apply the scale and phase interpolated filter. */ float r{0.0f}; for(size_t j_f{0};j_f < m;j_f++) - r += (fil[j_f] + istate.bsinc.sf*scd[j_f] + pf*(phd[j_f] + istate.bsinc.sf*spd[j_f])) * vals[j_f]; + r += (fil[j_f] + istate.sf*scd[j_f] + pf*(phd[j_f] + istate.sf*spd[j_f])) * vals[j_f]; return r; } -inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac) +inline float do_fastbsinc(const BsincState &istate, const float *RESTRICT vals, const uint frac) { - const size_t m{istate.bsinc.m}; + const size_t m{istate.m}; ASSUME(m > 0); /* Calculate the phase index and factor. */ const uint pi{frac >> BsincPhaseDiffBits}; const float pf{static_cast<float>(frac&BsincPhaseDiffMask) * (1.0f/BsincPhaseDiffOne)}; - const float *RESTRICT fil{istate.bsinc.filter + m*pi*2}; + const float *RESTRICT fil{istate.filter + m*pi*2_uz}; const float *RESTRICT phd{fil + m}; /* Apply the phase interpolated filter. */ @@ -84,16 +84,30 @@ inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, return r; } -using SamplerT = float(&)(const InterpState&, const float*RESTRICT, const uint); +using SamplerT = float(&)(const float*RESTRICT, const uint); template<SamplerT Sampler> -void DoResample(const InterpState *state, const float *RESTRICT src, uint frac, +void DoResample(const float *RESTRICT src, uint frac, const uint increment, + const al::span<float> dst) +{ + ASSUME(frac < MixerFracOne); + for(float &out : dst) + { + out = Sampler(src, frac); + + frac += increment; + src += frac>>MixerFracBits; + frac &= MixerFracMask; + } +} + +template<typename T, typename U> +void DoResample(T sampler, const U istate, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) { - const InterpState istate{*state}; ASSUME(frac < MixerFracOne); for(float &out : dst) { - out = Sampler(istate, src, frac); + out = sampler(istate, src, frac); frac += increment; src += frac>>MixerFracBits; @@ -146,29 +160,35 @@ force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT } // namespace template<> -void Resample_<PointTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, +void Resample_<PointTag,CTag>(const InterpState*, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) -{ DoResample<do_point>(state, src, frac, increment, dst); } +{ DoResample<do_point>(src, frac, increment, dst); } template<> -void Resample_<LerpTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, +void Resample_<LerpTag,CTag>(const InterpState*, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) -{ DoResample<do_lerp>(state, src, frac, increment, dst); } +{ DoResample<do_lerp>(src, frac, increment, dst); } template<> void Resample_<CubicTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) -{ DoResample<do_cubic>(state, src-1, frac, increment, dst); } +{ DoResample(do_cubic, std::get<CubicState>(*state), src-1, frac, increment, dst); } template<> void Resample_<BSincTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) -{ DoResample<do_bsinc>(state, src-state->bsinc.l, frac, increment, dst); } +{ + const auto istate = std::get<BsincState>(*state); + DoResample(do_bsinc, istate, src-istate.l, frac, increment, dst); +} template<> void Resample_<FastBSincTag,CTag>(const InterpState *state, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) -{ DoResample<do_fastbsinc>(state, src-state->bsinc.l, frac, increment, dst); } +{ + const auto istate = std::get<BsincState>(*state); + DoResample(do_fastbsinc, istate, src-istate.l, frac, increment, dst); +} template<> diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp index ead775af..9fa2425f 100644 --- a/core/mixer/mixer_neon.cpp +++ b/core/mixer/mixer_neon.cpp @@ -146,12 +146,11 @@ void Resample_<LerpTag,NEONTag>(const InterpState*, const float *RESTRICT src, u const int32x4_t increment4 = vdupq_n_s32(static_cast<int>(increment*4)); const float32x4_t fracOne4 = vdupq_n_f32(1.0f/MixerFracOne); const int32x4_t fracMask4 = vdupq_n_s32(MixerFracMask); - alignas(16) uint pos_[4], frac_[4]; - int32x4_t pos4, frac4; - InitPosArrays(frac, increment, frac_, pos_); - frac4 = vld1q_s32(reinterpret_cast<int*>(frac_)); - pos4 = vld1q_s32(reinterpret_cast<int*>(pos_)); + alignas(16) std::array<uint,4> pos_, frac_; + InitPosArrays(frac, increment, al::span{frac_}, al::span{pos_}); + int32x4_t frac4 = vld1q_s32(reinterpret_cast<int*>(frac_.data())); + int32x4_t pos4 = vld1q_s32(reinterpret_cast<int*>(pos_.data())); auto dst_iter = dst.begin(); for(size_t todo{dst.size()>>2};todo;--todo) @@ -197,7 +196,7 @@ void Resample_<CubicTag,NEONTag>(const InterpState *state, const float *RESTRICT { ASSUME(frac < MixerFracOne); - const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter); + const auto *RESTRICT filter = al::assume_aligned<16>(std::get<CubicState>(*state).filter); src -= 1; for(float &out_sample : dst) @@ -209,8 +208,8 @@ void Resample_<CubicTag,NEONTag>(const InterpState *state, const float *RESTRICT /* Apply the phase interpolated filter. */ /* f = fil + pf*phd */ - const float32x4_t f4 = vmlaq_f32(vld1q_f32(filter[pi].mCoeffs), pf4, - vld1q_f32(filter[pi].mDeltas)); + const float32x4_t f4 = vmlaq_f32(vld1q_f32(filter[pi].mCoeffs.data()), pf4, + vld1q_f32(filter[pi].mDeltas.data())); /* r = f*src */ float32x4_t r4{vmulq_f32(f4, vld1q_f32(src))}; @@ -227,13 +226,14 @@ template<> void Resample_<BSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) { - const float *const filter{state->bsinc.filter}; - const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)}; - const size_t m{state->bsinc.m}; + const auto &bsinc = std::get<BsincState>(*state); + const float *const filter{bsinc.filter}; + const float32x4_t sf4{vdupq_n_f32(bsinc.sf)}; + const size_t m{bsinc.m}; ASSUME(m > 0); ASSUME(frac < MixerFracOne); - src -= state->bsinc.l; + src -= bsinc.l; for(float &out_sample : dst) { // Calculate the phase index and factor. @@ -244,9 +244,9 @@ void Resample_<BSincTag,NEONTag>(const InterpState *state, const float *RESTRICT float32x4_t r4{vdupq_n_f32(0.0f)}; { const float32x4_t pf4{vdupq_n_f32(pf)}; - const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT fil{filter + m*pi*2_uz}; const float *RESTRICT phd{fil + m}; - const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2_uz*m}; const float *RESTRICT spd{scd + m}; size_t td{m >> 2}; size_t j{0u}; @@ -271,15 +271,16 @@ void Resample_<BSincTag,NEONTag>(const InterpState *state, const float *RESTRICT } template<> -void Resample_<FastBSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, uint frac, - const uint increment, const al::span<float> dst) +void Resample_<FastBSincTag,NEONTag>(const InterpState *state, const float *RESTRICT src, + uint frac, const uint increment, const al::span<float> dst) { - const float *const filter{state->bsinc.filter}; - const size_t m{state->bsinc.m}; + const auto &bsinc = std::get<BsincState>(*state); + const float *const filter{bsinc.filter}; + const size_t m{bsinc.m}; ASSUME(m > 0); ASSUME(frac < MixerFracOne); - src -= state->bsinc.l; + src -= bsinc.l; for(float &out_sample : dst) { // Calculate the phase index and factor. @@ -290,7 +291,7 @@ void Resample_<FastBSincTag,NEONTag>(const InterpState *state, const float *REST float32x4_t r4{vdupq_n_f32(0.0f)}; { const float32x4_t pf4{vdupq_n_f32(pf)}; - const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT fil{filter + m*pi*2_uz}; const float *RESTRICT phd{fil + m}; size_t td{m >> 2}; size_t j{0u}; diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp index 70f77c14..809d585d 100644 --- a/core/mixer/mixer_sse.cpp +++ b/core/mixer/mixer_sse.cpp @@ -159,7 +159,7 @@ void Resample_<CubicTag,SSETag>(const InterpState *state, const float *RESTRICT { ASSUME(frac < MixerFracOne); - const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter); + const auto *RESTRICT filter = al::assume_aligned<16>(std::get<CubicState>(*state).filter); src -= 1; for(float &out_sample : dst) @@ -171,8 +171,8 @@ void Resample_<CubicTag,SSETag>(const InterpState *state, const float *RESTRICT /* Apply the phase interpolated filter. */ /* f = fil + pf*phd */ - const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs), pf4, - _mm_load_ps(filter[pi].mDeltas)); + const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs.data()), pf4, + _mm_load_ps(filter[pi].mDeltas.data())); /* r = f*src */ __m128 r4{_mm_mul_ps(f4, _mm_loadu_ps(src))}; @@ -190,13 +190,14 @@ template<> void Resample_<BSincTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) { - const float *const filter{state->bsinc.filter}; - const __m128 sf4{_mm_set1_ps(state->bsinc.sf)}; - const size_t m{state->bsinc.m}; + const auto &bsinc = std::get<BsincState>(*state); + const float *const filter{bsinc.filter}; + const __m128 sf4{_mm_set1_ps(bsinc.sf)}; + const size_t m{bsinc.m}; ASSUME(m > 0); ASSUME(frac < MixerFracOne); - src -= state->bsinc.l; + src -= bsinc.l; for(float &out_sample : dst) { // Calculate the phase index and factor. @@ -207,9 +208,9 @@ void Resample_<BSincTag,SSETag>(const InterpState *state, const float *RESTRICT __m128 r4{_mm_setzero_ps()}; { const __m128 pf4{_mm_set1_ps(pf)}; - const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT fil{filter + m*pi*2_uz}; const float *RESTRICT phd{fil + m}; - const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2_uz*m}; const float *RESTRICT spd{scd + m}; size_t td{m >> 2}; size_t j{0u}; @@ -238,12 +239,13 @@ template<> void Resample_<FastBSincTag,SSETag>(const InterpState *state, const float *RESTRICT src, uint frac, const uint increment, const al::span<float> dst) { - const float *const filter{state->bsinc.filter}; - const size_t m{state->bsinc.m}; + const auto &bsinc = std::get<BsincState>(*state); + const float *const filter{bsinc.filter}; + const size_t m{bsinc.m}; ASSUME(m > 0); ASSUME(frac < MixerFracOne); - src -= state->bsinc.l; + src -= bsinc.l; for(float &out_sample : dst) { // Calculate the phase index and factor. @@ -254,7 +256,7 @@ void Resample_<FastBSincTag,SSETag>(const InterpState *state, const float *RESTR __m128 r4{_mm_setzero_ps()}; { const __m128 pf4{_mm_set1_ps(pf)}; - const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT fil{filter + m*pi*2_uz}; const float *RESTRICT phd{fil + m}; size_t td{m >> 2}; size_t j{0u}; diff --git a/core/mixer/mixer_sse2.cpp b/core/mixer/mixer_sse2.cpp index edaaf7a1..aa08b7ed 100644 --- a/core/mixer/mixer_sse2.cpp +++ b/core/mixer/mixer_sse2.cpp @@ -44,8 +44,8 @@ void Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRICT src, u const __m128 fracOne4{_mm_set1_ps(1.0f/MixerFracOne)}; const __m128i fracMask4{_mm_set1_epi32(MixerFracMask)}; - alignas(16) uint pos_[4], frac_[4]; - InitPosArrays(frac, increment, frac_, pos_); + alignas(16) std::array<uint,4> pos_, frac_; + InitPosArrays(frac, increment, al::span{frac_}, al::span{pos_}); __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]), static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))}; __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]), diff --git a/core/mixer/mixer_sse41.cpp b/core/mixer/mixer_sse41.cpp index 8ccd9fd3..d66f9ce5 100644 --- a/core/mixer/mixer_sse41.cpp +++ b/core/mixer/mixer_sse41.cpp @@ -45,8 +45,8 @@ void Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRICT src, u const __m128 fracOne4{_mm_set1_ps(1.0f/MixerFracOne)}; const __m128i fracMask4{_mm_set1_epi32(MixerFracMask)}; - alignas(16) uint pos_[4], frac_[4]; - InitPosArrays(frac, increment, frac_, pos_); + alignas(16) std::array<uint,4> pos_, frac_; + InitPosArrays(frac, increment, al::span{frac_}, al::span{pos_}); __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]), static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))}; __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]), |