diff options
Diffstat (limited to 'core/mixer/mixer_sse.cpp')
-rw-r--r-- | core/mixer/mixer_sse.cpp | 51 |
1 files changed, 45 insertions, 6 deletions
diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp
index 1b0d1386..4a31a0f1 100644
--- a/core/mixer/mixer_sse.cpp
+++ b/core/mixer/mixer_sse.cpp
@@ -7,10 +7,12 @@
 
 #include "alnumeric.h"
 #include "core/bsinc_defs.h"
+#include "core/cubic_defs.h"
 #include "defs.h"
 #include "hrtfbase.h"
 
 struct SSETag;
+struct CubicTag;
 struct BSincTag;
 struct FastBSincTag;
 
@@ -21,8 +23,13 @@ struct FastBSincTag;
 
 namespace {
 
-constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
-constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
+constexpr uint BSincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
+constexpr uint BSincPhaseDiffOne{1 << BSincPhaseBitDiff};
+constexpr uint BSincPhaseDiffMask{BSincPhaseDiffOne - 1u};
+
+constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
+constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
+constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
 
 #define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
 
@@ -147,6 +154,38 @@ force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT
 } // namespace
 
 template<>
+float *Resample_<CubicTag,SSETag>(const InterpState *state, float *RESTRICT src, uint frac,
+    uint increment, const al::span<float> dst)
+{
+    const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
+
+    src -= 1;
+    for(float &out_sample : dst)
+    {
+        const uint pi{frac >> CubicPhaseBitDiff};
+        const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
+        const __m128 pf4{_mm_set1_ps(pf)};
+
+        /* Apply the phase interpolated filter. */
+
+        /* f = fil + pf*phd */
+        const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs), pf4,
+            _mm_load_ps(filter[pi].mDeltas));
+        /* r = f*src */
+        __m128 r4{_mm_mul_ps(f4, _mm_loadu_ps(src))};
+
+        r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
+        r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));
+        out_sample = _mm_cvtss_f32(r4);
+
+        frac += increment;
+        src += frac>>MixerFracBits;
+        frac &= MixerFracMask;
+    }
+    return dst.data();
+}
+
+template<>
 float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src, uint frac,
     uint increment, const al::span<float> dst)
 {
@@ -159,8 +198,8 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src,
     for(float &out_sample : dst)
     {
         // Calculate the phase index and factor.
-        const uint pi{frac >> FracPhaseBitDiff};
-        const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
+        const uint pi{frac >> BSincPhaseBitDiff};
+        const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
 
         // Apply the scale and phase interpolated filter.
         __m128 r4{_mm_setzero_ps()};
@@ -206,8 +245,8 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT
     for(float &out_sample : dst)
     {
         // Calculate the phase index and factor.
-        const uint pi{frac >> FracPhaseBitDiff};
-        const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
+        const uint pi{frac >> BSincPhaseBitDiff};
+        const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
 
         // Apply the phase interpolated filter.
         __m128 r4{_mm_setzero_ps()};