diff options
Diffstat (limited to 'core/mixer')
-rw-r--r-- | core/mixer/defs.h | 9 | ||||
-rw-r--r-- | core/mixer/hrtfbase.h | 12 | ||||
-rw-r--r-- | core/mixer/hrtfdefs.h | 5 | ||||
-rw-r--r-- | core/mixer/mixer_c.cpp | 18 | ||||
-rw-r--r-- | core/mixer/mixer_neon.cpp | 18 | ||||
-rw-r--r-- | core/mixer/mixer_sse.cpp | 18 | ||||
-rw-r--r-- | core/mixer/mixer_sse2.cpp | 8 |
7 files changed, 46 insertions, 42 deletions
diff --git a/core/mixer/defs.h b/core/mixer/defs.h index acf60350..ba304f22 100644 --- a/core/mixer/defs.h +++ b/core/mixer/defs.h @@ -6,6 +6,7 @@ #include "alspan.h" #include "core/bufferline.h" +#include "core/resampler_limits.h" struct HrtfChannelState; struct HrtfFilter; @@ -19,12 +20,6 @@ constexpr int MixerFracBits{12}; constexpr int MixerFracOne{1 << MixerFracBits}; constexpr int MixerFracMask{MixerFracOne - 1}; -/* Maximum number of samples to pad on the ends of a buffer for resampling. - * Note that the padding is symmetric (half at the beginning and half at the - * end)! - */ -constexpr int MaxResamplerPadding{48}; - constexpr float GainSilenceThreshold{0.00001f}; /* -100dB */ @@ -80,7 +75,7 @@ template<typename InstTag> void MixHrtfBlend_(const float *InSamples, float2 *AccumSamples, const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams, const size_t BufferSize); template<typename InstTag> -void MixDirectHrtf_(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void MixDirectHrtf_(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize); diff --git a/core/mixer/hrtfbase.h b/core/mixer/hrtfbase.h index 7419f960..79b09a3d 100644 --- a/core/mixer/hrtfbase.h +++ b/core/mixer/hrtfbase.h @@ -12,7 +12,7 @@ using uint = unsigned int; using ApplyCoeffsT = void(&)(float2 *RESTRICT Values, const size_t irSize, - const HrirArray &Coeffs, const float left, const float right); + const ConstHrirSpan Coeffs, const float left, const float right); template<ApplyCoeffsT ApplyCoeffs> inline void MixHrtfBase(const float *InSamples, float2 *RESTRICT AccumSamples, const size_t IrSize, @@ -20,7 +20,7 @@ inline void MixHrtfBase(const float *InSamples, float2 *RESTRICT AccumSamples, c { ASSUME(BufferSize > 0); - const HrirArray &Coeffs = *hrtfparams->Coeffs; + const ConstHrirSpan Coeffs{hrtfparams->Coeffs}; const float gainstep{hrtfparams->GainStep}; const float gain{hrtfparams->Gain}; @@ -45,9 +45,9 @@ inline void MixHrtfBlendBase(const float *InSamples, float2 *RESTRICT AccumSampl { ASSUME(BufferSize > 0); - const auto &OldCoeffs = oldparams->Coeffs; + const ConstHrirSpan OldCoeffs{oldparams->Coeffs}; const float oldGainStep{oldparams->Gain / static_cast<float>(BufferSize)}; - const auto &NewCoeffs = *newparams->Coeffs; + const ConstHrirSpan NewCoeffs{newparams->Coeffs}; const float newGainStep{newparams->GainStep}; if LIKELY(oldparams->Gain > GainSilenceThreshold) @@ -84,7 +84,7 @@ inline void MixHrtfBlendBase(const float *InSamples, float2 *RESTRICT AccumSampl } template<ApplyCoeffsT ApplyCoeffs> -inline void MixDirectHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +inline void MixDirectHrtfBase(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *RESTRICT AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) { @@ -133,7 +133,7 @@ inline void MixDirectHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOu ChanState->mSplitter.processHfScale(tempbuf, ChanState->mHfScale); /* Now apply the HRIR coefficients to this channel. */ - const auto &Coeffs = ChanState->mCoeffs; + const ConstHrirSpan Coeffs{ChanState->mCoeffs}; for(size_t i{0u};i < BufferSize;++i) { const float insample{tempbuf[i]}; diff --git a/core/mixer/hrtfdefs.h b/core/mixer/hrtfdefs.h index 89a9bb8d..7046a31e 100644 --- a/core/mixer/hrtfdefs.h +++ b/core/mixer/hrtfdefs.h @@ -3,6 +3,7 @@ #include <array> +#include "alspan.h" #include "core/ambidefs.h" #include "core/bufferline.h" #include "core/filters/splitter.h" @@ -28,9 +29,11 @@ constexpr uint MinIrLength{8}; constexpr uint HrtfDirectDelay{256}; using HrirArray = std::array<float2,HrirLength>; +using HrirSpan = al::span<float2,HrirLength>; +using ConstHrirSpan = al::span<const float2,HrirLength>; struct MixHrtfFilter { - const HrirArray *Coeffs; + const ConstHrirSpan Coeffs; uint2 Delay; float Gain; float GainStep; diff --git a/core/mixer/mixer_c.cpp b/core/mixer/mixer_c.cpp index ff9538a4..f82f7dd1 100644 --- a/core/mixer/mixer_c.cpp +++ b/core/mixer/mixer_c.cpp @@ -32,15 +32,16 @@ inline float do_cubic(const InterpState&, const float *RESTRICT vals, const uint inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac) { const size_t m{istate.bsinc.m}; + ASSUME(m > 0); // Calculate the phase index and factor. const uint pi{frac >> FracPhaseBitDiff}; const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)}; - const float *fil{istate.bsinc.filter + m*pi*4}; - const float *phd{fil + m}; - const float *scd{phd + m}; - const float *spd{scd + m}; + const float *RESTRICT fil{istate.bsinc.filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; // Apply the scale and phase interpolated filter. float r{0.0f}; @@ -51,13 +52,14 @@ inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, con inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac) { const size_t m{istate.bsinc.m}; + ASSUME(m > 0); // Calculate the phase index and factor. const uint pi{frac >> FracPhaseBitDiff}; const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)}; - const float *fil{istate.bsinc.filter + m*pi*4}; - const float *phd{fil + m}; + const float *RESTRICT fil{istate.bsinc.filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; // Apply the phase interpolated filter. float r{0.0f}; @@ -83,7 +85,7 @@ float *DoResample(const InterpState *state, float *RESTRICT src, uint frac, uint return dst.data(); } -inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs, +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs, const float left, const float right) { ASSUME(IrSize >= MinIrLength); @@ -149,7 +151,7 @@ void MixHrtfBlend_<CTag>(const float *InSamples, float2 *AccumSamples, const uin } template<> -void MixDirectHrtf_<CTag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void MixDirectHrtf_<CTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) { diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp index f3e5f130..a3afdc6b 100644 --- a/core/mixer/mixer_neon.cpp +++ b/core/mixer/mixer_neon.cpp @@ -34,7 +34,7 @@ inline float32x4_t set_f4(float l0, float l1, float l2, float l3) constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits}; constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff}; -inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs, +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs, const float left, const float right) { float32x4_t leftright4; @@ -118,6 +118,7 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src const float *const filter{state->bsinc.filter}; const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)}; const size_t m{state->bsinc.m}; + ASSUME(m > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -130,10 +131,10 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src float32x4_t r4{vdupq_n_f32(0.0f)}; { const float32x4_t pf4{vdupq_n_f32(pf)}; - const float *fil{filter + m*pi*4}; - const float *phd{fil + m}; - const float *scd{phd + m}; - const float *spd{scd + m}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; size_t td{m >> 2}; size_t j{0u}; @@ -163,6 +164,7 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT { const float *const filter{state->bsinc.filter}; const size_t m{state->bsinc.m}; + ASSUME(m > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -175,8 +177,8 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT float32x4_t r4{vdupq_n_f32(0.0f)}; { const float32x4_t pf4{vdupq_n_f32(pf)}; - const float *fil{filter + m*pi*4}; - const float *phd{fil + m}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; size_t td{m >> 2}; size_t j{0u}; @@ -213,7 +215,7 @@ void MixHrtfBlend_<NEONTag>(const float *InSamples, float2 *AccumSamples, const } template<> -void MixDirectHrtf_<NEONTag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void MixDirectHrtf_<NEONTag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) { diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp index c0fd8fa1..3cfb00a5 100644 --- a/core/mixer/mixer_sse.cpp +++ b/core/mixer/mixer_sse.cpp @@ -26,7 +26,7 @@ constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff}; #define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z)) -inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const HrirArray &Coeffs, +inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs, const float left, const float right) { const __m128 lrlr{_mm_setr_ps(left, right, left, right)}; @@ -82,6 +82,7 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src, const float *const filter{state->bsinc.filter}; const __m128 sf4{_mm_set1_ps(state->bsinc.sf)}; const size_t m{state->bsinc.m}; + ASSUME(m > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -94,10 +95,10 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src, __m128 r4{_mm_setzero_ps()}; { const __m128 pf4{_mm_set1_ps(pf)}; - const float *fil{filter + m*pi*4}; - const float *phd{fil + m}; - const float *scd{phd + m}; - const float *spd{scd + m}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; + const float *RESTRICT scd{fil + BSincPhaseCount*2*m}; + const float *RESTRICT spd{scd + m}; size_t td{m >> 2}; size_t j{0u}; @@ -128,6 +129,7 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT { const float *const filter{state->bsinc.filter}; const size_t m{state->bsinc.m}; + ASSUME(m > 0); src -= state->bsinc.l; for(float &out_sample : dst) @@ -140,8 +142,8 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT __m128 r4{_mm_setzero_ps()}; { const __m128 pf4{_mm_set1_ps(pf)}; - const float *fil{filter + m*pi*4}; - const float *phd{fil + m}; + const float *RESTRICT fil{filter + m*pi*2}; + const float *RESTRICT phd{fil + m}; size_t td{m >> 2}; size_t j{0u}; @@ -179,7 +181,7 @@ void MixHrtfBlend_<SSETag>(const float *InSamples, float2 *AccumSamples, const u } template<> -void MixDirectHrtf_<SSETag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, +void MixDirectHrtf_<SSETag>(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf, HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize) { diff --git a/core/mixer/mixer_sse2.cpp b/core/mixer/mixer_sse2.cpp index f91d5dcd..99d04210 100644 --- a/core/mixer/mixer_sse2.cpp +++ b/core/mixer/mixer_sse2.cpp @@ -52,10 +52,10 @@ float *Resample_<LerpTag,SSE2Tag>(const InterpState*, float *RESTRICT src, uint auto dst_iter = dst.begin(); for(size_t todo{dst.size()>>2};todo;--todo) { - const int pos0{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)))}; - const int pos1{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(1, 1, 1, 1)))}; - const int pos2{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(2, 2, 2, 2)))}; - const int pos3{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(3, 3, 3, 3)))}; + const int pos0{_mm_cvtsi128_si32(pos4)}; + const int pos1{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 4))}; + const int pos2{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 8))}; + const int pos3{_mm_cvtsi128_si32(_mm_srli_si128(pos4, 12))}; const __m128 val1{_mm_setr_ps(src[pos0 ], src[pos1 ], src[pos2 ], src[pos3 ])}; const __m128 val2{_mm_setr_ps(src[pos0+1], src[pos1+1], src[pos2+1], src[pos3+1])}; |