aboutsummaryrefslogtreecommitdiffstats
path: root/core/mixer
diff options
context:
space:
mode:
Diffstat (limited to 'core/mixer')
-rw-r--r--core/mixer/defs.h9
-rw-r--r--core/mixer/mixer_c.cpp35
-rw-r--r--core/mixer/mixer_neon.cpp52
-rw-r--r--core/mixer/mixer_sse.cpp51
4 files changed, 125 insertions, 22 deletions
diff --git a/core/mixer/defs.h b/core/mixer/defs.h
index 80d9fc7f..74a474fe 100644
--- a/core/mixer/defs.h
+++ b/core/mixer/defs.h
@@ -8,6 +8,7 @@
#include "core/bufferline.h"
#include "core/resampler_limits.h"
+struct CubicCoefficients;
struct HrtfChannelState;
struct HrtfFilter;
struct MixHrtfFilter;
@@ -51,7 +52,15 @@ struct BsincState {
const float *filter;
};
+struct CubicState {
+ /* Filter coefficients, and coefficient deltas. Starting at phase index 0,
+ * each subsequent phase index follows contiguously.
+ */
+ const CubicCoefficients *filter;
+};
+
union InterpState {
+ CubicState cubic;
BsincState bsinc;
};
diff --git a/core/mixer/mixer_c.cpp b/core/mixer/mixer_c.cpp
index 9ac2a9c4..24b9f0b7 100644
--- a/core/mixer/mixer_c.cpp
+++ b/core/mixer/mixer_c.cpp
@@ -5,7 +5,8 @@
#include <limits>
#include "alnumeric.h"
-#include "core/bsinc_tables.h"
+#include "core/bsinc_defs.h"
+#include "core/cubic_defs.h"
#include "defs.h"
#include "hrtfbase.h"
@@ -20,23 +21,39 @@ struct FastBSincTag;
namespace {
-constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
-constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
+constexpr uint BsincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
+constexpr uint BsincPhaseDiffOne{1 << BsincPhaseBitDiff};
+
+constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
+constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
+constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
inline float do_point(const InterpState&, const float *RESTRICT vals, const uint)
{ return vals[0]; }
inline float do_lerp(const InterpState&, const float *RESTRICT vals, const uint frac)
{ return lerpf(vals[0], vals[1], static_cast<float>(frac)*(1.0f/MixerFracOne)); }
-inline float do_cubic(const InterpState&, const float *RESTRICT vals, const uint frac)
-{ return cubic(vals[0], vals[1], vals[2], vals[3], static_cast<float>(frac)*(1.0f/MixerFracOne)); }
+inline float do_cubic(const InterpState &istate, const float *RESTRICT vals, const uint frac)
+{
+ /* Calculate the phase index and factor. */
+ const uint pi{frac >> CubicPhaseBitDiff};
+ const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
+
+ const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(istate.cubic.filter + pi);
+
+ // Apply the phase interpolated filter.
+ return (filter->mCoeffs[0] + pf*filter->mDeltas[0]) * vals[0]
+ + (filter->mCoeffs[1] + pf*filter->mDeltas[1]) * vals[1]
+ + (filter->mCoeffs[2] + pf*filter->mDeltas[2]) * vals[2]
+ + (filter->mCoeffs[3] + pf*filter->mDeltas[3]) * vals[3];
+}
inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
{
const size_t m{istate.bsinc.m};
ASSUME(m > 0);
// Calculate the phase index and factor.
- const uint pi{frac >> FracPhaseBitDiff};
- const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
+ const uint pi{frac >> BsincPhaseBitDiff};
+ const float pf{static_cast<float>(frac & (BsincPhaseDiffOne-1)) * (1.0f/BsincPhaseDiffOne)};
const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
const float *RESTRICT phd{fil + m};
@@ -55,8 +72,8 @@ inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals,
ASSUME(m > 0);
// Calculate the phase index and factor.
- const uint pi{frac >> FracPhaseBitDiff};
- const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
+ const uint pi{frac >> BsincPhaseBitDiff};
+ const float pf{static_cast<float>(frac & (BsincPhaseDiffOne-1)) * (1.0f/BsincPhaseDiffOne)};
const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
const float *RESTRICT phd{fil + m};
diff --git a/core/mixer/mixer_neon.cpp b/core/mixer/mixer_neon.cpp
index 5a91da60..e636ac74 100644
--- a/core/mixer/mixer_neon.cpp
+++ b/core/mixer/mixer_neon.cpp
@@ -7,11 +7,13 @@
#include "alnumeric.h"
#include "core/bsinc_defs.h"
+#include "core/cubic_defs.h"
#include "defs.h"
#include "hrtfbase.h"
struct NEONTag;
struct LerpTag;
+struct CubicTag;
struct BSincTag;
struct FastBSincTag;
@@ -22,6 +24,14 @@ struct FastBSincTag;
namespace {
+constexpr uint BSincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
+constexpr uint BSincPhaseDiffOne{1 << BSincPhaseBitDiff};
+constexpr uint BSincPhaseDiffMask{BSincPhaseDiffOne - 1u};
+
+constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
+constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
+constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
+
inline float32x4_t set_f4(float l0, float l1, float l2, float l3)
{
float32x4_t ret{vmovq_n_f32(l0)};
@@ -31,9 +41,6 @@ inline float32x4_t set_f4(float l0, float l1, float l2, float l3)
return ret;
}
-constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
-constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
-
inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
const float left, const float right)
{
@@ -184,6 +191,37 @@ float *Resample_<LerpTag,NEONTag>(const InterpState*, float *RESTRICT src, uint
}
template<>
+float *Resample_<CubicTag,NEONTag>(const InterpState *state, float *RESTRICT src, uint frac,
+ uint increment, const al::span<float> dst)
+{
+ const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
+
+ src -= 1;
+ for(float &out_sample : dst)
+ {
+ const uint pi{frac >> CubicPhaseBitDiff};
+ const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
+ const float32x4_t pf4{vdupq_n_f32(pf)};
+
+ /* Apply the phase interpolated filter. */
+
+ /* f = fil + pf*phd */
+ const float32x4_t f4 = vmlaq_f32(vld1q_f32(filter[pi].mCoeffs), pf4,
+ vld1q_f32(filter[pi].mDeltas));
+ /* r = f*src */
+ float32x4_t r4{vmulq_f32(f4, vld1q_f32(src))};
+
+ r4 = vaddq_f32(r4, vrev64q_f32(r4));
+ out_sample = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0);
+
+ frac += increment;
+ src += frac>>MixerFracBits;
+ frac &= MixerFracMask;
+ }
+ return dst.data();
+}
+
+template<>
float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src, uint frac,
uint increment, const al::span<float> dst)
{
@@ -196,8 +234,8 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
- const uint pi{frac >> FracPhaseBitDiff};
- const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
+ const uint pi{frac >> BSincPhaseBitDiff};
+ const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
// Apply the scale and phase interpolated filter.
float32x4_t r4{vdupq_n_f32(0.0f)};
@@ -242,8 +280,8 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
- const uint pi{frac >> FracPhaseBitDiff};
- const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
+ const uint pi{frac >> BSincPhaseBitDiff};
+ const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
// Apply the phase interpolated filter.
float32x4_t r4{vdupq_n_f32(0.0f)};
diff --git a/core/mixer/mixer_sse.cpp b/core/mixer/mixer_sse.cpp
index 1b0d1386..4a31a0f1 100644
--- a/core/mixer/mixer_sse.cpp
+++ b/core/mixer/mixer_sse.cpp
@@ -7,10 +7,12 @@
#include "alnumeric.h"
#include "core/bsinc_defs.h"
+#include "core/cubic_defs.h"
#include "defs.h"
#include "hrtfbase.h"
struct SSETag;
+struct CubicTag;
struct BSincTag;
struct FastBSincTag;
@@ -21,8 +23,13 @@ struct FastBSincTag;
namespace {
-constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
-constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
+constexpr uint BSincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
+constexpr uint BSincPhaseDiffOne{1 << BSincPhaseBitDiff};
+constexpr uint BSincPhaseDiffMask{BSincPhaseDiffOne - 1u};
+
+constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
+constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
+constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
@@ -147,6 +154,38 @@ force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT
} // namespace
template<>
+float *Resample_<CubicTag,SSETag>(const InterpState *state, float *RESTRICT src, uint frac,
+ uint increment, const al::span<float> dst)
+{
+ const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
+
+ src -= 1;
+ for(float &out_sample : dst)
+ {
+ const uint pi{frac >> CubicPhaseBitDiff};
+ const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
+ const __m128 pf4{_mm_set1_ps(pf)};
+
+ /* Apply the phase interpolated filter. */
+
+ /* f = fil + pf*phd */
+ const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs), pf4,
+ _mm_load_ps(filter[pi].mDeltas));
+ /* r = f*src */
+ __m128 r4{_mm_mul_ps(f4, _mm_loadu_ps(src))};
+
+ r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
+ r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));
+ out_sample = _mm_cvtss_f32(r4);
+
+ frac += increment;
+ src += frac>>MixerFracBits;
+ frac &= MixerFracMask;
+ }
+ return dst.data();
+}
+
+template<>
float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src, uint frac,
uint increment, const al::span<float> dst)
{
@@ -159,8 +198,8 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src,
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
- const uint pi{frac >> FracPhaseBitDiff};
- const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
+ const uint pi{frac >> BSincPhaseBitDiff};
+ const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
// Apply the scale and phase interpolated filter.
__m128 r4{_mm_setzero_ps()};
@@ -206,8 +245,8 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
- const uint pi{frac >> FracPhaseBitDiff};
- const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
+ const uint pi{frac >> BSincPhaseBitDiff};
+ const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
// Apply the phase interpolated filter.
__m128 r4{_mm_setzero_ps()};