Partially revert "Use a different method for HRTF mixing"

The sound localization with virtual channel mixing was just too poor, so while it's more costly to do per-source HRTF mixing, it's unavoidable if you want good localization. This is only partially reverted because having the virtual channel is still beneficial, particularly with B-Format rendering and effect mixing which otherwise skip HRTF processing. As before, the number of virtual channels can potentially be customized, specifying more or fewer channels depending on the system's needs.
author: Chris Robinson <[email protected]> 2014-11-23 10:49:54 -0800
committer: Chris Robinson <[email protected]> 2014-11-23 10:49:54 -0800
commit: 45d6bb58a4293c5b1ab229cea86e0ef24a2a084b (patch)
tree: ec03ad6eac812ae209f8d973687afa5b99616133 /Alc/mixer_neon.c
parent: fc3608b381c0492674b4cfc1da0dcf5389ace722 (diff)
1 files changed, 32 insertions, 0 deletions
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 413bd627..8ce17644 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -9,6 +9,38 @@
 #include "hrtf.h"
 
 
+/* NEON: adds Coeffs[i]*{left,right} into Values[(Offset+i)&HRIR_MASK], then
+ * advances Coeffs[i] by CoeffStep[i]. Rows are handled in pairs (i, i+1). */
+static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
+                                   const ALuint IrSize,
+                                   ALfloat (*restrict Coeffs)[2],
+                                   const ALfloat (*restrict CoeffStep)[2],
+                                   ALfloat left, ALfloat right)
+{
+    /* Lanes are {left, right, left, right}, lining up with two [2] rows. */
+    const float32x2_t gain_pair = vset_lane_f32(right, vdup_n_f32(left), 1);
+    const float32x4_t gains = vcombine_f32(gain_pair, gain_pair);
+    ALuint tap;
+
+    for(tap = 0;tap < IrSize;tap += 2)
+    {
+        /* Each Values row index is masked with HRIR_MASK on its own. */
+        const ALuint first = (Offset+tap)&HRIR_MASK;
+        const ALuint second = (first+1)&HRIR_MASK;
+        float32_t *const row0 = (float32_t*)&Values[first][0];
+        float32_t *const row1 = (float32_t*)&Values[second][0];
+        float32_t *const taps = (float32_t*)&Coeffs[tap][0];
+        const float32x4_t cur = vld1q_f32(taps);
+        const float32x4_t step = vld1q_f32(&CoeffStep[tap][0]);
+        const float32x4_t acc = vmlaq_f32(vcombine_f32(vld1_f32(row0), vld1_f32(row1)),
+                                          cur, gains);
+
+        vst1_f32(row0, vget_low_f32(acc));
+        vst1_f32(row1, vget_high_f32(acc));
+        vst1q_f32(taps, vaddq_f32(cur, step));
+    }
+}
+
 static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
                                const ALuint IrSize,
                                ALfloat (*restrict Coeffs)[2],
author	Chris Robinson <[email protected]>	2014-11-23 10:49:54 -0800
committer	Chris Robinson <[email protected]>	2014-11-23 10:49:54 -0800
commit	45d6bb58a4293c5b1ab229cea86e0ef24a2a084b (patch)
tree	ec03ad6eac812ae209f8d973687afa5b99616133 /Alc/mixer_neon.c
parent	fc3608b381c0492674b4cfc1da0dcf5389ace722 (diff)