diff options
author | Chris Robinson <[email protected]> | 2014-11-23 10:49:54 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2014-11-23 10:49:54 -0800 |
commit | 45d6bb58a4293c5b1ab229cea86e0ef24a2a084b (patch) | |
tree | ec03ad6eac812ae209f8d973687afa5b99616133 /Alc/mixer_neon.c | |
parent | fc3608b381c0492674b4cfc1da0dcf5389ace722 (diff) |
Partially revert "Use a different method for HRTF mixing"
The sound localization with virtual channel mixing was just too poor, so while
it's more costly to do per-source HRTF mixing, it's unavoidable if you want
good localization.
This is only partially reverted because having the virtual channel is still
beneficial, particularly with B-Format rendering and effect mixing, which
otherwise skip HRTF processing. As before, the number of virtual channels can
potentially be customized, specifying more or fewer channels depending on the
system's needs.
Diffstat (limited to 'Alc/mixer_neon.c')
-rw-r--r-- | Alc/mixer_neon.c | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c index 413bd627..8ce17644 100644 --- a/Alc/mixer_neon.c +++ b/Alc/mixer_neon.c @@ -9,6 +9,38 @@ #include "hrtf.h" +static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2], + const ALuint IrSize, + ALfloat (*restrict Coeffs)[2], + const ALfloat (*restrict CoeffStep)[2], + ALfloat left, ALfloat right) +{ + ALuint c; + float32x4_t leftright4; + { + float32x2_t leftright2 = vdup_n_f32(0.0); + leftright2 = vset_lane_f32(left, leftright2, 0); + leftright2 = vset_lane_f32(right, leftright2, 1); + leftright4 = vcombine_f32(leftright2, leftright2); + } + for(c = 0;c < IrSize;c += 2) + { + const ALuint o0 = (Offset+c)&HRIR_MASK; + const ALuint o1 = (o0+1)&HRIR_MASK; + float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]), + vld1_f32((float32_t*)&Values[o1][0])); + float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]); + float32x4_t deltas = vld1q_f32(&CoeffStep[c][0]); + + vals = vmlaq_f32(vals, coefs, leftright4); + coefs = vaddq_f32(coefs, deltas); + + vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals)); + vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals)); + vst1q_f32(&Coeffs[c][0], coefs); + } +} + static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2], const ALuint IrSize, ALfloat (*restrict Coeffs)[2], |