about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- common/phase_shifter.h 24
1 files changed, 3 insertions, 21 deletions
diff --git a/common/phase_shifter.h b/common/phase_shifter.h
index 6b0ad512..b9c889c2 100644
--- a/common/phase_shifter.h
+++ b/common/phase_shifter.h
@@ -75,25 +75,6 @@ struct PhaseShifterT {
private:
#if defined(HAVE_NEON)
- /* There doesn't seem to be NEON intrinsics to do this kind of stipple
- * shuffling, so there's two custom methods for it.
- */
- static auto shuffle_2020(float32x4_t a, float32x4_t b)
- {
- float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 0))};
- ret = vsetq_lane_f32(vgetq_lane_f32(a, 2), ret, 1);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 0), ret, 2);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 2), ret, 3);
- return ret;
- }
- static auto shuffle_3131(float32x4_t a, float32x4_t b)
- {
- float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 1))};
- ret = vsetq_lane_f32(vgetq_lane_f32(a, 3), ret, 1);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 1), ret, 2);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 3), ret, 3);
- return ret;
- }
static auto unpacklo(float32x4_t a, float32x4_t b)
{
float32x2x2_t result{vzip_f32(vget_low_f32(a), vget_low_f32(b))};
@@ -174,9 +155,10 @@ inline void PhaseShifterT<S>::process(al::span<float> dst, const float *RESTRICT
const float32x4_t coeffs{vld1q_f32(&mCoeffs[j])};
const float32x4_t s0{vld1q_f32(&src[j*2])};
const float32x4_t s1{vld1q_f32(&src[j*2 + 4])};
+ const float32x4x2_t values{vuzpq_f32(s0, s1)};
- r04 = vmlaq_f32(r04, shuffle_2020(s0, s1), coeffs);
- r14 = vmlaq_f32(r14, shuffle_3131(s0, s1), coeffs);
+ r04 = vmlaq_f32(r04, values.val[0], coeffs);
+ r14 = vmlaq_f32(r14, values.val[1], coeffs);
}
src += 2;