aboutsummaryrefslogtreecommitdiffstats
path: root/common/phase_shifter.h
diff options
context:
space:
mode:
author Sven Gothel <[email protected]> 2023-11-28 12:51:46 +0100
committer Sven Gothel <[email protected]> 2023-11-28 12:51:46 +0100
commit 1aaf4f070011490bcece50394b9b32dfa593fd9e (patch)
tree 17d68284e401a35eea3d3a574d986d446a60763a /common/phase_shifter.h
parent 6e7cee4fa9a8af03f28ca26cd89f8357390dfc90 (diff)
parent 571b546f35eead77ce109f8d4dd6c3de3199d573 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'common/phase_shifter.h')
-rw-r--r-- common/phase_shifter.h 39
1 files changed, 14 insertions, 25 deletions
diff --git a/common/phase_shifter.h b/common/phase_shifter.h
index 0d4166bc..e1a83dab 100644
--- a/common/phase_shifter.h
+++ b/common/phase_shifter.h
@@ -9,11 +9,14 @@
#include <array>
#include <stddef.h>
+#include <type_traits>
#include "alcomplex.h"
#include "alspan.h"
+struct NoInit { };
+
/* Implements a wide-band +90 degree phase-shift. Note that this should be
* given one sample less of a delay (FilterSize/2 - 1) compared to the direct
* signal delay (FilterSize/2) to properly align.
@@ -53,14 +56,16 @@ struct PhaseShifterT {
std::fill_n(fftBuffer.get(), fft_size, complex_d{});
fftBuffer[half_size] = 1.0;
- forward_fft(al::as_span(fftBuffer.get(), fft_size));
- for(size_t i{0};i < half_size+1;++i)
+ forward_fft(al::span{fftBuffer.get(), fft_size});
+ fftBuffer[0] *= std::numeric_limits<double>::epsilon();
+ for(size_t i{1};i < half_size;++i)
fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()};
+ fftBuffer[half_size] *= std::numeric_limits<double>::epsilon();
for(size_t i{half_size+1};i < fft_size;++i)
fftBuffer[i] = std::conj(fftBuffer[fft_size - i]);
- inverse_fft(al::as_span(fftBuffer.get(), fft_size));
+ inverse_fft(al::span{fftBuffer.get(), fft_size});
- auto fftiter = fftBuffer.get() + half_size + (FilterSize/2 - 1);
+ auto fftiter = fftBuffer.get() + fft_size - 1;
for(float &coeff : mCoeffs)
{
coeff = static_cast<float>(fftiter->real() / double{fft_size});
@@ -68,29 +73,12 @@ struct PhaseShifterT {
}
}
+ PhaseShifterT(NoInit) { }
+
void process(al::span<float> dst, const float *RESTRICT src) const;
private:
#if defined(HAVE_NEON)
- /* There doesn't seem to be NEON intrinsics to do this kind of stipple
- * shuffling, so there's two custom methods for it.
- */
- static auto shuffle_2020(float32x4_t a, float32x4_t b)
- {
- float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 0))};
- ret = vsetq_lane_f32(vgetq_lane_f32(a, 2), ret, 1);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 0), ret, 2);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 2), ret, 3);
- return ret;
- }
- static auto shuffle_3131(float32x4_t a, float32x4_t b)
- {
- float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 1))};
- ret = vsetq_lane_f32(vgetq_lane_f32(a, 3), ret, 1);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 1), ret, 2);
- ret = vsetq_lane_f32(vgetq_lane_f32(b, 3), ret, 3);
- return ret;
- }
static auto unpacklo(float32x4_t a, float32x4_t b)
{
float32x2x2_t result{vzip_f32(vget_low_f32(a), vget_low_f32(b))};
@@ -171,9 +159,10 @@ inline void PhaseShifterT<S>::process(al::span<float> dst, const float *RESTRICT
const float32x4_t coeffs{vld1q_f32(&mCoeffs[j])};
const float32x4_t s0{vld1q_f32(&src[j*2])};
const float32x4_t s1{vld1q_f32(&src[j*2 + 4])};
+ const float32x4x2_t values{vuzpq_f32(s0, s1)};
- r04 = vmlaq_f32(r04, shuffle_2020(s0, s1), coeffs);
- r14 = vmlaq_f32(r14, shuffle_3131(s0, s1), coeffs);
+ r04 = vmlaq_f32(r04, values.val[0], coeffs);
+ r14 = vmlaq_f32(r14, values.val[1], coeffs);
}
src += 2;