about | summary | refs | log | tree | commit | diff | stats
path: root/alc/mixer/mixer_neon.cpp
diff options
context:
space:
mode:
author: Chris Robinson <[email protected]> 2020-05-02 16:42:24 -0700
committer: Chris Robinson <[email protected]> 2020-05-02 16:42:24 -0700
commit: 6823fa2de211e57bca9fb73b90ae04b7edc5f484 (patch)
tree: c974195fc11df38aaf4b759cbbca25a5aa765da5 /alc/mixer/mixer_neon.cpp
parent: 3898b77dd6b3d0bd9fad6b4f65f15b3c5b44c39b (diff)
Improve the leftover and realignment mixing loops
Using a mask helps the compiler recognize that the leftover (any remaining non-multiple-of-4) and realignment loops will only have 3 iterations at most, which it can unroll or otherwise make more meaningful optimizations for. Previously it would try to vectorize and partially unroll the loops, which is wasteful when there would never be enough to vectorize.
Diffstat (limited to 'alc/mixer/mixer_neon.cpp')
-rw-r--r-- alc/mixer/mixer_neon.cpp 8
1 files changed, 4 insertions, 4 deletions
diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp
index 6f58a72b..610ea25c 100644
--- a/alc/mixer/mixer_neon.cpp
+++ b/alc/mixer/mixer_neon.cpp
@@ -219,7 +219,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu
{
const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f};
const auto min_len = minz(Counter, InSamples.size());
- const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size());
+ const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len;
for(FloatBufferLine &output : OutBuffer)
{
float *RESTRICT dst{al::assume_aligned<16>(output.data()+OutPos)};
@@ -258,7 +258,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu
step_count = vgetq_lane_f32(step_count4, 0);
}
/* Mix with applying left over gain steps that aren't aligned multiples of 4. */
- for(;pos != min_len;++pos)
+ for(uint_fast32_t leftover{min_len&3};leftover;++pos,--leftover)
{
dst[pos] += InSamples[pos] * (gain + step*step_count);
step_count += 1.0f;
@@ -269,7 +269,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu
gain += step*step_count;
/* Mix until pos is aligned with 4 or the mix is done. */
- for(;pos != aligned_len;++pos)
+ for(uint_fast32_t leftover{aligned_len&3};leftover;++pos,--leftover)
dst[pos] += InSamples[pos] * gain;
}
*CurrentGains = gain;
@@ -289,7 +289,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu
pos += 4;
} while(--todo);
}
- for(;pos != InSamples.size();++pos)
+ for(uint_fast32_t leftover{InSamples.size()&3};leftover;++pos,--leftover)
dst[pos] += InSamples[pos] * gain;
}
}