From 6823fa2de211e57bca9fb73b90ae04b7edc5f484 Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Sat, 2 May 2020 16:42:24 -0700 Subject: Improve the leftover and realignment mixing loops Using a mask helps the compiler recognize that the leftover (any remaining non-multiple-of-4) and realignment loops will only have 3 iterations at most, which it can unroll or otherwise make more meaningful optimizations for. Previously it would try to vectorize and partially unroll the loops, which is wasteful when there would never be enough to vectorize. --- alc/mixer/mixer_neon.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'alc/mixer/mixer_neon.cpp') diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp index 6f58a72b..610ea25c 100644 --- a/alc/mixer/mixer_neon.cpp +++ b/alc/mixer/mixer_neon.cpp @@ -219,7 +219,7 @@ void Mix_(const al::span InSamples, const al::span 0) ? 1.0f / static_cast(Counter) : 0.0f}; const auto min_len = minz(Counter, InSamples.size()); - const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()); + const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len; for(FloatBufferLine &output : OutBuffer) { float *RESTRICT dst{al::assume_aligned<16>(output.data()+OutPos)}; @@ -258,7 +258,7 @@ void Mix_(const al::span InSamples, const al::span(const al::span InSamples, const al::span(const al::span InSamples, const al::span