diff options
author | Chris Robinson <[email protected]> | 2020-05-02 16:42:24 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2020-05-02 16:42:24 -0700 |
commit | 6823fa2de211e57bca9fb73b90ae04b7edc5f484 (patch) | |
tree | c974195fc11df38aaf4b759cbbca25a5aa765da5 /alc/mixer/mixer_neon.cpp | |
parent | 3898b77dd6b3d0bd9fad6b4f65f15b3c5b44c39b (diff) |
Improve the leftover and realignment mixing loops
Using a mask helps the compiler recognize that the leftover loop (for any
remaining non-multiple-of-4 samples) and the realignment loop will each run at
most 3 iterations, which lets it unroll them or otherwise apply more meaningful
optimizations. Previously it would try to vectorize and partially unroll the
loops, which is wasteful when there would never be enough iterations to vectorize.
Diffstat (limited to 'alc/mixer/mixer_neon.cpp')
-rw-r--r-- | alc/mixer/mixer_neon.cpp | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp index 6f58a72b..610ea25c 100644 --- a/alc/mixer/mixer_neon.cpp +++ b/alc/mixer/mixer_neon.cpp @@ -219,7 +219,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu { const float delta{(Counter > 0) ? 1.0f / static_cast<float>(Counter) : 0.0f}; const auto min_len = minz(Counter, InSamples.size()); - const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()); + const auto aligned_len = minz((min_len+3) & ~size_t{3}, InSamples.size()) - min_len; for(FloatBufferLine &output : OutBuffer) { float *RESTRICT dst{al::assume_aligned<16>(output.data()+OutPos)}; @@ -258,7 +258,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu step_count = vgetq_lane_f32(step_count4, 0); } /* Mix with applying left over gain steps that aren't aligned multiples of 4. */ - for(;pos != min_len;++pos) + for(uint_fast32_t leftover{min_len&3};leftover;++pos,--leftover) { dst[pos] += InSamples[pos] * (gain + step*step_count); step_count += 1.0f; @@ -269,7 +269,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu gain += step*step_count; /* Mix until pos is aligned with 4 or the mix is done. */ - for(;pos != aligned_len;++pos) + for(uint_fast32_t leftover{aligned_len&3};leftover;++pos,--leftover) dst[pos] += InSamples[pos] * gain; } *CurrentGains = gain; @@ -289,7 +289,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu pos += 4; } while(--todo); } - for(;pos != InSamples.size();++pos) + for(uint_fast32_t leftover{InSamples.size()&3};leftover;++pos,--leftover) dst[pos] += InSamples[pos] * gain; } } |