diff options
-rw-r--r-- | core/voice.cpp | 142 |
1 files changed, 83 insertions, 59 deletions
diff --git a/core/voice.cpp b/core/voice.cpp index 03b56317..c02210ef 100644 --- a/core/voice.cpp +++ b/core/voice.cpp @@ -298,7 +298,7 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src sample = (sample^0x8000) - 32768; index = clampi((index^0x8000) - 32768, 0, al::size(IMAStep_size)-1); - if(!skip) [[likely]] + if(skip == 0) { dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f; if(wrote == samplesToLoad) return; @@ -306,38 +306,50 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src else --skip; - int samples[8]{}; + auto decode_sample = [&sample,&index](const uint nibble) + { + sample += IMA4Codeword[nibble] * IMAStep_size[index] / 8; + sample = clampi(sample, -32768, 32767); + + index += IMA4Index_adjust[nibble]; + index = clampi(index, 0, al::size(IMAStep_size)-1); + + return sample; + }; + + /* The rest of the block is arranged as a series of nibbles, contained + * in 4 *bytes* per channel interleaved. So we can decode a series of 8 + * samples at once from each of these 4 bytes. + * + * First, decode the 8 sample sets being skipped entirely (they still + * need to be decoded for proper state on the remaining samples). + */ + const size_t startOffset{(skip&~size_t{7}) + 1}; const al::byte *nibbleData{src + (srcStep+srcChan)*4}; - for(size_t i{1};i < samplesPerBlock;i+=8) + for(;skip >= 8;skip-=8) { - /* The rest of the block is arranged as a series of nibbles, with 4 - * bytes per channel interleaved. So we can decode a series of 8 - * samples at once from these next 4 bytes. - */ uint code{uint{nibbleData[0]} | (uint{nibbleData[1]} << 8) | (uint{nibbleData[2]} << 16) | (uint{nibbleData[3]} << 24)}; + nibbleData += 4*srcStep; + for(size_t j{0};j < 8;++j) { - const uint nibble{code & 0xf}; + std::ignore = decode_sample(code & 0xf); code >>= 4; - - sample += IMA4Codeword[nibble] * IMAStep_size[index] / 8; - sample = clampi(sample, -32768, 32767); - samples[j] = sample; - - index += IMA4Index_adjust[nibble]; - index = clampi(index, 0, al::size(IMAStep_size)-1); } + } + + int samples[8]{}; + for(size_t i{startOffset};i < samplesPerBlock;i+=8) + { + uint code{uint{nibbleData[0]} | (uint{nibbleData[1]} << 8) + | (uint{nibbleData[2]} << 16) | (uint{nibbleData[3]} << 24)}; nibbleData += 4*srcStep; - /* If we're skipping these 8 samples, go on to the next set. They - * still need to be decoded to update the predictor state for the - * next set. - */ - if(skip >= 8) + for(size_t j{0};j < 8;++j) { - skip -= 8; - continue; + samples[j] = decode_sample(code & 0xf); + code >>= 4; } const size_t todo{minz(8-skip, samplesToLoad-wrote)}; @@ -389,28 +401,58 @@ inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte * /* The second history sample is "older", so it's the first to be * written out. */ - if(skip < 2) [[likely]] + if(skip == 0) { - if(!skip) [[likely]] - { - dstSamples[wrote++] = static_cast<float>(sampleHistory[1]) / 32768.0f; - if(wrote == samplesToLoad) return; - } - else - --skip; + dstSamples[wrote++] = static_cast<float>(sampleHistory[1]) / 32768.0f; + if(wrote == samplesToLoad) return; + dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f; + if(wrote == samplesToLoad) return; + } + else if(skip == 1) + { + --skip; dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f; if(wrote == samplesToLoad) return; } else skip -= 2; - int samples[8]{}; + auto decode_sample = [&sampleHistory,&delta,blockpred](const int nibble) + { + int pred{(sampleHistory[0]*MSADPCMAdaptionCoeff[blockpred][0] + + sampleHistory[1]*MSADPCMAdaptionCoeff[blockpred][1]) / 256}; + pred += ((nibble^0x08) - 0x08) * delta; + pred = clampi(pred, -32768, 32767); + + sampleHistory[1] = sampleHistory[0]; + sampleHistory[0] = pred; + + delta = (MSADPCMAdaption[nibble] * delta) / 256; + delta = maxi(16, delta); + + return pred; + }; + + /* The rest of the block is a series of nibbles, interleaved per- + * channel. Decode the number of samples that we need to skip in the + * block. + */ + const size_t startOffset{skip + 2}; size_t nibbleOffset{srcChan}; - for(size_t i{2};i < samplesPerBlock;) + for(;skip;--skip) { - /* The rest of the block is a series of nibbles, interleaved per- - * channel. Here we decode a set of (up to) 8 samples at a time to - * write out together. + const size_t byteOffset{nibbleOffset>>1}; + const size_t byteShift{((nibbleOffset&1)^1) * 4}; + std::ignore = decode_sample((input[byteOffset]>>byteShift) & 15); + nibbleOffset += srcStep; + } + + int samples[8]{}; + for(size_t i{startOffset};i < samplesPerBlock;) + { + /* Here we decode a set of (up to) 8 samples at a time to write out + * together. This is more efficient than decoding each sample + * individually and checking for the end each time. */ const size_t todo{minz(samplesPerBlock-i, 8)}; @@ -418,33 +460,15 @@ inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte * { const size_t byteOffset{nibbleOffset>>1}; const size_t byteShift{((nibbleOffset&1)^1) * 4}; - const int nibble{(input[byteOffset]>>byteShift) & 15}; + samples[j] = decode_sample((input[byteOffset]>>byteShift) & 15); nibbleOffset += srcStep; - - int pred{(sampleHistory[0]*MSADPCMAdaptionCoeff[blockpred][0] + - sampleHistory[1]*MSADPCMAdaptionCoeff[blockpred][1]) / 256}; - pred += ((nibble^0x08) - 0x08) * delta; - pred = clampi(pred, -32768, 32767); - - sampleHistory[1] = sampleHistory[0]; - sampleHistory[0] = pred; - samples[j] = pred; - - delta = (MSADPCMAdaption[nibble] * delta) / 256; - delta = maxi(16, delta); } - if(skip < todo) [[likely]] - { - const size_t towrite{minz(todo-skip, samplesToLoad-wrote)}; - for(size_t j{0};j < towrite;++j) - dstSamples[wrote++] = static_cast<float>(samples[j+skip]) / 32768.0f; - if(wrote == samplesToLoad) - return; - skip = 0; - } - else - skip -= todo; + const size_t towrite{minz(todo, samplesToLoad-wrote)}; + for(size_t j{0};j < towrite;++j) + dstSamples[wrote++] = static_cast<float>(samples[j]) / 32768.0f; + if(wrote == samplesToLoad) + return; i += todo; } |