aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--core/voice.cpp142
1 files changed, 83 insertions, 59 deletions
diff --git a/core/voice.cpp b/core/voice.cpp
index 03b56317..c02210ef 100644
--- a/core/voice.cpp
+++ b/core/voice.cpp
@@ -298,7 +298,7 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src
sample = (sample^0x8000) - 32768;
index = clampi((index^0x8000) - 32768, 0, al::size(IMAStep_size)-1);
- if(!skip) [[likely]]
+ if(skip == 0)
{
dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f;
if(wrote == samplesToLoad) return;
@@ -306,38 +306,50 @@ inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src
else
--skip;
- int samples[8]{};
+ auto decode_sample = [&sample,&index](const uint nibble)
+ {
+ sample += IMA4Codeword[nibble] * IMAStep_size[index] / 8;
+ sample = clampi(sample, -32768, 32767);
+
+ index += IMA4Index_adjust[nibble];
+ index = clampi(index, 0, al::size(IMAStep_size)-1);
+
+ return sample;
+ };
+
+ /* The rest of the block is arranged as a series of nibbles, stored as
+ * 4 *bytes* per channel, interleaved. So we can decode a series of 8
+ * samples at once from each group of 4 bytes.
+ *
+ * First, decode the sets of 8 samples that are skipped entirely (they
+ * still need to be decoded to update the predictor state for the rest).
+ */
+ const size_t startOffset{(skip&~size_t{7}) + 1};
const al::byte *nibbleData{src + (srcStep+srcChan)*4};
- for(size_t i{1};i < samplesPerBlock;i+=8)
+ for(;skip >= 8;skip-=8)
{
- /* The rest of the block is arranged as a series of nibbles, with 4
- * bytes per channel interleaved. So we can decode a series of 8
- * samples at once from these next 4 bytes.
- */
uint code{uint{nibbleData[0]} | (uint{nibbleData[1]} << 8)
| (uint{nibbleData[2]} << 16) | (uint{nibbleData[3]} << 24)};
+ nibbleData += 4*srcStep;
+
for(size_t j{0};j < 8;++j)
{
- const uint nibble{code & 0xf};
+ std::ignore = decode_sample(code & 0xf);
code >>= 4;
-
- sample += IMA4Codeword[nibble] * IMAStep_size[index] / 8;
- sample = clampi(sample, -32768, 32767);
- samples[j] = sample;
-
- index += IMA4Index_adjust[nibble];
- index = clampi(index, 0, al::size(IMAStep_size)-1);
}
+ }
+
+ int samples[8]{};
+ for(size_t i{startOffset};i < samplesPerBlock;i+=8)
+ {
+ uint code{uint{nibbleData[0]} | (uint{nibbleData[1]} << 8)
+ | (uint{nibbleData[2]} << 16) | (uint{nibbleData[3]} << 24)};
nibbleData += 4*srcStep;
- /* If we're skipping these 8 samples, go on to the next set. They
- * still need to be decoded to update the predictor state for the
- * next set.
- */
- if(skip >= 8)
+ for(size_t j{0};j < 8;++j)
{
- skip -= 8;
- continue;
+ samples[j] = decode_sample(code & 0xf);
+ code >>= 4;
}
const size_t todo{minz(8-skip, samplesToLoad-wrote)};
@@ -389,28 +401,58 @@ inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *
/* The second history sample is "older", so it's the first to be
* written out.
*/
- if(skip < 2) [[likely]]
+ if(skip == 0)
{
- if(!skip) [[likely]]
- {
- dstSamples[wrote++] = static_cast<float>(sampleHistory[1]) / 32768.0f;
- if(wrote == samplesToLoad) return;
- }
- else
- --skip;
+ dstSamples[wrote++] = static_cast<float>(sampleHistory[1]) / 32768.0f;
+ if(wrote == samplesToLoad) return;
+ dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f;
+ if(wrote == samplesToLoad) return;
+ }
+ else if(skip == 1)
+ {
+ --skip;
dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f;
if(wrote == samplesToLoad) return;
}
else
skip -= 2;
- int samples[8]{};
+ auto decode_sample = [&sampleHistory,&delta,blockpred](const int nibble)
+ {
+ int pred{(sampleHistory[0]*MSADPCMAdaptionCoeff[blockpred][0] +
+ sampleHistory[1]*MSADPCMAdaptionCoeff[blockpred][1]) / 256};
+ pred += ((nibble^0x08) - 0x08) * delta;
+ pred = clampi(pred, -32768, 32767);
+
+ sampleHistory[1] = sampleHistory[0];
+ sampleHistory[0] = pred;
+
+ delta = (MSADPCMAdaption[nibble] * delta) / 256;
+ delta = maxi(16, delta);
+
+ return pred;
+ };
+
+ /* The rest of the block is a series of nibbles, interleaved per-
+ * channel. First decode and discard the samples being skipped, so the
+ * decoder state is correct for the samples that follow.
+ */
+ const size_t startOffset{skip + 2};
size_t nibbleOffset{srcChan};
- for(size_t i{2};i < samplesPerBlock;)
+ for(;skip;--skip)
{
- /* The rest of the block is a series of nibbles, interleaved per-
- * channel. Here we decode a set of (up to) 8 samples at a time to
- * write out together.
+ const size_t byteOffset{nibbleOffset>>1};
+ const size_t byteShift{((nibbleOffset&1)^1) * 4};
+ std::ignore = decode_sample((input[byteOffset]>>byteShift) & 15);
+ nibbleOffset += srcStep;
+ }
+
+ int samples[8]{};
+ for(size_t i{startOffset};i < samplesPerBlock;)
+ {
+ /* Here we decode a set of (up to) 8 samples at a time to write out
+ * together. This is more efficient than decoding each sample
+ * individually and checking for the end each time.
*/
const size_t todo{minz(samplesPerBlock-i, 8)};
@@ -418,33 +460,15 @@ inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *
{
const size_t byteOffset{nibbleOffset>>1};
const size_t byteShift{((nibbleOffset&1)^1) * 4};
- const int nibble{(input[byteOffset]>>byteShift) & 15};
+ samples[j] = decode_sample((input[byteOffset]>>byteShift) & 15);
nibbleOffset += srcStep;
-
- int pred{(sampleHistory[0]*MSADPCMAdaptionCoeff[blockpred][0] +
- sampleHistory[1]*MSADPCMAdaptionCoeff[blockpred][1]) / 256};
- pred += ((nibble^0x08) - 0x08) * delta;
- pred = clampi(pred, -32768, 32767);
-
- sampleHistory[1] = sampleHistory[0];
- sampleHistory[0] = pred;
- samples[j] = pred;
-
- delta = (MSADPCMAdaption[nibble] * delta) / 256;
- delta = maxi(16, delta);
}
- if(skip < todo) [[likely]]
- {
- const size_t towrite{minz(todo-skip, samplesToLoad-wrote)};
- for(size_t j{0};j < towrite;++j)
- dstSamples[wrote++] = static_cast<float>(samples[j+skip]) / 32768.0f;
- if(wrote == samplesToLoad)
- return;
- skip = 0;
- }
- else
- skip -= todo;
+ const size_t towrite{minz(todo, samplesToLoad-wrote)};
+ for(size_t j{0};j < towrite;++j)
+ dstSamples[wrote++] = static_cast<float>(samples[j]) / 32768.0f;
+ if(wrote == samplesToLoad)
+ return;
i += todo;
}