/* * 2-channel UHJ Decoder * * Copyright (c) Chris Robinson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ #include "config.h" #ifdef HAVE_SSE_INTRINSICS #include #elif defined(HAVE_NEON) #include #endif #include #include #include #include #include #include #include #include #include "albit.h" #include "albyte.h" #include "alcomplex.h" #include "almalloc.h" #include "alspan.h" #include "vector.h" #include "opthelpers.h" #include "sndfile.h" #include "win_main_utf8.h" struct FileDeleter { void operator()(FILE *file) { fclose(file); } }; using FilePtr = std::unique_ptr; struct SndFileDeleter { void operator()(SNDFILE *sndfile) { sf_close(sndfile); } }; using SndFilePtr = std::unique_ptr; using ubyte = unsigned char; using ushort = unsigned short; using uint = unsigned int; using complex_d = std::complex; using byte4 = std::array; constexpr ubyte SUBTYPE_BFORMAT_FLOAT[]{ 0x03, 0x00, 0x00, 0x00, 0x21, 0x07, 0xd3, 0x11, 0x86, 0x44, 0xc8, 0xc1, 0xca, 0x00, 0x00, 0x00 }; void fwrite16le(ushort val, FILE *f) { ubyte data[2]{ static_cast(val&0xff), static_cast((val>>8)&0xff) }; fwrite(data, 1, 2, f); } void fwrite32le(uint val, FILE *f) { ubyte data[4]{ static_cast(val&0xff), static_cast((val>>8)&0xff), static_cast((val>>16)&0xff), static_cast((val>>24)&0xff) }; fwrite(data, 1, 4, f); } template byte4 f32AsLEBytes(const float &value) = delete; template<> byte4 f32AsLEBytes(const float &value) { byte4 ret{}; std::memcpy(ret.data(), &value, 4); return ret; } template<> byte4 f32AsLEBytes(const float &value) { byte4 ret{}; std::memcpy(ret.data(), &value, 4); std::swap(ret[0], ret[3]); std::swap(ret[1], ret[2]); return ret; } constexpr uint BufferLineSize{1024}; using FloatBufferLine = std::array; using FloatBufferSpan = al::span; struct UhjDecoder { constexpr static size_t sFilterSize{128}; alignas(16) std::array mS{}; alignas(16) std::array mD{}; alignas(16) std::array mT{}; alignas(16) std::array mQ{}; /* History for the FIR filter. 
*/ alignas(16) std::array mDTHistory{}; alignas(16) std::array mSHistory{}; alignas(16) std::array mTemp{}; void decode(const float *RESTRICT InSamples, const al::span OutSamples, const size_t SamplesToDo); void decode2(const float *RESTRICT InSamples, const al::span OutSamples, const size_t SamplesToDo); DEF_NEWDEL(UhjDecoder) }; /* Same basic filter design as in core/uhjfilter.cpp. */ template struct PhaseShifterT { static_assert((FilterSize&(FilterSize-1)) == 0, "FilterSize needs to be power-of-two"); alignas(16) std::array Coeffs{}; PhaseShifterT() { constexpr size_t fft_size{FilterSize * 2}; constexpr size_t half_size{fft_size / 2}; auto fftBuffer = std::make_unique(fft_size); std::fill_n(fftBuffer.get(), fft_size, complex_d{}); fftBuffer[half_size] = 1.0; forward_fft({fftBuffer.get(), fft_size}); for(size_t i{0};i < half_size+1;++i) fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()}; for(size_t i{half_size+1};i < fft_size;++i) fftBuffer[i] = std::conj(fftBuffer[fft_size - i]); inverse_fft({fftBuffer.get(), fft_size}); auto fftiter = fftBuffer.get() + half_size + (FilterSize-1); for(float &coeff : Coeffs) { coeff = static_cast(fftiter->real() / double{fft_size}); fftiter -= 2; } } }; const PhaseShifterT PShift{}; /* Mostly the same as in core/uhjfilter.cpp, except this overwrites the output * instead of adding to it. 
*/ void allpass_process(al::span dst, const float *RESTRICT src) { #ifdef HAVE_SSE_INTRINSICS if(size_t todo{dst.size()>>1}) { auto *out = reinterpret_cast<__m64*>(dst.data()); do { __m128 r04{_mm_setzero_ps()}; __m128 r14{_mm_setzero_ps()}; for(size_t j{0};j < PShift.Coeffs.size();j+=4) { const __m128 coeffs{_mm_load_ps(&PShift.Coeffs[j])}; const __m128 s0{_mm_loadu_ps(&src[j*2])}; const __m128 s1{_mm_loadu_ps(&src[j*2 + 4])}; __m128 s{_mm_shuffle_ps(s0, s1, _MM_SHUFFLE(2, 0, 2, 0))}; r04 = _mm_add_ps(r04, _mm_mul_ps(s, coeffs)); s = _mm_shuffle_ps(s0, s1, _MM_SHUFFLE(3, 1, 3, 1)); r14 = _mm_add_ps(r14, _mm_mul_ps(s, coeffs)); } src += 2; __m128 r4{_mm_add_ps(_mm_unpackhi_ps(r04, r14), _mm_unpacklo_ps(r04, r14))}; r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); _mm_storel_pi(out, r4); ++out; } while(--todo); } if((dst.size()&1)) { __m128 r4{_mm_setzero_ps()}; for(size_t j{0};j < PShift.Coeffs.size();j+=4) { const __m128 coeffs{_mm_load_ps(&PShift.Coeffs[j])}; const __m128 s{_mm_setr_ps(src[j*2], src[j*2 + 2], src[j*2 + 4], src[j*2 + 6])}; r4 = _mm_add_ps(r4, _mm_mul_ps(s, coeffs)); } r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3))); r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4)); dst.back() = _mm_cvtss_f32(r4); } #elif defined(HAVE_NEON) size_t pos{0}; if(size_t todo{dst.size()>>1}) { auto shuffle_2020 = [](float32x4_t a, float32x4_t b) { float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 0))}; ret = vsetq_lane_f32(vgetq_lane_f32(a, 2), ret, 1); ret = vsetq_lane_f32(vgetq_lane_f32(b, 0), ret, 2); ret = vsetq_lane_f32(vgetq_lane_f32(b, 2), ret, 3); return ret; }; auto shuffle_3131 = [](float32x4_t a, float32x4_t b) { float32x4_t ret{vmovq_n_f32(vgetq_lane_f32(a, 1))}; ret = vsetq_lane_f32(vgetq_lane_f32(a, 3), ret, 1); ret = vsetq_lane_f32(vgetq_lane_f32(b, 1), ret, 2); ret = vsetq_lane_f32(vgetq_lane_f32(b, 3), ret, 3); return ret; }; auto unpacklo = [](float32x4_t a, float32x4_t b) { float32x2x2_t result{vzip_f32(vget_low_f32(a), vget_low_f32(b))}; return 
vcombine_f32(result.val[0], result.val[1]); }; auto unpackhi = [](float32x4_t a, float32x4_t b) { float32x2x2_t result{vzip_f32(vget_high_f32(a), vget_high_f32(b))}; return vcombine_f32(result.val[0], result.val[1]); }; do { float32x4_t r04{vdupq_n_f32(0.0f)}; float32x4_t r14{vdupq_n_f32(0.0f)}; for(size_t j{0};j < PShift.Coeffs.size();j+=4) { const float32x4_t coeffs{vld1q_f32(&PShift.Coeffs[j])}; const float32x4_t s0{vld1q_f32(&src[j*2])}; const float32x4_t s1{vld1q_f32(&src[j*2 + 4])}; r04 = vmlaq_f32(r04, shuffle_2020(s0, s1), coeffs); r14 = vmlaq_f32(r14, shuffle_3131(s0, s1), coeffs); } src += 2; float32x4_t r4{vaddq_f32(unpackhi(r04, r14), unpacklo(r04, r14))}; float32x2_t r2{vadd_f32(vget_low_f32(r4), vget_high_f32(r4))}; vst1_f32(&dst[pos], r2); pos += 2; } while(--todo); } if((dst.size()&1)) { auto load4 = [](float32_t a, float32_t b, float32_t c, float32_t d) { float32x4_t ret{vmovq_n_f32(a)}; ret = vsetq_lane_f32(b, ret, 1); ret = vsetq_lane_f32(c, ret, 2); ret = vsetq_lane_f32(d, ret, 3); return ret; }; float32x4_t r4{vdupq_n_f32(0.0f)}; for(size_t j{0};j < PShift.Coeffs.size();j+=4) { const float32x4_t coeffs{vld1q_f32(&PShift.Coeffs[j])}; const float32x4_t s{load4(src[j*2], src[j*2 + 2], src[j*2 + 4], src[j*2 + 6])}; r4 = vmlaq_f32(r4, s, coeffs); } r4 = vaddq_f32(r4, vrev64q_f32(r4)); dst[pos] = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0); } #else for(float &output : dst) { float ret{0.0f}; for(size_t j{0};j < PShift.Coeffs.size();++j) ret += src[j*2] * PShift.Coeffs[j]; output = ret; ++src; } #endif } /* Decoding 3- and 4-channel UHJ is done as: * * S = Left + Right * D = Left - Right * * W = 0.981530*S + 0.197484*j(0.828347*D + 0.767835*T) * X = 0.418504*S - j(0.828347*D + 0.767835*T) * Y = 0.795954*D - 0.676406*T + j(0.186626*S) * Z = 1.023332*Q * * where j is a +90 degree phase shift. 3-channel UHJ excludes Q/Z. * * NOTE: Some souces specify * * S = (Left + Right)/2 * D = (Left - Right)/2 * * However, this is incorrect. 
It's halving Left and Right even though they * were already halved during encoding, causing S and D to be half what they * initially were at the encoding stage. This division is not present in * Gerzon's original paper for deriving Sigma (S) or Delta (D) from the L and R * signals. As proof, taking Y for example: * * Y = 0.795954*D - 0.676406*T + j(0.186626*S) * * * Plug in the encoding parameters, using ? as a placeholder for whether S * and D should receive an extra 0.5 factor * Y = 0.795954*(j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y)*? - * 0.676406*(j(-0.1432*W + 0.6511746*X) - 0.7071068*Y) + * 0.186626*j(0.9396926*W + 0.1855740*X)*? * * * Move common factors in * Y = (j(-0.3420201*0.795954*?*W + 0.5098604*0.795954*?*X) + 0.6554516*0.795954*?*Y) - * (j(-0.1432*0.676406*W + 0.6511746*0.676406*X) - 0.7071068*0.676406*Y) + * j(0.9396926*0.186626*?*W + 0.1855740*0.186626*?*X) * * * Clean up extraneous groupings * Y = j(-0.3420201*0.795954*?*W + 0.5098604*0.795954*?*X) + 0.6554516*0.795954*?*Y - * j(-0.1432*0.676406*W + 0.6511746*0.676406*X) + 0.7071068*0.676406*Y + * j*(0.9396926*0.186626*?*W + 0.1855740*0.186626*?*X) * * * Move phase shifts together and combine them * Y = j(-0.3420201*0.795954*?*W + 0.5098604*0.795954*?*X - -0.1432*0.676406*W - * 0.6511746*0.676406*X + 0.9396926*0.186626*?*W + 0.1855740*0.186626*?*X) + * 0.6554516*0.795954*?*Y + 0.7071068*0.676406*Y * * * Reorder terms * Y = j(-0.3420201*0.795954*?*W + 0.1432*0.676406*W + 0.9396926*0.186626*?*W + * 0.5098604*0.795954*?*X + -0.6511746*0.676406*X + 0.1855740*0.186626*?*X) + * 0.7071068*0.676406*Y + 0.6554516*0.795954*?*Y * * * Move common factors out * Y = j((-0.3420201*0.795954*? + 0.1432*0.676406 + 0.9396926*0.186626*?)*W + * ( 0.5098604*0.795954*? 
+ -0.6511746*0.676406 + 0.1855740*0.186626*?)*X) +
 *     (0.7071068*0.676406 + 0.6554516*0.795954*?)*Y
 *
 * * Result w/ 0.5 factor:
 * -0.3420201*0.795954*0.5 + 0.1432*0.676406 + 0.9396926*0.186626*0.5 = 0.04843*W
 *  0.5098604*0.795954*0.5 + -0.6511746*0.676406 + 0.1855740*0.186626*0.5 = -0.22023*X
 *  0.7071068*0.676406 + 0.6554516*0.795954*0.5 = 0.73915*Y
 * -> Y = j(0.04843*W + -0.22023*X) + 0.73915*Y
 *
 * * Result w/o 0.5 factor:
 * -0.3420201*0.795954 + 0.1432*0.676406 + 0.9396926*0.186626 = 0.00000*W
 *  0.5098604*0.795954 + -0.6511746*0.676406 + 0.1855740*0.186626 = 0.00000*X
 *  0.7071068*0.676406 + 0.6554516*0.795954 = 1.00000*Y
 * -> Y = j(0.00000*W + 0.00000*X) + 1.00000*Y
 *
 * Not halving produces a result matching the original input.
 */
/* Decodes one block of interleaved 3- or 4-channel UHJ.
 *
 * InSamples holds SamplesToDo frames with OutSamples.size() floats per frame
 * (Left, Right, T and, for 4 channels, Q). OutSamples[0..2] receive W, X and
 * Y; OutSamples[3] receives Z when four channels are given. SamplesToDo must
 * be greater than zero and must fit the member buffers (at most
 * BufferLineSize).
 *
 * The direct (non-phase-shifted) terms are read from the start of the delay
 * lines while new input is written sFilterSize entries further in, which
 * delays them by sFilterSize samples relative to the input.
 */
void UhjDecoder::decode(const float *RESTRICT InSamples,
    const al::span<FloatBufferLine> OutSamples, const size_t SamplesToDo)
{
    ASSUME(SamplesToDo > 0);

    const size_t Channels{OutSamples.size()};
    float *woutput{OutSamples[0].data()};
    float *xoutput{OutSamples[1].data()};
    float *youtput{OutSamples[2].data()};

    /* Add a delay to the input channels, to align it with the all-passed
     * signal.
     */

    /* S = Left + Right */
    for(size_t i{0};i < SamplesToDo;++i)
        mS[sFilterSize+i] = InSamples[i*Channels + 0] + InSamples[i*Channels + 1];

    /* D = Left - Right */
    for(size_t i{0};i < SamplesToDo;++i)
        mD[sFilterSize+i] = InSamples[i*Channels + 0] - InSamples[i*Channels + 1];

    /* T */
    for(size_t i{0};i < SamplesToDo;++i)
        mT[sFilterSize+i] = InSamples[i*Channels + 2];

    if(Channels > 3)
    {
        /* Q */
        for(size_t i{0};i < SamplesToDo;++i)
            mQ[sFilterSize+i] = InSamples[i*Channels + 3];
    }

    /* Precompute j(0.828347*D + 0.767835*T) and store in xoutput. The filter
     * input is the saved history followed by the combined D/T delay line; the
     * history for the next call is taken before filtering.
     */
    auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
    std::transform(mD.cbegin(), mD.cbegin()+SamplesToDo+sFilterSize, mT.cbegin(), tmpiter,
        [](const float D, const float T) noexcept { return 0.828347f*D + 0.767835f*T; });
    std::copy_n(mTemp.cbegin()+SamplesToDo, mDTHistory.size(), mDTHistory.begin());
    allpass_process({xoutput, SamplesToDo}, mTemp.data());

    for(size_t i{0};i < SamplesToDo;++i)
    {
        /* W = 0.981530*S + 0.197484*j(0.828347*D + 0.767835*T) */
        woutput[i] = 0.981530f*mS[i] + 0.197484f*xoutput[i];
        /* X = 0.418504*S - j(0.828347*D + 0.767835*T) */
        xoutput[i] = 0.418504f*mS[i] - xoutput[i];
    }

    /* Precompute j*S and store in youtput. */
    tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
    std::copy_n(mS.cbegin(), SamplesToDo+sFilterSize, tmpiter);
    std::copy_n(mTemp.cbegin()+SamplesToDo, mSHistory.size(), mSHistory.begin());
    allpass_process({youtput, SamplesToDo}, mTemp.data());

    for(size_t i{0};i < SamplesToDo;++i)
    {
        /* Y = 0.795954*D - 0.676406*T + j(0.186626*S) */
        youtput[i] = 0.795954f*mD[i] - 0.676406f*mT[i] + 0.186626f*youtput[i];
    }

    if(Channels > 3)
    {
        float *zoutput{OutSamples[3].data()};
        /* Z = 1.023332*Q */
        for(size_t i{0};i < SamplesToDo;++i)
            zoutput[i] = 1.023332f*mQ[i];
    }

    /* Move the last sFilterSize samples of each delay line to the front for
     * the next call.
     */
    std::copy(mS.begin()+SamplesToDo, mS.begin()+SamplesToDo+sFilterSize, mS.begin());
    std::copy(mD.begin()+SamplesToDo, mD.begin()+SamplesToDo+sFilterSize, mD.begin());
    std::copy(mT.begin()+SamplesToDo, mT.begin()+SamplesToDo+sFilterSize, mT.begin());
    std::copy(mQ.begin()+SamplesToDo, mQ.begin()+SamplesToDo+sFilterSize, mQ.begin());
}


/* There is a difference with decoding 2-channel UHJ compared to 3-channel, due
 * to 2-channel having lost some of the original signal. The B-Format signal
 * reconstructed from 2-channel UHJ should not be run through a normal B-Format
 * decoder, as it needs different shelf filters.
* * 2-channel UHJ decoding is done as: * * S = Left + Right * D = Left - Right * * W = 0.981530*S + j*0.163585*D * X = 0.418504*S - j*0.828347*D * Y = 0.762956*D + j*0.384230*S * * where j is a +90 degree phase shift. * * NOTE: As above, S and D should not be halved. The only consequence of * halving here is merely a -6dB reduction in output, but it's still incorrect. */ void UhjDecoder::decode2(const float *RESTRICT InSamples, const al::span OutSamples, const size_t SamplesToDo) { ASSUME(SamplesToDo > 0); float *woutput{OutSamples[0].data()}; float *xoutput{OutSamples[1].data()}; float *youtput{OutSamples[2].data()}; /* S = Left + Right */ for(size_t i{0};i < SamplesToDo;++i) mS[sFilterSize+i] = InSamples[i*2 + 0] + InSamples[i*2 + 1]; /* D = Left - Right */ for(size_t i{0};i < SamplesToDo;++i) mD[sFilterSize+i] = InSamples[i*2 + 0] - InSamples[i*2 + 1]; /* Precompute j*D and store in xoutput. */ auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin()); std::copy_n(mD.cbegin(), SamplesToDo+sFilterSize, tmpiter); std::copy_n(mTemp.cbegin()+SamplesToDo, mDTHistory.size(), mDTHistory.begin()); allpass_process({xoutput, SamplesToDo}, mTemp.data()); for(size_t i{0};i < SamplesToDo;++i) { /* W = 0.981530*S + j*0.163585*D */ woutput[i] = 0.981530f*mS[i] + 0.163585f*xoutput[i]; /* X = 0.418504*S - j*0.828347*D */ xoutput[i] = 0.418504f*mS[i] - 0.828347f*xoutput[i]; } /* Precompute j*S and store in youtput. 
*/ tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin()); std::copy_n(mS.cbegin(), SamplesToDo+sFilterSize, tmpiter); std::copy_n(mTemp.cbegin()+SamplesToDo, mSHistory.size(), mSHistory.begin()); allpass_process({youtput, SamplesToDo}, mTemp.data()); for(size_t i{0};i < SamplesToDo;++i) { /* Y = 0.762956*D + j*0.384230*S */ youtput[i] = 0.762956f*mD[i] + 0.384230f*youtput[i]; } std::copy(mS.begin()+SamplesToDo, mS.begin()+SamplesToDo+sFilterSize, mS.begin()); std::copy(mD.begin()+SamplesToDo, mD.begin()+SamplesToDo+sFilterSize, mD.begin()); } int main(int argc, char **argv) { if(argc < 2 || std::strcmp(argv[1], "-h") == 0 || std::strcmp(argv[1], "--help") == 0) { printf("Usage: %s \n", argv[0]); return 1; } size_t num_files{0}, num_decoded{0}; for(int fidx{1};fidx < argc;++fidx) { ++num_files; SF_INFO ininfo{}; SndFilePtr infile{sf_open(argv[fidx], SFM_READ, &ininfo)}; if(!infile) { fprintf(stderr, "Failed to open %s\n", argv[fidx]); continue; } if(sf_command(infile.get(), SFC_WAVEX_GET_AMBISONIC, NULL, 0) == SF_AMBISONIC_B_FORMAT) { fprintf(stderr, "%s is already B-Format\n", argv[fidx]); continue; } uint outchans{}; if(ininfo.channels == 2) outchans = 3; else if(ininfo.channels == 3 || ininfo.channels == 4) outchans = static_cast(ininfo.channels); else { fprintf(stderr, "%s is not a 2-, 3-, or 4-channel file\n", argv[fidx]); continue; } printf("Converting %s from %d-channel UHJ...\n", argv[fidx], ininfo.channels); std::string outname{argv[fidx]}; auto lastslash = outname.find_last_of('/'); if(lastslash != std::string::npos) outname.erase(0, lastslash+1); auto lastdot = outname.find_last_of('.'); if(lastdot != std::string::npos) outname.resize(lastdot+1); outname += "amb"; FilePtr outfile{fopen(outname.c_str(), "wb")}; if(!outfile) { fprintf(stderr, "Failed to create %s\n", outname.c_str()); continue; } fputs("RIFF", outfile.get()); fwrite32le(0xFFFFFFFF, outfile.get()); // 'RIFF' header len; filled in at close fputs("WAVE", outfile.get()); 
fputs("fmt ", outfile.get()); fwrite32le(40, outfile.get()); // 'fmt ' header len; 40 bytes for EXTENSIBLE // 16-bit val, format type id (extensible: 0xFFFE) fwrite16le(0xFFFE, outfile.get()); // 16-bit val, channel count fwrite16le(static_cast(outchans), outfile.get()); // 32-bit val, frequency fwrite32le(static_cast(ininfo.samplerate), outfile.get()); // 32-bit val, bytes per second fwrite32le(static_cast(ininfo.samplerate)*sizeof(float)*outchans, outfile.get()); // 16-bit val, frame size fwrite16le(static_cast(sizeof(float)*outchans), outfile.get()); // 16-bit val, bits per sample fwrite16le(static_cast(sizeof(float)*8), outfile.get()); // 16-bit val, extra byte count fwrite16le(22, outfile.get()); // 16-bit val, valid bits per sample fwrite16le(static_cast(sizeof(float)*8), outfile.get()); // 32-bit val, channel mask fwrite32le(0, outfile.get()); // 16 byte GUID, sub-type format fwrite(SUBTYPE_BFORMAT_FLOAT, 1, 16, outfile.get()); fputs("data", outfile.get()); fwrite32le(0xFFFFFFFF, outfile.get()); // 'data' header len; filled in at close if(ferror(outfile.get())) { fprintf(stderr, "Error writing wave file header: %s (%d)\n", strerror(errno), errno); continue; } auto DataStart = ftell(outfile.get()); auto decoder = std::make_unique(); auto inmem = std::make_unique(BufferLineSize*static_cast(ininfo.channels)); auto decmem = al::vector, 16>(outchans); auto outmem = std::make_unique(BufferLineSize*outchans); /* The all-pass filter has a lead-in of 127 samples, and a lead-out of * 128 samples. So after reading the last samples from the input, an * additional 255 samples of silence need to be fed through the decoder * for it to finish. 
*/ sf_count_t LeadOut{UhjDecoder::sFilterSize*2 - 1}; while(LeadOut > 0) { sf_count_t sgot{sf_readf_float(infile.get(), inmem.get(), BufferLineSize)}; sgot = std::max(sgot, 0); if(sgot < BufferLineSize) { const sf_count_t remaining{std::min(BufferLineSize - sgot, LeadOut)}; std::fill_n(inmem.get() + sgot*ininfo.channels, remaining*ininfo.channels, 0.0f); sgot += remaining; LeadOut -= remaining; } auto got = static_cast(sgot); if(ininfo.channels == 2) decoder->decode2(inmem.get(), decmem, got); else if(ininfo.channels == 3 || ininfo.channels == 4) decoder->decode(inmem.get(), decmem, got); for(size_t i{0};i < got;++i) { for(size_t j{0};j < outchans;++j) outmem[i*outchans + j] = f32AsLEBytes(decmem[j][i]); } size_t wrote{fwrite(outmem.get(), sizeof(byte4)*outchans, got, outfile.get())}; if(wrote < got) { fprintf(stderr, "Error writing wave data: %s (%d)\n", strerror(errno), errno); break; } } auto DataEnd = ftell(outfile.get()); if(DataEnd > DataStart) { long dataLen{DataEnd - DataStart}; if(fseek(outfile.get(), 4, SEEK_SET) == 0) fwrite32le(static_cast(DataEnd-8), outfile.get()); // 'WAVE' header len if(fseek(outfile.get(), DataStart-4, SEEK_SET) == 0) fwrite32le(static_cast(dataLen), outfile.get()); // 'data' header len } fflush(outfile.get()); ++num_decoded; } if(num_decoded == 0) fprintf(stderr, "Failed to decode any input files\n"); else if(num_decoded < num_files) fprintf(stderr, "Decoded %zu of %zu files\n", num_decoded, num_files); else printf("Decoded %zu file%s\n", num_decoded, (num_decoded==1)?"":"s"); return 0; }