From 44fbc93909a1a1d1dc26c01feb32bf13a5140234 Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Sat, 9 Dec 2023 12:35:07 -0800 Subject: Be less messy with PFFFT Remove a 1-element array for an over-allocated struct array. Also add a wrapper struct for C++. --- common/pffft.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 6 deletions(-) (limited to 'common/pffft.h') diff --git a/common/pffft.h b/common/pffft.h index 9cff9e54..b31304f6 100644 --- a/common/pffft.h +++ b/common/pffft.h @@ -83,23 +83,27 @@ #include #ifdef __cplusplus +#include +#include + extern "C" { #endif /* opaque struct holding internal stuff (precomputed twiddle factors) this * struct can be shared by many threads as it contains only read-only data. */ -typedef struct PFFFT_Setup PFFFT_Setup; - -#ifndef PFFFT_COMMON_ENUMS -#define PFFFT_COMMON_ENUMS +struct PFFFT_Setup; /* direction of the transform */ -typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; +enum pffft_direction_t { PFFFT_FORWARD, PFFFT_BACKWARD }; /* type of transform */ -typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; +enum pffft_transform_t { PFFFT_REAL, PFFFT_COMPLEX }; +#ifndef __cplusplus +typedef struct PFFFT_Setup PFFFT_Setup; +typedef enum pffft_direction_t pffft_direction_t; +typedef enum pffft_transform_t pffft_transform_t; #endif /** @@ -187,6 +191,47 @@ int pffft_simd_size(); #ifdef __cplusplus } + +struct PFFFTSetup { + PFFFT_Setup *mSetup{}; + + PFFFTSetup() = default; + PFFFTSetup(const PFFFTSetup&) = delete; + PFFFTSetup(PFFFTSetup&& rhs) : mSetup{rhs.mSetup} { rhs.mSetup = nullptr; } + explicit PFFFTSetup(std::nullptr_t) { } + explicit PFFFTSetup(unsigned int n, pffft_transform_t transform) + : mSetup{pffft_new_setup(n, transform)} + { } + ~PFFFTSetup() { if(mSetup) pffft_destroy_setup(mSetup); } + + PFFFTSetup& operator=(const PFFFTSetup&) = delete; + PFFFTSetup& operator=(PFFFTSetup&& rhs) + { + if(mSetup) + pffft_destroy_setup(mSetup); + mSetup = std::exchange(rhs.mSetup, nullptr); + return *this; + } + + void transform(const float *input, float *output, float *work, pffft_direction_t direction) const + { pffft_transform(mSetup, input, output, work, direction); } + + void transform_ordered(const float *input, float *output, float *work, + pffft_direction_t direction) const + { pffft_transform_ordered(mSetup, input, output, work, direction); } + + void zreorder(const float *input, float *output, pffft_direction_t direction) const + { pffft_zreorder(mSetup, input, output, direction); } + + void zconvolve_scale_accumulate(const float *dft_a, const float *dft_b, float *dft_ab, + float scaling) const + { pffft_zconvolve_scale_accumulate(mSetup, dft_a, dft_b, dft_ab, scaling); } + + void zconvolve_accumulate(const float *dft_a, const float *dft_b, float *dft_ab) const + { pffft_zconvolve_accumulate(mSetup, dft_a, dft_b, dft_ab); } + + [[nodiscard]] operator bool() const noexcept { return mSetup != nullptr; } +}; #endif #endif // PFFFT_H -- cgit v1.2.3 From ea5628061c4bc3a121f7f8e6448c037f425e6a7d Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Fri, 15 Dec 2023 00:18:28 -0800 Subject: Add some noexcept for free functions --- common/pffft.cpp | 25 +++++++++++++------------ common/pffft.h | 12 +++++++----- 2 files changed, 20 insertions(+), 17 deletions(-) (limited to 'common/pffft.h') diff --git a/common/pffft.cpp b/common/pffft.cpp index bf564086..9d9dad23 100644 --- a/common/pffft.cpp +++ b/common/pffft.cpp @@ -68,6 +68,7 @@ #include "albit.h" #include "almalloc.h" #include "alnumbers.h" +#include "alnumeric.h" #include "alspan.h" #include "opthelpers.h" @@ -538,8 +539,8 @@ NOINLINE void passf5_ps(const size_t ido, const size_t l1, const v4sf *cc, v4sf const v4sf ti11{LD_PS1(0.951056516295154f*fsign)}; const v4sf ti12{LD_PS1(0.587785252292473f*fsign)}; -#define cc_ref(a_1,a_2) cc[(a_2-1)*ido + (a_1) + 1] -#define ch_ref(a_1,a_3) ch[(a_3-1)*l1*ido + (a_1) + 1] +#define cc_ref(a_1,a_2) cc[((a_2)-1)*ido + (a_1) + 1] +#define ch_ref(a_1,a_3) ch[((a_3)-1)*l1*ido + (a_1) + 1] assert(ido > 2); for(size_t k{0};k < l1;++k, cc += 5*ido, ch += ido) @@ -958,8 +959,8 @@ void radf5_ps(const size_t ido, const size_t l1, const v4sf *RESTRICT cc, v4sf * const v4sf tr12{LD_PS1(-0.809016994374947f)}; const v4sf ti12{LD_PS1(0.587785252292473f)}; -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + a_1] +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + (a_1)] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + (a_1)] /* Parameter adjustments */ ch -= 1 + ido * 6; @@ -1040,8 +1041,8 @@ void radb5_ps(const size_t ido, const size_t l1, const v4sf *RESTRICT cc, v4sf * const v4sf tr12{LD_PS1(-0.809016994374947f)}; const v4sf ti12{LD_PS1(0.587785252292473f)}; -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + (a_1)] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + (a_1)] /* Parameter adjustments */ ch -= 1 + ido*(1 + l1); @@ -1408,9 +1409,9 @@ void cffti1_ps(const uint n, float *wa, const al::span ifac) void *pffft_aligned_malloc(size_t nb_bytes) { return al_malloc(MALLOC_V4SF_ALIGNMENT, nb_bytes); } -void pffft_aligned_free(void *p) { al_free(p); } +void pffft_aligned_free(void *p) noexcept { al_free(p); } -int pffft_simd_size() { return SIMD_SZ; } +int pffft_simd_size() noexcept { return SIMD_SZ; } struct PFFFT_Setup { alignas(MALLOC_V4SF_ALIGNMENT) uint N; @@ -1436,7 +1437,7 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) assert((N%(SIMD_SZ*SIMD_SZ)) == 0); const uint Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ; - const size_t storelen{sizeof(PFFFT_Setup) + (2u*Ncvec * sizeof(v4sf))}; + const size_t storelen{sizeof(PFFFT_Setup) + (2_zu*Ncvec * sizeof(v4sf))}; void *store{al_calloc(MALLOC_V4SF_ALIGNMENT, storelen)}; if(!store) return nullptr; @@ -1446,12 +1447,12 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) s->transform = transform; /* nb of complex simd vectors */ s->Ncvec = Ncvec; - s->e = {reinterpret_cast(reinterpret_cast(s+1)), 2u*Ncvec}; + s->e = {reinterpret_cast(reinterpret_cast(s+1)), 2_zu*Ncvec}; s->twiddle = reinterpret_cast(&s->e[2u*Ncvec*(SIMD_SZ-1)/SIMD_SZ]); if constexpr(SIMD_SZ > 1) { - auto e = std::vector(2u*Ncvec*(SIMD_SZ-1), 0.0f); + auto e = std::vector(2_zu*Ncvec*(SIMD_SZ-1), 0.0f); for(size_t k{0};k < s->Ncvec;++k) { const size_t i{k / SIMD_SZ}; @@ -1485,7 +1486,7 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) } -void pffft_destroy_setup(PFFFT_Setup *s) +void pffft_destroy_setup(PFFFT_Setup *s) noexcept { std::destroy_at(s); al_free(s); diff --git a/common/pffft.h b/common/pffft.h index b31304f6..5ef03820 100644 --- a/common/pffft.h +++ b/common/pffft.h @@ -111,8 +111,9 @@ typedef enum pffft_transform_t pffft_transform_t; * structure is read-only so it can safely be shared by multiple concurrent * threads. */ +[[gnu::malloc]] PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform); -void pffft_destroy_setup(PFFFT_Setup *setup); +void pffft_destroy_setup(PFFFT_Setup *setup) noexcept; /** * Perform a Fourier transform. The z-domain data is stored in the most @@ -183,11 +184,12 @@ void pffft_zconvolve_accumulate(const PFFFT_Setup *setup, const float *dft_a, co * and powerpc). This function may be used to obtain such correctly aligned * buffers. */ +[[gnu::alloc_size(1), gnu::malloc]] void *pffft_aligned_malloc(size_t nb_bytes); -void pffft_aligned_free(void *ptr); +void pffft_aligned_free(void *ptr) noexcept; /* Return 4 or 1 depending if vectorization was enable when building pffft.cpp. */ -int pffft_simd_size(); +int pffft_simd_size() noexcept; #ifdef __cplusplus } @@ -197,7 +199,7 @@ struct PFFFTSetup { PFFFTSetup() = default; PFFFTSetup(const PFFFTSetup&) = delete; - PFFFTSetup(PFFFTSetup&& rhs) : mSetup{rhs.mSetup} { rhs.mSetup = nullptr; } + PFFFTSetup(PFFFTSetup&& rhs) noexcept : mSetup{rhs.mSetup} { rhs.mSetup = nullptr; } explicit PFFFTSetup(std::nullptr_t) { } explicit PFFFTSetup(unsigned int n, pffft_transform_t transform) : mSetup{pffft_new_setup(n, transform)} @@ -205,7 +207,7 @@ struct PFFFTSetup { ~PFFFTSetup() { if(mSetup) pffft_destroy_setup(mSetup); } PFFFTSetup& operator=(const PFFFTSetup&) = delete; - PFFFTSetup& operator=(PFFFTSetup&& rhs) + PFFFTSetup& operator=(PFFFTSetup&& rhs) noexcept { if(mSetup) pffft_destroy_setup(mSetup); -- cgit v1.2.3 From 768781bab97732fbd0d66fa153d4ebc768be1240 Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Wed, 27 Dec 2023 04:23:19 -0800 Subject: Improve ownership handing with PFFFT --- common/pffft.cpp | 53 +++++++++++++++++++++++++++-------------------------- common/pffft.h | 41 +++++++++-------------------------------- 2 files changed, 36 insertions(+), 58 deletions(-) (limited to 'common/pffft.h') diff --git a/common/pffft.cpp b/common/pffft.cpp index 9d9dad23..bbfbaa49 100644 --- a/common/pffft.cpp +++ b/common/pffft.cpp @@ -380,7 +380,9 @@ force_inline void vcplxmulconj(v4sf &ar, v4sf &ai, v4sf br, v4sf bi) noexcept #endif //!PFFFT_SIMD_DISABLE /* SSE and co like 16-bytes aligned pointers */ -#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines... +/* with a 64-byte alignment, we are even aligned on L2 cache lines... */ +constexpr auto V4sfAlignment = size_t(64); +constexpr auto V4sfAlignVal = std::align_val_t(V4sfAlignment); /* passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2 @@ -1406,24 +1408,20 @@ void cffti1_ps(const uint n, float *wa, const al::span ifac) } // namespace -void *pffft_aligned_malloc(size_t nb_bytes) -{ return al_malloc(MALLOC_V4SF_ALIGNMENT, nb_bytes); } - -void pffft_aligned_free(void *p) noexcept { al_free(p); } - -int pffft_simd_size() noexcept { return SIMD_SZ; } - +/* NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) */ struct PFFFT_Setup { - alignas(MALLOC_V4SF_ALIGNMENT) uint N; - uint Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) - std::array ifac; - pffft_transform_t transform; + uint N{}; + uint Ncvec{}; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */ + std::array ifac{}; + pffft_transform_t transform{}; - float *twiddle; // N/4 elements - al::span e; // N/4*3 elements + float *twiddle{}; /* N/4 elements */ + al::span e; /* N/4*3 elements */ + + alignas(V4sfAlignment) std::byte end; }; -PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) +gsl::owner pffft_new_setup(unsigned int N, pffft_transform_t transform) { assert(transform == PFFFT_REAL || transform == PFFFT_COMPLEX); assert(N > 0); @@ -1436,23 +1434,25 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) else assert((N%(SIMD_SZ*SIMD_SZ)) == 0); - const uint Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ; - const size_t storelen{sizeof(PFFFT_Setup) + (2_zu*Ncvec * sizeof(v4sf))}; + const uint Ncvec{(transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ}; - void *store{al_calloc(MALLOC_V4SF_ALIGNMENT, storelen)}; - if(!store) return nullptr; + const size_t storelen{std::max(offsetof(PFFFT_Setup, end) + 2_zu*Ncvec*sizeof(v4sf), + sizeof(PFFFT_Setup))}; + gsl::owner storage{::new(V4sfAlignVal) std::byte[storelen]{}}; + al::span extrastore{&storage[offsetof(PFFFT_Setup, end)], 2_zu*Ncvec*sizeof(v4sf)}; - PFFFT_Setup *s{::new(store) PFFFT_Setup{}}; + gsl::owner s{::new(storage) PFFFT_Setup{}}; s->N = N; s->transform = transform; - /* nb of complex simd vectors */ s->Ncvec = Ncvec; - s->e = {reinterpret_cast(reinterpret_cast(s+1)), 2_zu*Ncvec}; - s->twiddle = reinterpret_cast(&s->e[2u*Ncvec*(SIMD_SZ-1)/SIMD_SZ]); + + const size_t ecount{2_zu*Ncvec*(SIMD_SZ-1)/SIMD_SZ}; + s->e = {std::launder(reinterpret_cast(extrastore.data())), ecount}; + s->twiddle = std::launder(reinterpret_cast(&extrastore[ecount*sizeof(v4sf)])); if constexpr(SIMD_SZ > 1) { - auto e = std::vector(2_zu*Ncvec*(SIMD_SZ-1), 0.0f); + auto e = std::vector(s->e.size()*SIMD_SZ, 0.0f); for(size_t k{0};k < s->Ncvec;++k) { const size_t i{k / SIMD_SZ}; @@ -1486,10 +1486,11 @@ PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform) } -void pffft_destroy_setup(PFFFT_Setup *s) noexcept +void pffft_destroy_setup(gsl::owner s) noexcept { std::destroy_at(s); - al_free(s); + auto storage = reinterpret_cast>(s); + ::operator delete[](storage, V4sfAlignVal); } #if !defined(PFFFT_SIMD_DISABLE) diff --git a/common/pffft.h b/common/pffft.h index 5ef03820..cf356524 100644 --- a/common/pffft.h +++ b/common/pffft.h @@ -79,15 +79,12 @@ #ifndef PFFFT_H #define PFFFT_H -#include // for size_t -#include - -#ifdef __cplusplus #include +#include #include -extern "C" { -#endif +#include "almalloc.h" + /* opaque struct holding internal stuff (precomputed twiddle factors) this * struct can be shared by many threads as it contains only read-only data. @@ -100,20 +97,14 @@ enum pffft_direction_t { PFFFT_FORWARD, PFFFT_BACKWARD }; /* type of transform */ enum pffft_transform_t { PFFFT_REAL, PFFFT_COMPLEX }; -#ifndef __cplusplus -typedef struct PFFFT_Setup PFFFT_Setup; -typedef enum pffft_direction_t pffft_direction_t; -typedef enum pffft_transform_t pffft_transform_t; -#endif - /** * Prepare for performing transforms of size N -- the returned PFFFT_Setup * structure is read-only so it can safely be shared by multiple concurrent * threads. */ [[gnu::malloc]] -PFFFT_Setup *pffft_new_setup(unsigned int N, pffft_transform_t transform); -void pffft_destroy_setup(PFFFT_Setup *setup) noexcept; +gsl::owner pffft_new_setup(unsigned int N, pffft_transform_t transform); +void pffft_destroy_setup(gsl::owner setup) noexcept; /** * Perform a Fourier transform. The z-domain data is stored in the most @@ -179,28 +170,14 @@ void pffft_zconvolve_scale_accumulate(const PFFFT_Setup *setup, const float *dft */ void pffft_zconvolve_accumulate(const PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab); -/** - * The float buffers must have the correct alignment (16-byte boundary on intel - * and powerpc). This function may be used to obtain such correctly aligned - * buffers. - */ -[[gnu::alloc_size(1), gnu::malloc]] -void *pffft_aligned_malloc(size_t nb_bytes); -void pffft_aligned_free(void *ptr) noexcept; - -/* Return 4 or 1 depending if vectorization was enable when building pffft.cpp. */ -int pffft_simd_size() noexcept; - -#ifdef __cplusplus -} struct PFFFTSetup { - PFFFT_Setup *mSetup{}; + gsl::owner mSetup{}; PFFFTSetup() = default; PFFFTSetup(const PFFFTSetup&) = delete; PFFFTSetup(PFFFTSetup&& rhs) noexcept : mSetup{rhs.mSetup} { rhs.mSetup = nullptr; } - explicit PFFFTSetup(std::nullptr_t) { } + explicit PFFFTSetup(std::nullptr_t) noexcept { } explicit PFFFTSetup(unsigned int n, pffft_transform_t transform) : mSetup{pffft_new_setup(n, transform)} { } @@ -211,7 +188,8 @@ struct PFFFTSetup { { if(mSetup) pffft_destroy_setup(mSetup); - mSetup = std::exchange(rhs.mSetup, nullptr); + mSetup = rhs.mSetup; + rhs.mSetup = nullptr; return *this; } @@ -234,6 +212,5 @@ struct PFFFTSetup { [[nodiscard]] operator bool() const noexcept { return mSetup != nullptr; } }; -#endif #endif // PFFFT_H -- cgit v1.2.3