NSIMD documentation
Index | Tutorial | FAQ | Contribute | API overview | API reference | Wrapped intrinsics | Modules

Horizontal sum

Description

Returns the sum of all the elements contained in a0.

C base API (generic)

#define vaddv(a0, type)
#define vaddv_e(a0, type, simd_ext)

C advanced API (generic, requires C11)

#define nsimd_addv(a0)

C++ base API (generic)

template <NSIMD_CONCEPT_VALUE_TYPE T> T NSIMD_VECTORCALL addv(typename simd_traits<T, NSIMD_SIMD>::simd_vector a0, T);

C++ advanced API

template <NSIMD_CONCEPT_VALUE_TYPE T, NSIMD_CONCEPT_SIMD_EXT SimdExt> T addv(pack<T, 1, SimdExt> const& a0);
template <NSIMD_CONCEPT_VALUE_TYPE T, int N, NSIMD_CONCEPT_SIMD_EXT SimdExt> T addv(pack<T, N, SimdExt> const& a0);

C base API (architecture specifics)

AVX2

f64 NSIMD_VECTORCALL nsimd_addv_avx2_f64(nsimd_avx2_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_avx2_f32(nsimd_avx2_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_avx2_f16(nsimd_avx2_vf16 a0);

SVE512

f64 NSIMD_VECTORCALL nsimd_addv_sve512_f64(nsimd_sve512_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_sve512_f32(nsimd_sve512_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_sve512_f16(nsimd_sve512_vf16 a0);

SVE

f64 NSIMD_VECTORCALL nsimd_addv_sve_f64(nsimd_sve_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_sve_f32(nsimd_sve_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_sve_f16(nsimd_sve_vf16 a0);

CPU

f64 NSIMD_VECTORCALL nsimd_addv_cpu_f64(nsimd_cpu_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_cpu_f32(nsimd_cpu_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_cpu_f16(nsimd_cpu_vf16 a0);

SVE2048

f64 NSIMD_VECTORCALL nsimd_addv_sve2048_f64(nsimd_sve2048_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_sve2048_f32(nsimd_sve2048_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_sve2048_f16(nsimd_sve2048_vf16 a0);

NEON128

f64 NSIMD_VECTORCALL nsimd_addv_neon128_f64(nsimd_neon128_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_neon128_f32(nsimd_neon128_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_neon128_f16(nsimd_neon128_vf16 a0);

AVX512_SKYLAKE

f64 NSIMD_VECTORCALL nsimd_addv_avx512_skylake_f64(nsimd_avx512_skylake_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_avx512_skylake_f32(nsimd_avx512_skylake_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_avx512_skylake_f16(nsimd_avx512_skylake_vf16 a0);

AARCH64

f64 NSIMD_VECTORCALL nsimd_addv_aarch64_f64(nsimd_aarch64_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_aarch64_f32(nsimd_aarch64_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_aarch64_f16(nsimd_aarch64_vf16 a0);

AVX512_KNL

f64 NSIMD_VECTORCALL nsimd_addv_avx512_knl_f64(nsimd_avx512_knl_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_avx512_knl_f32(nsimd_avx512_knl_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_avx512_knl_f16(nsimd_avx512_knl_vf16 a0);

SSE2

f64 NSIMD_VECTORCALL nsimd_addv_sse2_f64(nsimd_sse2_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_sse2_f32(nsimd_sse2_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_sse2_f16(nsimd_sse2_vf16 a0);

SSE42

f64 NSIMD_VECTORCALL nsimd_addv_sse42_f64(nsimd_sse42_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_sse42_f32(nsimd_sse42_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_sse42_f16(nsimd_sse42_vf16 a0);

SVE256

f64 NSIMD_VECTORCALL nsimd_addv_sve256_f64(nsimd_sve256_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_sve256_f32(nsimd_sve256_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_sve256_f16(nsimd_sve256_vf16 a0);

SVE1024

f64 NSIMD_VECTORCALL nsimd_addv_sve1024_f64(nsimd_sve1024_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_sve1024_f32(nsimd_sve1024_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_sve1024_f16(nsimd_sve1024_vf16 a0);

VSX

f64 NSIMD_VECTORCALL nsimd_addv_vsx_f64(nsimd_vsx_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_vsx_f32(nsimd_vsx_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_vsx_f16(nsimd_vsx_vf16 a0);

SVE128

f64 NSIMD_VECTORCALL nsimd_addv_sve128_f64(nsimd_sve128_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_sve128_f32(nsimd_sve128_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_sve128_f16(nsimd_sve128_vf16 a0);

VMX

f64 NSIMD_VECTORCALL nsimd_addv_vmx_f64(nsimd_vmx_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_vmx_f32(nsimd_vmx_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_vmx_f16(nsimd_vmx_vf16 a0);

AVX

f64 NSIMD_VECTORCALL nsimd_addv_avx_f64(nsimd_avx_vf64 a0);
f32 NSIMD_VECTORCALL nsimd_addv_avx_f32(nsimd_avx_vf32 a0);
f16 NSIMD_VECTORCALL nsimd_addv_avx_f16(nsimd_avx_vf16 a0);

C++ base API (architecture specifics)

AVX2

f64 NSIMD_VECTORCALL addv(nsimd_avx2_vf64 a0, f64, avx2);
f32 NSIMD_VECTORCALL addv(nsimd_avx2_vf32 a0, f32, avx2);
f16 NSIMD_VECTORCALL addv(nsimd_avx2_vf16 a0, f16, avx2);

SVE512

f64 NSIMD_VECTORCALL addv(nsimd_sve512_vf64 a0, f64, sve512);
f32 NSIMD_VECTORCALL addv(nsimd_sve512_vf32 a0, f32, sve512);
f16 NSIMD_VECTORCALL addv(nsimd_sve512_vf16 a0, f16, sve512);

SVE

f64 NSIMD_VECTORCALL addv(nsimd_sve_vf64 a0, f64, sve);
f32 NSIMD_VECTORCALL addv(nsimd_sve_vf32 a0, f32, sve);
f16 NSIMD_VECTORCALL addv(nsimd_sve_vf16 a0, f16, sve);

CPU

f64 NSIMD_VECTORCALL addv(nsimd_cpu_vf64 a0, f64, cpu);
f32 NSIMD_VECTORCALL addv(nsimd_cpu_vf32 a0, f32, cpu);
f16 NSIMD_VECTORCALL addv(nsimd_cpu_vf16 a0, f16, cpu);

SVE2048

f64 NSIMD_VECTORCALL addv(nsimd_sve2048_vf64 a0, f64, sve2048);
f32 NSIMD_VECTORCALL addv(nsimd_sve2048_vf32 a0, f32, sve2048);
f16 NSIMD_VECTORCALL addv(nsimd_sve2048_vf16 a0, f16, sve2048);

NEON128

f64 NSIMD_VECTORCALL addv(nsimd_neon128_vf64 a0, f64, neon128);
f32 NSIMD_VECTORCALL addv(nsimd_neon128_vf32 a0, f32, neon128);
f16 NSIMD_VECTORCALL addv(nsimd_neon128_vf16 a0, f16, neon128);

AVX512_SKYLAKE

f64 NSIMD_VECTORCALL addv(nsimd_avx512_skylake_vf64 a0, f64, avx512_skylake);
f32 NSIMD_VECTORCALL addv(nsimd_avx512_skylake_vf32 a0, f32, avx512_skylake);
f16 NSIMD_VECTORCALL addv(nsimd_avx512_skylake_vf16 a0, f16, avx512_skylake);

AARCH64

f64 NSIMD_VECTORCALL addv(nsimd_aarch64_vf64 a0, f64, aarch64);
f32 NSIMD_VECTORCALL addv(nsimd_aarch64_vf32 a0, f32, aarch64);
f16 NSIMD_VECTORCALL addv(nsimd_aarch64_vf16 a0, f16, aarch64);

AVX512_KNL

f64 NSIMD_VECTORCALL addv(nsimd_avx512_knl_vf64 a0, f64, avx512_knl);
f32 NSIMD_VECTORCALL addv(nsimd_avx512_knl_vf32 a0, f32, avx512_knl);
f16 NSIMD_VECTORCALL addv(nsimd_avx512_knl_vf16 a0, f16, avx512_knl);

SSE2

f64 NSIMD_VECTORCALL addv(nsimd_sse2_vf64 a0, f64, sse2);
f32 NSIMD_VECTORCALL addv(nsimd_sse2_vf32 a0, f32, sse2);
f16 NSIMD_VECTORCALL addv(nsimd_sse2_vf16 a0, f16, sse2);

SSE42

f64 NSIMD_VECTORCALL addv(nsimd_sse42_vf64 a0, f64, sse42);
f32 NSIMD_VECTORCALL addv(nsimd_sse42_vf32 a0, f32, sse42);
f16 NSIMD_VECTORCALL addv(nsimd_sse42_vf16 a0, f16, sse42);

SVE256

f64 NSIMD_VECTORCALL addv(nsimd_sve256_vf64 a0, f64, sve256);
f32 NSIMD_VECTORCALL addv(nsimd_sve256_vf32 a0, f32, sve256);
f16 NSIMD_VECTORCALL addv(nsimd_sve256_vf16 a0, f16, sve256);

SVE1024

f64 NSIMD_VECTORCALL addv(nsimd_sve1024_vf64 a0, f64, sve1024);
f32 NSIMD_VECTORCALL addv(nsimd_sve1024_vf32 a0, f32, sve1024);
f16 NSIMD_VECTORCALL addv(nsimd_sve1024_vf16 a0, f16, sve1024);

VSX

f64 NSIMD_VECTORCALL addv(nsimd_vsx_vf64 a0, f64, vsx);
f32 NSIMD_VECTORCALL addv(nsimd_vsx_vf32 a0, f32, vsx);
f16 NSIMD_VECTORCALL addv(nsimd_vsx_vf16 a0, f16, vsx);

SVE128

f64 NSIMD_VECTORCALL addv(nsimd_sve128_vf64 a0, f64, sve128);
f32 NSIMD_VECTORCALL addv(nsimd_sve128_vf32 a0, f32, sve128);
f16 NSIMD_VECTORCALL addv(nsimd_sve128_vf16 a0, f16, sve128);

VMX

f64 NSIMD_VECTORCALL addv(nsimd_vmx_vf64 a0, f64, vmx);
f32 NSIMD_VECTORCALL addv(nsimd_vmx_vf32 a0, f32, vmx);
f16 NSIMD_VECTORCALL addv(nsimd_vmx_vf16 a0, f16, vmx);

AVX

f64 NSIMD_VECTORCALL addv(nsimd_avx_vf64 a0, f64, avx);
f32 NSIMD_VECTORCALL addv(nsimd_avx_vf32 a0, f32, avx);
f16 NSIMD_VECTORCALL addv(nsimd_avx_vf16 a0, f16, avx);