NSIMD documentation

Scatter elements from SIMD vector to memory

Description

Scatter the elements of the SIMD vector given as third argument to memory: element i is stored at address a0 + i * a1, where a0 is the base address (first argument) and a1 is the step, in elements, between consecutive stores (second argument). Unlike scatter, the memory locations are described by a single integer step, so no SIMD register of offsets is needed.
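
In scalar terms, the operation amounts to a strided store. The following reference function (a hypothetical helper, not part of NSIMD) shows which memory locations are written, assuming the base + i * step addressing described above:

/* Scalar reference: store src[i] at base[i * step] for each of the n lanes. */
static void scatter_linear_ref(float *base, int step, const float *src, int n) {
  int i;
  for (i = 0; i < n; i++) {
    base[i * step] = src[i];
  }
}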

C base API (generic)

#define vscatter_linear(a0, a1, a2, type)
#define vscatter_linear_e(a0, a1, a2, type, simd_ext)
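
A minimal sketch of the base API in use, assuming the usual NSIMD helpers vloadu and the vec(T) type macro from <nsimd/nsimd.h> (not documented on this page):

#include <nsimd/nsimd.h>

/* Load one SIMD width of f32 from src, then write lane i to dst[3 * i]. */
void scatter_one_vector(f32 *dst, const f32 *src) {
  vec(f32) v = vloadu(src, f32);
  vscatter_linear(dst, 3, v, f32);
}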

C advanced API (generic, requires C11)

#define nsimd_scatter_linear(a0, a1, a2)
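
With a C11 compiler the generic front end dispatches on the argument types, so no type suffix is required. A minimal sketch, reusing the base-API vloadu assumed above:

#include <nsimd/nsimd.h>

void scatter_one_vector_c11(f32 *dst, const f32 *src) {
  vec(f32) v = vloadu(src, f32);   /* base API load */
  nsimd_scatter_linear(dst, 3, v); /* type deduced from the arguments */
}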

C++ base API (generic)

template <NSIMD_CONCEPT_VALUE_TYPE T> void NSIMD_VECTORCALL scatter_linear(T* a0, int a1, typename simd_traits<T, NSIMD_SIMD>::simd_vector a2, T);
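
A minimal sketch of the base API from C++: the trailing f32() tag selects the overload above, and NSIMD_SIMD denotes the SIMD extension NSIMD was compiled for. The vector is taken as a parameter so that only names appearing in the signature above are assumed:

#include <nsimd/nsimd.h>

// Write lane i of v to dst[4 * i].
void scatter_f32(f32 *dst,
                 nsimd::simd_traits<f32, NSIMD_SIMD>::simd_vector v) {
  nsimd::scatter_linear(dst, 4, v, f32());
}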

C++ advanced API

template <NSIMD_CONCEPT_VALUE_TYPE T, NSIMD_CONCEPT_SIMD_EXT SimdExt> void scatter_linear(T* a0, int a1, pack<T, 1, SimdExt> const& a2);
template <NSIMD_CONCEPT_VALUE_TYPE T, int N, NSIMD_CONCEPT_SIMD_EXT SimdExt> void scatter_linear(T* a0, int a1, pack<T, N, SimdExt> const& a2);
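
A minimal sketch of the advanced API, assuming the companion nsimd::loadu<nsimd::pack<f32> > load (not documented on this page):

#include <nsimd/nsimd.h>

// Load a pack of f32 and scatter it so that lane i lands at dst[2 * i].
void scatter_pack(f32 *dst, const f32 *src) {
  nsimd::pack<f32> v = nsimd::loadu<nsimd::pack<f32> >(src);
  nsimd::scatter_linear(dst, 2, v);
}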

C base API (architecture specifics)

AVX2

void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_f64(f64* a0, int a1, nsimd_avx2_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_f32(f32* a0, int a1, nsimd_avx2_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_f16(f16* a0, int a1, nsimd_avx2_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_i64(i64* a0, int a1, nsimd_avx2_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_i32(i32* a0, int a1, nsimd_avx2_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_i16(i16* a0, int a1, nsimd_avx2_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_i8(i8* a0, int a1, nsimd_avx2_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_u64(u64* a0, int a1, nsimd_avx2_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_u32(u32* a0, int a1, nsimd_avx2_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_u16(u16* a0, int a1, nsimd_avx2_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx2_u8(u8* a0, int a1, nsimd_avx2_vu8 a2);

SVE512

void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_f64(f64* a0, int a1, nsimd_sve512_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_f32(f32* a0, int a1, nsimd_sve512_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_f16(f16* a0, int a1, nsimd_sve512_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_i64(i64* a0, int a1, nsimd_sve512_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_i32(i32* a0, int a1, nsimd_sve512_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_i16(i16* a0, int a1, nsimd_sve512_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_i8(i8* a0, int a1, nsimd_sve512_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_u64(u64* a0, int a1, nsimd_sve512_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_u32(u32* a0, int a1, nsimd_sve512_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_u16(u16* a0, int a1, nsimd_sve512_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve512_u8(u8* a0, int a1, nsimd_sve512_vu8 a2);

SVE

void NSIMD_VECTORCALL nsimd_scatter_linear_sve_f64(f64* a0, int a1, nsimd_sve_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_f32(f32* a0, int a1, nsimd_sve_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_f16(f16* a0, int a1, nsimd_sve_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_i64(i64* a0, int a1, nsimd_sve_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_i32(i32* a0, int a1, nsimd_sve_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_i16(i16* a0, int a1, nsimd_sve_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_i8(i8* a0, int a1, nsimd_sve_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_u64(u64* a0, int a1, nsimd_sve_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_u32(u32* a0, int a1, nsimd_sve_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_u16(u16* a0, int a1, nsimd_sve_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve_u8(u8* a0, int a1, nsimd_sve_vu8 a2);

CPU

void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_f64(f64* a0, int a1, nsimd_cpu_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_f32(f32* a0, int a1, nsimd_cpu_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_f16(f16* a0, int a1, nsimd_cpu_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_i64(i64* a0, int a1, nsimd_cpu_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_i32(i32* a0, int a1, nsimd_cpu_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_i16(i16* a0, int a1, nsimd_cpu_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_i8(i8* a0, int a1, nsimd_cpu_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_u64(u64* a0, int a1, nsimd_cpu_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_u32(u32* a0, int a1, nsimd_cpu_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_u16(u16* a0, int a1, nsimd_cpu_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_cpu_u8(u8* a0, int a1, nsimd_cpu_vu8 a2);

SVE2048

void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_f64(f64* a0, int a1, nsimd_sve2048_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_f32(f32* a0, int a1, nsimd_sve2048_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_f16(f16* a0, int a1, nsimd_sve2048_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_i64(i64* a0, int a1, nsimd_sve2048_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_i32(i32* a0, int a1, nsimd_sve2048_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_i16(i16* a0, int a1, nsimd_sve2048_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_i8(i8* a0, int a1, nsimd_sve2048_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_u64(u64* a0, int a1, nsimd_sve2048_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_u32(u32* a0, int a1, nsimd_sve2048_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_u16(u16* a0, int a1, nsimd_sve2048_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve2048_u8(u8* a0, int a1, nsimd_sve2048_vu8 a2);

NEON128

void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_f64(f64* a0, int a1, nsimd_neon128_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_f32(f32* a0, int a1, nsimd_neon128_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_f16(f16* a0, int a1, nsimd_neon128_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_i64(i64* a0, int a1, nsimd_neon128_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_i32(i32* a0, int a1, nsimd_neon128_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_i16(i16* a0, int a1, nsimd_neon128_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_i8(i8* a0, int a1, nsimd_neon128_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_u64(u64* a0, int a1, nsimd_neon128_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_u32(u32* a0, int a1, nsimd_neon128_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_u16(u16* a0, int a1, nsimd_neon128_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_neon128_u8(u8* a0, int a1, nsimd_neon128_vu8 a2);

AVX512_SKYLAKE

void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_f64(f64* a0, int a1, nsimd_avx512_skylake_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_f32(f32* a0, int a1, nsimd_avx512_skylake_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_f16(f16* a0, int a1, nsimd_avx512_skylake_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_i64(i64* a0, int a1, nsimd_avx512_skylake_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_i32(i32* a0, int a1, nsimd_avx512_skylake_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_i16(i16* a0, int a1, nsimd_avx512_skylake_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_i8(i8* a0, int a1, nsimd_avx512_skylake_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_u64(u64* a0, int a1, nsimd_avx512_skylake_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_u32(u32* a0, int a1, nsimd_avx512_skylake_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_u16(u16* a0, int a1, nsimd_avx512_skylake_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_skylake_u8(u8* a0, int a1, nsimd_avx512_skylake_vu8 a2);

AARCH64

void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_f64(f64* a0, int a1, nsimd_aarch64_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_f32(f32* a0, int a1, nsimd_aarch64_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_f16(f16* a0, int a1, nsimd_aarch64_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_i64(i64* a0, int a1, nsimd_aarch64_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_i32(i32* a0, int a1, nsimd_aarch64_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_i16(i16* a0, int a1, nsimd_aarch64_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_i8(i8* a0, int a1, nsimd_aarch64_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_u64(u64* a0, int a1, nsimd_aarch64_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_u32(u32* a0, int a1, nsimd_aarch64_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_u16(u16* a0, int a1, nsimd_aarch64_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_aarch64_u8(u8* a0, int a1, nsimd_aarch64_vu8 a2);

AVX512_KNL

void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_f64(f64* a0, int a1, nsimd_avx512_knl_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_f32(f32* a0, int a1, nsimd_avx512_knl_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_f16(f16* a0, int a1, nsimd_avx512_knl_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_i64(i64* a0, int a1, nsimd_avx512_knl_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_i32(i32* a0, int a1, nsimd_avx512_knl_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_i16(i16* a0, int a1, nsimd_avx512_knl_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_i8(i8* a0, int a1, nsimd_avx512_knl_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_u64(u64* a0, int a1, nsimd_avx512_knl_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_u32(u32* a0, int a1, nsimd_avx512_knl_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_u16(u16* a0, int a1, nsimd_avx512_knl_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx512_knl_u8(u8* a0, int a1, nsimd_avx512_knl_vu8 a2);

SSE2

void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_f64(f64* a0, int a1, nsimd_sse2_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_f32(f32* a0, int a1, nsimd_sse2_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_f16(f16* a0, int a1, nsimd_sse2_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_i64(i64* a0, int a1, nsimd_sse2_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_i32(i32* a0, int a1, nsimd_sse2_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_i16(i16* a0, int a1, nsimd_sse2_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_i8(i8* a0, int a1, nsimd_sse2_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_u64(u64* a0, int a1, nsimd_sse2_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_u32(u32* a0, int a1, nsimd_sse2_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_u16(u16* a0, int a1, nsimd_sse2_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse2_u8(u8* a0, int a1, nsimd_sse2_vu8 a2);

SSE42

void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_f64(f64* a0, int a1, nsimd_sse42_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_f32(f32* a0, int a1, nsimd_sse42_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_f16(f16* a0, int a1, nsimd_sse42_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_i64(i64* a0, int a1, nsimd_sse42_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_i32(i32* a0, int a1, nsimd_sse42_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_i16(i16* a0, int a1, nsimd_sse42_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_i8(i8* a0, int a1, nsimd_sse42_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_u64(u64* a0, int a1, nsimd_sse42_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_u32(u32* a0, int a1, nsimd_sse42_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_u16(u16* a0, int a1, nsimd_sse42_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sse42_u8(u8* a0, int a1, nsimd_sse42_vu8 a2);

SVE256

void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_f64(f64* a0, int a1, nsimd_sve256_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_f32(f32* a0, int a1, nsimd_sve256_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_f16(f16* a0, int a1, nsimd_sve256_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_i64(i64* a0, int a1, nsimd_sve256_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_i32(i32* a0, int a1, nsimd_sve256_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_i16(i16* a0, int a1, nsimd_sve256_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_i8(i8* a0, int a1, nsimd_sve256_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_u64(u64* a0, int a1, nsimd_sve256_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_u32(u32* a0, int a1, nsimd_sve256_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_u16(u16* a0, int a1, nsimd_sve256_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve256_u8(u8* a0, int a1, nsimd_sve256_vu8 a2);

SVE1024

void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_f64(f64* a0, int a1, nsimd_sve1024_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_f32(f32* a0, int a1, nsimd_sve1024_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_f16(f16* a0, int a1, nsimd_sve1024_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_i64(i64* a0, int a1, nsimd_sve1024_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_i32(i32* a0, int a1, nsimd_sve1024_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_i16(i16* a0, int a1, nsimd_sve1024_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_i8(i8* a0, int a1, nsimd_sve1024_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_u64(u64* a0, int a1, nsimd_sve1024_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_u32(u32* a0, int a1, nsimd_sve1024_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_u16(u16* a0, int a1, nsimd_sve1024_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve1024_u8(u8* a0, int a1, nsimd_sve1024_vu8 a2);

VSX

void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_f64(f64* a0, int a1, nsimd_vsx_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_f32(f32* a0, int a1, nsimd_vsx_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_f16(f16* a0, int a1, nsimd_vsx_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_i64(i64* a0, int a1, nsimd_vsx_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_i32(i32* a0, int a1, nsimd_vsx_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_i16(i16* a0, int a1, nsimd_vsx_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_i8(i8* a0, int a1, nsimd_vsx_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_u64(u64* a0, int a1, nsimd_vsx_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_u32(u32* a0, int a1, nsimd_vsx_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_u16(u16* a0, int a1, nsimd_vsx_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vsx_u8(u8* a0, int a1, nsimd_vsx_vu8 a2);

SVE128

void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_f64(f64* a0, int a1, nsimd_sve128_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_f32(f32* a0, int a1, nsimd_sve128_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_f16(f16* a0, int a1, nsimd_sve128_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_i64(i64* a0, int a1, nsimd_sve128_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_i32(i32* a0, int a1, nsimd_sve128_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_i16(i16* a0, int a1, nsimd_sve128_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_i8(i8* a0, int a1, nsimd_sve128_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_u64(u64* a0, int a1, nsimd_sve128_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_u32(u32* a0, int a1, nsimd_sve128_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_u16(u16* a0, int a1, nsimd_sve128_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_sve128_u8(u8* a0, int a1, nsimd_sve128_vu8 a2);

VMX

void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_f64(f64* a0, int a1, nsimd_vmx_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_f32(f32* a0, int a1, nsimd_vmx_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_f16(f16* a0, int a1, nsimd_vmx_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_i64(i64* a0, int a1, nsimd_vmx_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_i32(i32* a0, int a1, nsimd_vmx_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_i16(i16* a0, int a1, nsimd_vmx_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_i8(i8* a0, int a1, nsimd_vmx_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_u64(u64* a0, int a1, nsimd_vmx_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_u32(u32* a0, int a1, nsimd_vmx_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_u16(u16* a0, int a1, nsimd_vmx_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_vmx_u8(u8* a0, int a1, nsimd_vmx_vu8 a2);

AVX

void NSIMD_VECTORCALL nsimd_scatter_linear_avx_f64(f64* a0, int a1, nsimd_avx_vf64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_f32(f32* a0, int a1, nsimd_avx_vf32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_f16(f16* a0, int a1, nsimd_avx_vf16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_i64(i64* a0, int a1, nsimd_avx_vi64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_i32(i32* a0, int a1, nsimd_avx_vi32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_i16(i16* a0, int a1, nsimd_avx_vi16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_i8(i8* a0, int a1, nsimd_avx_vi8 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_u64(u64* a0, int a1, nsimd_avx_vu64 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_u32(u32* a0, int a1, nsimd_avx_vu32 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_u16(u16* a0, int a1, nsimd_avx_vu16 a2);
void NSIMD_VECTORCALL nsimd_scatter_linear_avx_u8(u8* a0, int a1, nsimd_avx_vu8 a2);

C++ base API (architecture specifics)

AVX2

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_avx2_vf64 a2, f64, avx2);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_avx2_vf32 a2, f32, avx2);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_avx2_vf16 a2, f16, avx2);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_avx2_vi64 a2, i64, avx2);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_avx2_vi32 a2, i32, avx2);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_avx2_vi16 a2, i16, avx2);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_avx2_vi8 a2, i8, avx2);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_avx2_vu64 a2, u64, avx2);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_avx2_vu32 a2, u32, avx2);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_avx2_vu16 a2, u16, avx2);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_avx2_vu8 a2, u8, avx2);

SVE512

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_sve512_vf64 a2, f64, sve512);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_sve512_vf32 a2, f32, sve512);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_sve512_vf16 a2, f16, sve512);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_sve512_vi64 a2, i64, sve512);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_sve512_vi32 a2, i32, sve512);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_sve512_vi16 a2, i16, sve512);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_sve512_vi8 a2, i8, sve512);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_sve512_vu64 a2, u64, sve512);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_sve512_vu32 a2, u32, sve512);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_sve512_vu16 a2, u16, sve512);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_sve512_vu8 a2, u8, sve512);

SVE

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_sve_vf64 a2, f64, sve);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_sve_vf32 a2, f32, sve);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_sve_vf16 a2, f16, sve);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_sve_vi64 a2, i64, sve);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_sve_vi32 a2, i32, sve);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_sve_vi16 a2, i16, sve);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_sve_vi8 a2, i8, sve);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_sve_vu64 a2, u64, sve);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_sve_vu32 a2, u32, sve);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_sve_vu16 a2, u16, sve);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_sve_vu8 a2, u8, sve);

CPU

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_cpu_vf64 a2, f64, cpu);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_cpu_vf32 a2, f32, cpu);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_cpu_vf16 a2, f16, cpu);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_cpu_vi64 a2, i64, cpu);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_cpu_vi32 a2, i32, cpu);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_cpu_vi16 a2, i16, cpu);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_cpu_vi8 a2, i8, cpu);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_cpu_vu64 a2, u64, cpu);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_cpu_vu32 a2, u32, cpu);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_cpu_vu16 a2, u16, cpu);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_cpu_vu8 a2, u8, cpu);

SVE2048

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_sve2048_vf64 a2, f64, sve2048);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_sve2048_vf32 a2, f32, sve2048);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_sve2048_vf16 a2, f16, sve2048);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_sve2048_vi64 a2, i64, sve2048);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_sve2048_vi32 a2, i32, sve2048);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_sve2048_vi16 a2, i16, sve2048);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_sve2048_vi8 a2, i8, sve2048);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_sve2048_vu64 a2, u64, sve2048);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_sve2048_vu32 a2, u32, sve2048);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_sve2048_vu16 a2, u16, sve2048);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_sve2048_vu8 a2, u8, sve2048);

NEON128

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_neon128_vf64 a2, f64, neon128);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_neon128_vf32 a2, f32, neon128);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_neon128_vf16 a2, f16, neon128);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_neon128_vi64 a2, i64, neon128);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_neon128_vi32 a2, i32, neon128);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_neon128_vi16 a2, i16, neon128);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_neon128_vi8 a2, i8, neon128);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_neon128_vu64 a2, u64, neon128);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_neon128_vu32 a2, u32, neon128);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_neon128_vu16 a2, u16, neon128);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_neon128_vu8 a2, u8, neon128);

AVX512_SKYLAKE

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_avx512_skylake_vf64 a2, f64, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_avx512_skylake_vf32 a2, f32, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_avx512_skylake_vf16 a2, f16, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_avx512_skylake_vi64 a2, i64, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_avx512_skylake_vi32 a2, i32, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_avx512_skylake_vi16 a2, i16, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_avx512_skylake_vi8 a2, i8, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_avx512_skylake_vu64 a2, u64, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_avx512_skylake_vu32 a2, u32, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_avx512_skylake_vu16 a2, u16, avx512_skylake);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_avx512_skylake_vu8 a2, u8, avx512_skylake);

AARCH64

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_aarch64_vf64 a2, f64, aarch64);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_aarch64_vf32 a2, f32, aarch64);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_aarch64_vf16 a2, f16, aarch64);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_aarch64_vi64 a2, i64, aarch64);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_aarch64_vi32 a2, i32, aarch64);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_aarch64_vi16 a2, i16, aarch64);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_aarch64_vi8 a2, i8, aarch64);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_aarch64_vu64 a2, u64, aarch64);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_aarch64_vu32 a2, u32, aarch64);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_aarch64_vu16 a2, u16, aarch64);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_aarch64_vu8 a2, u8, aarch64);

AVX512_KNL

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_avx512_knl_vf64 a2, f64, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_avx512_knl_vf32 a2, f32, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_avx512_knl_vf16 a2, f16, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_avx512_knl_vi64 a2, i64, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_avx512_knl_vi32 a2, i32, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_avx512_knl_vi16 a2, i16, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_avx512_knl_vi8 a2, i8, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_avx512_knl_vu64 a2, u64, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_avx512_knl_vu32 a2, u32, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_avx512_knl_vu16 a2, u16, avx512_knl);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_avx512_knl_vu8 a2, u8, avx512_knl);

SSE2

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_sse2_vf64 a2, f64, sse2);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_sse2_vf32 a2, f32, sse2);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_sse2_vf16 a2, f16, sse2);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_sse2_vi64 a2, i64, sse2);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_sse2_vi32 a2, i32, sse2);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_sse2_vi16 a2, i16, sse2);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_sse2_vi8 a2, i8, sse2);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_sse2_vu64 a2, u64, sse2);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_sse2_vu32 a2, u32, sse2);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_sse2_vu16 a2, u16, sse2);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_sse2_vu8 a2, u8, sse2);

SSE42

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_sse42_vf64 a2, f64, sse42);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_sse42_vf32 a2, f32, sse42);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_sse42_vf16 a2, f16, sse42);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_sse42_vi64 a2, i64, sse42);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_sse42_vi32 a2, i32, sse42);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_sse42_vi16 a2, i16, sse42);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_sse42_vi8 a2, i8, sse42);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_sse42_vu64 a2, u64, sse42);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_sse42_vu32 a2, u32, sse42);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_sse42_vu16 a2, u16, sse42);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_sse42_vu8 a2, u8, sse42);

SVE256

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_sve256_vf64 a2, f64, sve256);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_sve256_vf32 a2, f32, sve256);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_sve256_vf16 a2, f16, sve256);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_sve256_vi64 a2, i64, sve256);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_sve256_vi32 a2, i32, sve256);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_sve256_vi16 a2, i16, sve256);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_sve256_vi8 a2, i8, sve256);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_sve256_vu64 a2, u64, sve256);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_sve256_vu32 a2, u32, sve256);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_sve256_vu16 a2, u16, sve256);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_sve256_vu8 a2, u8, sve256);

SVE1024

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_sve1024_vf64 a2, f64, sve1024);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_sve1024_vf32 a2, f32, sve1024);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_sve1024_vf16 a2, f16, sve1024);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_sve1024_vi64 a2, i64, sve1024);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_sve1024_vi32 a2, i32, sve1024);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_sve1024_vi16 a2, i16, sve1024);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_sve1024_vi8 a2, i8, sve1024);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_sve1024_vu64 a2, u64, sve1024);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_sve1024_vu32 a2, u32, sve1024);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_sve1024_vu16 a2, u16, sve1024);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_sve1024_vu8 a2, u8, sve1024);

VSX

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_vsx_vf64 a2, f64, vsx);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_vsx_vf32 a2, f32, vsx);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_vsx_vf16 a2, f16, vsx);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_vsx_vi64 a2, i64, vsx);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_vsx_vi32 a2, i32, vsx);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_vsx_vi16 a2, i16, vsx);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_vsx_vi8 a2, i8, vsx);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_vsx_vu64 a2, u64, vsx);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_vsx_vu32 a2, u32, vsx);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_vsx_vu16 a2, u16, vsx);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_vsx_vu8 a2, u8, vsx);

SVE128

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_sve128_vf64 a2, f64, sve128);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_sve128_vf32 a2, f32, sve128);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_sve128_vf16 a2, f16, sve128);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_sve128_vi64 a2, i64, sve128);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_sve128_vi32 a2, i32, sve128);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_sve128_vi16 a2, i16, sve128);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_sve128_vi8 a2, i8, sve128);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_sve128_vu64 a2, u64, sve128);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_sve128_vu32 a2, u32, sve128);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_sve128_vu16 a2, u16, sve128);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_sve128_vu8 a2, u8, sve128);

VMX

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_vmx_vf64 a2, f64, vmx);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_vmx_vf32 a2, f32, vmx);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_vmx_vf16 a2, f16, vmx);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_vmx_vi64 a2, i64, vmx);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_vmx_vi32 a2, i32, vmx);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_vmx_vi16 a2, i16, vmx);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_vmx_vi8 a2, i8, vmx);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_vmx_vu64 a2, u64, vmx);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_vmx_vu32 a2, u32, vmx);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_vmx_vu16 a2, u16, vmx);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_vmx_vu8 a2, u8, vmx);

AVX

void NSIMD_VECTORCALL scatter_linear(f64* a0, int a1, nsimd_avx_vf64 a2, f64, avx);
void NSIMD_VECTORCALL scatter_linear(f32* a0, int a1, nsimd_avx_vf32 a2, f32, avx);
void NSIMD_VECTORCALL scatter_linear(f16* a0, int a1, nsimd_avx_vf16 a2, f16, avx);
void NSIMD_VECTORCALL scatter_linear(i64* a0, int a1, nsimd_avx_vi64 a2, i64, avx);
void NSIMD_VECTORCALL scatter_linear(i32* a0, int a1, nsimd_avx_vi32 a2, i32, avx);
void NSIMD_VECTORCALL scatter_linear(i16* a0, int a1, nsimd_avx_vi16 a2, i16, avx);
void NSIMD_VECTORCALL scatter_linear(i8* a0, int a1, nsimd_avx_vi8 a2, i8, avx);
void NSIMD_VECTORCALL scatter_linear(u64* a0, int a1, nsimd_avx_vu64 a2, u64, avx);
void NSIMD_VECTORCALL scatter_linear(u32* a0, int a1, nsimd_avx_vu32 a2, u32, avx);
void NSIMD_VECTORCALL scatter_linear(u16* a0, int a1, nsimd_avx_vu16 a2, u16, avx);
void NSIMD_VECTORCALL scatter_linear(u8* a0, int a1, nsimd_avx_vu8 a2, u8, avx);