The notations used below are as follows:
- T: a trick, usually implemented using other intrinsics
- E: scalar emulation
- NOOP: no operation
- NA: the operator does not exist for the given type
- an intrinsic name: the actual intrinsic that is wrapped
abs on i8: _mm512_abs_epi8
abs on u8: NOOP
abs on i16: _mm512_abs_epi16
abs on u16: NOOP
abs on i32: _mm512_abs_epi32
abs on u32: NOOP
abs on i64: _mm512_abs_epi64
abs on u64: NOOP
abs on f16: T
abs on f32: T
abs on f64: T
add on i8: _mm512_add_epi8
add on u8: _mm512_add_epi8
add on i16: _mm512_add_epi16
add on u16: _mm512_add_epi16
add on i32: _mm512_add_epi32
add on u32: _mm512_add_epi32
add on i64: _mm512_add_epi64
add on u64: _mm512_add_epi64
add on f16: T
add on f32: _mm512_add_ps
add on f64: _mm512_add_pd
adds on i8: _mm512_adds_epi8
adds on u8: _mm512_adds_epu8
adds on i16: _mm512_adds_epi16
adds on u16: _mm512_adds_epu16
adds on i32: T
adds on u32: T
adds on i64: T
adds on u64: T
adds on f16: T
adds on f32: T
adds on f64: T
addv on i8: NA
addv on u8: NA
addv on i16: NA
addv on u16: NA
addv on i32: NA
addv on u32: NA
addv on i64: NA
addv on u64: NA
addv on f16: T
addv on f32: T
addv on f64: T
all on i8: T
all on u8: T
all on i16: T
all on u16: T
all on i32: T
all on u32: T
all on i64: T
all on u64: T
all on f16: T
all on f32: T
all on f64: T
andb on i8: _mm512_and_si512
andb on u8: _mm512_and_si512
andb on i16: _mm512_and_si512
andb on u16: _mm512_and_si512
andb on i32: _mm512_and_si512
andb on u32: _mm512_and_si512
andb on i64: _mm512_and_si512
andb on u64: _mm512_and_si512
andb on f16: T
andb on f32: _mm512_and_ps
andb on f64: _mm512_and_pd
andl on i8: T
andl on u8: T
andl on i16: T
andl on u16: T
andl on i32: T
andl on u32: T
andl on i64: T
andl on u64: T
andl on f16: T
andl on f32: T
andl on f64: T
andnotb on i8: _mm512_andnot_si512
andnotb on u8: _mm512_andnot_si512
andnotb on i16: _mm512_andnot_si512
andnotb on u16: _mm512_andnot_si512
andnotb on i32: _mm512_andnot_si512
andnotb on u32: _mm512_andnot_si512
andnotb on i64: _mm512_andnot_si512
andnotb on u64: _mm512_andnot_si512
andnotb on f16: T
andnotb on f32: _mm512_andnot_ps
andnotb on f64: _mm512_andnot_pd
andnotl on i8: T
andnotl on u8: T
andnotl on i16: T
andnotl on u16: T
andnotl on i32: T
andnotl on u32: T
andnotl on i64: T
andnotl on u64: T
andnotl on f16: T
andnotl on f32: T
andnotl on f64: T
any on i8: =
any on u8: =
any on i16: =
any on u16: =
any on i32: =
any on u32: =
any on i64: =
any on u64: =
any on f16: T
any on f32: =
any on f64: =
ceil on i8: NOOP
ceil on u8: NOOP
ceil on i16: NOOP
ceil on u16: NOOP
ceil on i32: NOOP
ceil on u32: NOOP
ceil on i64: NOOP
ceil on u64: NOOP
ceil on f16: T
ceil on f32: _mm512_ceil_ps
ceil on f64: _mm512_ceil_pd
div on i8: E
div on u8: E
div on i16: E
div on u16: E
div on i32: E
div on u32: E
div on i64: E
div on u64: E
div on f16: T
div on f32: _mm512_div_ps
div on f64: _mm512_div_pd
eq on i8: T
eq on u8: T
eq on i16: T
eq on u16: T
eq on i32: T
eq on u32: T
eq on i64: T
eq on u64: T
eq on f16: T
eq on f32: T
eq on f64: T
floor on i8: NOOP
floor on u8: NOOP
floor on i16: NOOP
floor on u16: NOOP
floor on i32: NOOP
floor on u32: NOOP
floor on i64: NOOP
floor on u64: NOOP
floor on f16: T
floor on f32: _mm512_floor_ps
floor on f64: _mm512_floor_pd
fma on i8: T
fma on u8: T
fma on i16: T
fma on u16: T
fma on i32: T
fma on u32: T
fma on i64: T
fma on u64: T
fma on f16: T
fma on f32: _mm512_fmadd_ps
fma on f64: _mm512_fmadd_pd
fms on i8: T
fms on u8: T
fms on i16: T
fms on u16: T
fms on i32: T
fms on u32: T
fms on i64: T
fms on u64: T
fms on f16: T
fms on f32: _mm512_fmsub_ps
fms on f64: _mm512_fmsub_pd
fnma on i8: T
fnma on u8: T
fnma on i16: T
fnma on u16: T
fnma on i32: T
fnma on u32: T
fnma on i64: T
fnma on u64: T
fnma on f16: T
fnma on f32: _mm512_fnmadd_ps
fnma on f64: _mm512_fnmadd_pd
fnms on i8: T
fnms on u8: T
fnms on i16: T
fnms on u16: T
fnms on i32: T
fnms on u32: T
fnms on i64: T
fnms on u64: T
fnms on f16: T
fnms on f32: _mm512_fnmsub_ps
fnms on f64: _mm512_fnmsub_pd
gather on i8: NA
gather on u8: NA
gather on i16: E
gather on u16: E
gather on i32: _mm512_i32gather_epi32
gather on u32: _mm512_i32gather_epi32
gather on i64: _mm512_i64gather_epi64
gather on u64: _mm512_i64gather_epi64
gather on f16: E
gather on f32: _mm512_i32gather_ps
gather on f64: _mm512_i64gather_pd
gather_linear on i8: T
gather_linear on u8: T
gather_linear on i16: T
gather_linear on u16: T
gather_linear on i32: T
gather_linear on u32: T
gather_linear on i64: T
gather_linear on u64: T
gather_linear on f16: E
gather_linear on f32: T
gather_linear on f64: T
ge on i8: T
ge on u8: T
ge on i16: T
ge on u16: T
ge on i32: T
ge on u32: T
ge on i64: T
ge on u64: T
ge on f16: T
ge on f32: T
ge on f64: T
gt on i8: T
gt on u8: T
gt on i16: T
gt on u16: T
gt on i32: T
gt on u32: T
gt on i64: T
gt on u64: T
gt on f16: T
gt on f32: T
gt on f64: T
if_else1 on i8: _mm512_mask_blend_epi8
if_else1 on u8: _mm512_mask_blend_epi8
if_else1 on i16: _mm512_mask_blend_epi16
if_else1 on u16: _mm512_mask_blend_epi16
if_else1 on i32: _mm512_mask_blend_epi32
if_else1 on u32: _mm512_mask_blend_epi32
if_else1 on i64: _mm512_mask_blend_epi64
if_else1 on u64: _mm512_mask_blend_epi64
if_else1 on f16: T
if_else1 on f32: _mm512_mask_blend_ps
if_else1 on f64: _mm512_mask_blend_pd
iota on i8: T
iota on u8: T
iota on i16: T
iota on u16: T
iota on i32: T
iota on u32: T
iota on i64: T
iota on u64: T
iota on f16: T
iota on f32: T
iota on f64: T
le on i8: T
le on u8: T
le on i16: T
le on u16: T
le on i32: T
le on u32: T
le on i64: T
le on u64: T
le on f16: T
le on f32: T
le on f64: T
len on i8: NOOP
len on u8: NOOP
len on i16: NOOP
len on u16: NOOP
len on i32: NOOP
len on u32: NOOP
len on i64: NOOP
len on u64: NOOP
len on f16: NOOP
len on f32: NOOP
len on f64: NOOP
load2a on i8: T
load2a on u8: T
load2a on i16: T
load2a on u16: T
load2a on i32: T
load2a on u32: T
load2a on i64: T
load2a on u64: T
load2a on f16: T
load2a on f32: T
load2a on f64: T
load2u on i8: T
load2u on u8: T
load2u on i16: T
load2u on u16: T
load2u on i32: T
load2u on u32: T
load2u on i64: T
load2u on u64: T
load2u on f16: T
load2u on f32: T
load2u on f64: T
load3a on i8: T
load3a on u8: T
load3a on i16: T
load3a on u16: T
load3a on i32: T
load3a on u32: T
load3a on i64: T
load3a on u64: T
load3a on f16: T
load3a on f32: T
load3a on f64: T
load3u on i8: T
load3u on u8: T
load3u on i16: T
load3u on u16: T
load3u on i32: T
load3u on u32: T
load3u on i64: T
load3u on u64: T
load3u on f16: T
load3u on f32: T
load3u on f64: T
load4a on i8: T
load4a on u8: T
load4a on i16: T
load4a on u16: T
load4a on i32: T
load4a on u32: T
load4a on i64: T
load4a on u64: T
load4a on f16: T
load4a on f32: T
load4a on f64: T
load4u on i8: T
load4u on u8: T
load4u on i16: T
load4u on u16: T
load4u on i32: T
load4u on u32: T
load4u on i64: T
load4u on u64: T
load4u on f16: T
load4u on f32: T
load4u on f64: T
loada on i8: T
loada on u8: T
loada on i16: T
loada on u16: T
loada on i32: T
loada on u32: T
loada on i64: T
loada on u64: T
loada on f16: T
loada on f32: _mm512_load_ps
loada on f64: _mm512_load_pd
loadla on i8: E
loadla on u8: E
loadla on i16: E
loadla on u16: E
loadla on i32: E
loadla on u32: E
loadla on i64: E
loadla on u64: E
loadla on f16: T
loadla on f32: E
loadla on f64: E
loadlu on i8: E
loadlu on u8: E
loadlu on i16: E
loadlu on u16: E
loadlu on i32: E
loadlu on u32: E
loadlu on i64: E
loadlu on u64: E
loadlu on f16: T
loadlu on f32: E
loadlu on f64: E
loadu on i8: T
loadu on u8: T
loadu on i16: T
loadu on u16: T
loadu on i32: T
loadu on u32: T
loadu on i64: T
loadu on u64: T
loadu on f16: T
loadu on f32: _mm512_loadu_ps
loadu on f64: _mm512_loadu_pd
lt on i8: T
lt on u8: T
lt on i16: T
lt on u16: T
lt on i32: T
lt on u32: T
lt on i64: T
lt on u64: T
lt on f16: T
lt on f32: T
lt on f64: T
mask_for_loop_tail on i8: T
mask_for_loop_tail on u8: T
mask_for_loop_tail on i16: T
mask_for_loop_tail on u16: T
mask_for_loop_tail on i32: T
mask_for_loop_tail on u32: T
mask_for_loop_tail on i64: T
mask_for_loop_tail on u64: T
mask_for_loop_tail on f16: T
mask_for_loop_tail on f32: T
mask_for_loop_tail on f64: T
mask_storea1 on i8: _mm512_mask_storeu_epi8
mask_storea1 on u8: _mm512_mask_storeu_epi8
mask_storea1 on i16: _mm512_mask_storeu_epi16
mask_storea1 on u16: _mm512_mask_storeu_epi16
mask_storea1 on i32: _mm512_mask_store_epi32
mask_storea1 on u32: _mm512_mask_store_epi32
mask_storea1 on i64: _mm512_mask_store_epi64
mask_storea1 on u64: _mm512_mask_store_epi64
mask_storea1 on f16: E
mask_storea1 on f32: _mm512_mask_store_ps
mask_storea1 on f64: _mm512_mask_store_pd
mask_storeu1 on i8: _mm512_mask_storeu_epi8
mask_storeu1 on u8: _mm512_mask_storeu_epi8
mask_storeu1 on i16: _mm512_mask_storeu_epi16
mask_storeu1 on u16: _mm512_mask_storeu_epi16
mask_storeu1 on i32: _mm512_mask_storeu_epi32
mask_storeu1 on u32: _mm512_mask_storeu_epi32
mask_storeu1 on i64: _mm512_mask_storeu_epi64
mask_storeu1 on u64: _mm512_mask_storeu_epi64
mask_storeu1 on f16: E
mask_storeu1 on f32: _mm512_mask_storeu_ps
mask_storeu1 on f64: _mm512_mask_storeu_pd
masko_loada1 on i8: _mm512_mask_loadu_epi8
masko_loada1 on u8: _mm512_mask_loadu_epi8
masko_loada1 on i16: _mm512_mask_loadu_epi16
masko_loada1 on u16: _mm512_mask_loadu_epi16
masko_loada1 on i32: _mm512_mask_load_epi32
masko_loada1 on u32: _mm512_mask_load_epi32
masko_loada1 on i64: _mm512_mask_load_epi64
masko_loada1 on u64: _mm512_mask_load_epi64
masko_loada1 on f16: E
masko_loada1 on f32: _mm512_mask_load_ps
masko_loada1 on f64: _mm512_mask_load_pd
masko_loadu1 on i8: _mm512_mask_loadu_epi8
masko_loadu1 on u8: _mm512_mask_loadu_epi8
masko_loadu1 on i16: _mm512_mask_loadu_epi16
masko_loadu1 on u16: _mm512_mask_loadu_epi16
masko_loadu1 on i32: _mm512_mask_loadu_epi32
masko_loadu1 on u32: _mm512_mask_loadu_epi32
masko_loadu1 on i64: _mm512_mask_loadu_epi64
masko_loadu1 on u64: _mm512_mask_loadu_epi64
masko_loadu1 on f16: E
masko_loadu1 on f32: _mm512_mask_loadu_ps
masko_loadu1 on f64: _mm512_mask_loadu_pd
maskz_loada1 on i8: _mm512_maskz_loadu_epi8
maskz_loada1 on u8: _mm512_maskz_loadu_epi8
maskz_loada1 on i16: _mm512_maskz_loadu_epi16
maskz_loada1 on u16: _mm512_maskz_loadu_epi16
maskz_loada1 on i32: _mm512_maskz_load_epi32
maskz_loada1 on u32: _mm512_maskz_load_epi32
maskz_loada1 on i64: _mm512_maskz_load_epi64
maskz_loada1 on u64: _mm512_maskz_load_epi64
maskz_loada1 on f16: E
maskz_loada1 on f32: _mm512_maskz_load_ps
maskz_loada1 on f64: _mm512_maskz_load_pd
maskz_loadu1 on i8: _mm512_maskz_loadu_epi8
maskz_loadu1 on u8: _mm512_maskz_loadu_epi8
maskz_loadu1 on i16: _mm512_maskz_loadu_epi16
maskz_loadu1 on u16: _mm512_maskz_loadu_epi16
maskz_loadu1 on i32: _mm512_maskz_loadu_epi32
maskz_loadu1 on u32: _mm512_maskz_loadu_epi32
maskz_loadu1 on i64: _mm512_maskz_loadu_epi64
maskz_loadu1 on u64: _mm512_maskz_loadu_epi64
maskz_loadu1 on f16: E
maskz_loadu1 on f32: _mm512_maskz_loadu_ps
maskz_loadu1 on f64: _mm512_maskz_loadu_pd
max on i8: _mm512_max_epi8
max on u8: _mm512_max_epu8
max on i16: _mm512_max_epi16
max on u16: _mm512_max_epu16
max on i32: _mm512_max_epi32
max on u32: _mm512_max_epu32
max on i64: _mm512_max_epi64
max on u64: _mm512_max_epu64
max on f16: T
max on f32: _mm512_max_ps
max on f64: _mm512_max_pd
min on i8: _mm512_min_epi8
min on u8: _mm512_min_epu8
min on i16: _mm512_min_epi16
min on u16: _mm512_min_epu16
min on i32: _mm512_min_epi32
min on u32: _mm512_min_epu32
min on i64: _mm512_min_epi64
min on u64: _mm512_min_epu64
min on f16: T
min on f32: _mm512_min_ps
min on f64: _mm512_min_pd
mul on i8: T
mul on u8: T
mul on i16: _mm512_mullo_epi16
mul on u16: _mm512_mullo_epi16
mul on i32: _mm512_mullo_epi32
mul on u32: _mm512_mullo_epi32
mul on i64: _mm512_mullo_epi64
mul on u64: _mm512_mullo_epi64
mul on f16: T
mul on f32: _mm512_mul_ps
mul on f64: _mm512_mul_pd
nbtrue on i8: T
nbtrue on u8: T
nbtrue on i16: T
nbtrue on u16: T
nbtrue on i32: T
nbtrue on u32: T
nbtrue on i64: T
nbtrue on u64: T
nbtrue on f16: T
nbtrue on f32: T
nbtrue on f64: T
ne on i8: T
ne on u8: T
ne on i16: T
ne on u16: T
ne on i32: T
ne on u32: T
ne on i64: T
ne on u64: T
ne on f16: T
ne on f32: T
ne on f64: T
neg on i8: T
neg on u8: T
neg on i16: T
neg on u16: T
neg on i32: T
neg on u32: T
neg on i64: T
neg on u64: T
neg on f16: T
neg on f32: T
neg on f64: T
notb on i8: T
notb on u8: T
notb on i16: T
notb on u16: T
notb on i32: T
notb on u32: T
notb on i64: T
notb on u64: T
notb on f16: T
notb on f32: T
notb on f64: T
notl on i8: T
notl on u8: T
notl on i16: T
notl on u16: T
notl on i32: T
notl on u32: T
notl on i64: T
notl on u64: T
notl on f16: T
notl on f32: T
notl on f64: T
orb on i8: _mm512_or_si512
orb on u8: _mm512_or_si512
orb on i16: _mm512_or_si512
orb on u16: _mm512_or_si512
orb on i32: _mm512_or_si512
orb on u32: _mm512_or_si512
orb on i64: _mm512_or_si512
orb on u64: _mm512_or_si512
orb on f16: T
orb on f32: _mm512_or_ps
orb on f64: _mm512_or_pd
orl on i8: T
orl on u8: T
orl on i16: T
orl on u16: T
orl on i32: T
orl on u32: T
orl on i64: T
orl on u64: T
orl on f16: T
orl on f32: T
orl on f64: T
rec on i8: NA
rec on u8: NA
rec on i16: NA
rec on u16: NA
rec on i32: NA
rec on u32: NA
rec on i64: NA
rec on u64: NA
rec on f16: T
rec on f32: T
rec on f64: T
rec11 on i8: NA
rec11 on u8: NA
rec11 on i16: NA
rec11 on u16: NA
rec11 on i32: NA
rec11 on u32: NA
rec11 on i64: NA
rec11 on u64: NA
rec11 on f16: T
rec11 on f32: _mm512_rcp14_ps
rec11 on f64: _mm512_rcp14_pd
rec8 on i8: NA
rec8 on u8: NA
rec8 on i16: NA
rec8 on u16: NA
rec8 on i32: NA
rec8 on u32: NA
rec8 on i64: NA
rec8 on u64: NA
rec8 on f16: T
rec8 on f32: _mm512_rcp14_ps
rec8 on f64: _mm512_rcp14_pd
round_to_even on i8: NOOP
round_to_even on u8: NOOP
round_to_even on i16: NOOP
round_to_even on u16: NOOP
round_to_even on i32: NOOP
round_to_even on u32: NOOP
round_to_even on i64: NOOP
round_to_even on u64: NOOP
round_to_even on f16: T
round_to_even on f32: _mm512_roundscale_ps
round_to_even on f64: _mm512_roundscale_pd
rsqrt11 on i8: NA
rsqrt11 on u8: NA
rsqrt11 on i16: NA
rsqrt11 on u16: NA
rsqrt11 on i32: NA
rsqrt11 on u32: NA
rsqrt11 on i64: NA
rsqrt11 on u64: NA
rsqrt11 on f16: T
rsqrt11 on f32: _mm512_rsqrt14_ps
rsqrt11 on f64: _mm512_rsqrt14_pd
rsqrt8 on i8: NA
rsqrt8 on u8: NA
rsqrt8 on i16: NA
rsqrt8 on u16: NA
rsqrt8 on i32: NA
rsqrt8 on u32: NA
rsqrt8 on i64: NA
rsqrt8 on u64: NA
rsqrt8 on f16: T
rsqrt8 on f32: _mm512_rsqrt14_ps
rsqrt8 on f64: _mm512_rsqrt14_pd
scatter on i8: NA
scatter on u8: NA
scatter on i16: E
scatter on u16: E
scatter on i32: _mm512_i32scatter_epi32
scatter on u32: _mm512_i32scatter_epi32
scatter on i64: _mm512_i64scatter_epi64
scatter on u64: _mm512_i64scatter_epi64
scatter on f16: E
scatter on f32: _mm512_i32scatter_ps
scatter on f64: _mm512_i64scatter_pd
scatter_linear on i8: T
scatter_linear on u8: T
scatter_linear on i16: T
scatter_linear on u16: T
scatter_linear on i32: T
scatter_linear on u32: T
scatter_linear on i64: T
scatter_linear on u64: T
scatter_linear on f16: E
scatter_linear on f32: T
scatter_linear on f64: T
set1 on i8: _mm512_set1_epi8
set1 on u8: T
set1 on i16: _mm512_set1_epi16
set1 on u16: T
set1 on i32: _mm512_set1_epi32
set1 on u32: T
set1 on i64: _mm512_set1_epi64
set1 on u64: T
set1 on f16: T
set1 on f32: _mm512_set1_ps
set1 on f64: _mm512_set1_pd
set1l on i8: T
set1l on u8: T
set1l on i16: T
set1l on u16: T
set1l on i32: T
set1l on u32: T
set1l on i64: T
set1l on u64: T
set1l on f16: T
set1l on f32: T
set1l on f64: T
shl on i8: T
shl on u8: T
shl on i16: T
shl on u16: T
shl on i32: T
shl on u32: T
shl on i64: T
shl on u64: T
shl on f16: NA
shl on f32: NA
shl on f64: NA
shr on i8: T
shr on u8: T
shr on i16: T
shr on u16: T
shr on i32: T
shr on u32: T
shr on i64: T
shr on u64: T
shr on f16: NA
shr on f32: NA
shr on f64: NA
shra on i8: T
shra on u8: T
shra on i16: T
shra on u16: T
shra on i32: T
shra on u32: T
shra on i64: T
shra on u64: T
shra on f16: NA
shra on f32: NA
shra on f64: NA
sqrt on i8: NA
sqrt on u8: NA
sqrt on i16: NA
sqrt on u16: NA
sqrt on i32: NA
sqrt on u32: NA
sqrt on i64: NA
sqrt on u64: NA
sqrt on f16: T
sqrt on f32: _mm512_sqrt_ps
sqrt on f64: _mm512_sqrt_pd
store2a on i8: T
store2a on u8: T
store2a on i16: T
store2a on u16: T
store2a on i32: T
store2a on u32: T
store2a on i64: T
store2a on u64: T
store2a on f16: T
store2a on f32: T
store2a on f64: T
store2u on i8: T
store2u on u8: T
store2u on i16: T
store2u on u16: T
store2u on i32: T
store2u on u32: T
store2u on i64: T
store2u on u64: T
store2u on f16: T
store2u on f32: T
store2u on f64: T
store3a on i8: T
store3a on u8: T
store3a on i16: T
store3a on u16: T
store3a on i32: T
store3a on u32: T
store3a on i64: T
store3a on u64: T
store3a on f16: T
store3a on f32: T
store3a on f64: T
store3u on i8: T
store3u on u8: T
store3u on i16: T
store3u on u16: T
store3u on i32: T
store3u on u32: T
store3u on i64: T
store3u on u64: T
store3u on f16: T
store3u on f32: T
store3u on f64: T
store4a on i8: T
store4a on u8: T
store4a on i16: T
store4a on u16: T
store4a on i32: T
store4a on u32: T
store4a on i64: T
store4a on u64: T
store4a on f16: T
store4a on f32: T
store4a on f64: T
store4u on i8: T
store4u on u8: T
store4u on i16: T
store4u on u16: T
store4u on i32: T
store4u on u32: T
store4u on i64: T
store4u on u64: T
store4u on f16: T
store4u on f32: T
store4u on f64: T
storea on i8: T
storea on u8: T
storea on i16: T
storea on u16: T
storea on i32: T
storea on u32: T
storea on i64: T
storea on u64: T
storea on f16: T
storea on f32: _mm512_store_ps
storea on f64: _mm512_store_pd
storela on i8: E
storela on u8: E
storela on i16: E
storela on u16: E
storela on i32: E
storela on u32: E
storela on i64: E
storela on u64: E
storela on f16: E
storela on f32: E
storela on f64: E
storelu on i8: E
storelu on u8: E
storelu on i16: E
storelu on u16: E
storelu on i32: E
storelu on u32: E
storelu on i64: E
storelu on u64: E
storelu on f16: E
storelu on f32: E
storelu on f64: E
storeu on i8: T
storeu on u8: T
storeu on i16: T
storeu on u16: T
storeu on i32: T
storeu on u32: T
storeu on i64: T
storeu on u64: T
storeu on f16: T
storeu on f32: _mm512_storeu_ps
storeu on f64: _mm512_storeu_pd
sub on i8: _mm512_sub_epi8
sub on u8: _mm512_sub_epi8
sub on i16: _mm512_sub_epi16
sub on u16: _mm512_sub_epi16
sub on i32: _mm512_sub_epi32
sub on u32: _mm512_sub_epi32
sub on i64: _mm512_sub_epi64
sub on u64: _mm512_sub_epi64
sub on f16: T
sub on f32: _mm512_sub_ps
sub on f64: _mm512_sub_pd
subs on i8: _mm512_subs_epi8
subs on u8: _mm512_subs_epu8
subs on i16: _mm512_subs_epi16
subs on u16: _mm512_subs_epu16
subs on i32: T
subs on u32: E
subs on i64: T
subs on u64: E
subs on f16: T
subs on f32: T
subs on f64: T
to_logical on i8: T
to_logical on u8: T
to_logical on i16: T
to_logical on u16: T
to_logical on i32: T
to_logical on u32: T
to_logical on i64: T
to_logical on u64: T
to_logical on f16: T
to_logical on f32: T
to_logical on f64: T
to_mask on i8: _mm512_movm_epi8
to_mask on u8: _mm512_movm_epi8
to_mask on i16: _mm512_movm_epi16
to_mask on u16: _mm512_movm_epi16
to_mask on i32: _mm512_movm_epi32
to_mask on u32: _mm512_movm_epi32
to_mask on i64: _mm512_movm_epi64
to_mask on u64: _mm512_movm_epi64
to_mask on f16: T
to_mask on f32: _mm512_movm_epi32
to_mask on f64: _mm512_movm_epi64
trunc on i8: NOOP
trunc on u8: NOOP
trunc on i16: NOOP
trunc on u16: NOOP
trunc on i32: NOOP
trunc on u32: NOOP
trunc on i64: NOOP
trunc on u64: NOOP
trunc on f16: T
trunc on f32: T
trunc on f64: T
unzip on i8: T
unzip on u8: T
unzip on i16: T
unzip on u16: T
unzip on i32: T
unzip on u32: T
unzip on i64: T
unzip on u64: T
unzip on f16: T
unzip on f32: T
unzip on f64: T
unziphi on i8: T
unziphi on u8: T
unziphi on i16: T
unziphi on u16: T
unziphi on i32: T
unziphi on u32: T
unziphi on i64: T
unziphi on u64: T
unziphi on f16: T
unziphi on f32: T
unziphi on f64: T
unziplo on i8: T
unziplo on u8: T
unziplo on i16: T
unziplo on u16: T
unziplo on i32: T
unziplo on u32: T
unziplo on i64: T
unziplo on u64: T
unziplo on f16: T
unziplo on f32: T
unziplo on f64: T
xorb on i8: _mm512_xor_si512
xorb on u8: _mm512_xor_si512
xorb on i16: _mm512_xor_si512
xorb on u16: _mm512_xor_si512
xorb on i32: _mm512_xor_si512
xorb on u32: _mm512_xor_si512
xorb on i64: _mm512_xor_si512
xorb on u64: _mm512_xor_si512
xorb on f16: T
xorb on f32: _mm512_xor_ps
xorb on f64: _mm512_xor_pd
xorl on i8: T
xorl on u8: T
xorl on i16: T
xorl on u16: T
xorl on i32: T
xorl on u32: T
xorl on i64: T
xorl on u64: T
xorl on f16: T
xorl on f32: T
xorl on f64: T
zip on i8: T
zip on u8: T
zip on i16: T
zip on u16: T
zip on i32: T
zip on u32: T
zip on i64: T
zip on u64: T
zip on f16: T
zip on f32: T
zip on f64: T
ziphi on i8: T
ziphi on u8: T
ziphi on i16: T
ziphi on u16: T
ziphi on i32: T
ziphi on u32: T
ziphi on i64: T
ziphi on u64: T
ziphi on f16: T
ziphi on f32: T
ziphi on f64: T
ziplo on i8: T
ziplo on u8: T
ziplo on i16: T
ziplo on u16: T
ziplo on i32: T
ziplo on u32: T
ziplo on i64: T
ziplo on u64: T
ziplo on f16: T
ziplo on f32: T
ziplo on f64: T
cvt from i8 to i8: NOOP
cvt from i8 to u8: NOOP
cvt from u8 to i8: NOOP
cvt from u8 to u8: NOOP
cvt from i16 to i16: NOOP
cvt from i16 to u16: NOOP
cvt from i16 to f16: T
cvt from u16 to i16: NOOP
cvt from u16 to u16: NOOP
cvt from u16 to f16: T
cvt from i32 to i32: NOOP
cvt from i32 to u32: NOOP
cvt from i32 to f32: _mm512_cvtepi32_ps
cvt from u32 to i32: NOOP
cvt from u32 to u32: NOOP
cvt from u32 to f32: _mm512_cvtepu32_ps
cvt from i64 to i64: NOOP
cvt from i64 to u64: NOOP
cvt from i64 to f64: _mm512_cvtepi64_pd
cvt from u64 to i64: NOOP
cvt from u64 to u64: NOOP
cvt from u64 to f64: _mm512_cvtepi64_pd
cvt from f16 to i16: E
cvt from f16 to u16: E
cvt from f16 to f16: NOOP
cvt from f32 to i32: _mm512_cvtps_epi32
cvt from f32 to u32: _mm512_cvtps_epu32
cvt from f32 to f32: NOOP
cvt from f64 to i64: _mm512_cvtpd_epi64
cvt from f64 to u64: _mm512_cvtpd_epi64
cvt from f64 to f64: NOOP
reinterpret from i8 to i8: NOOP
reinterpret from i8 to u8: NOOP
reinterpret from u8 to i8: NOOP
reinterpret from u8 to u8: NOOP
reinterpret from i16 to i16: NOOP
reinterpret from i16 to u16: NOOP
reinterpret from i16 to f16: T
reinterpret from u16 to i16: NOOP
reinterpret from u16 to u16: NOOP
reinterpret from u16 to f16: T
reinterpret from i32 to i32: NOOP
reinterpret from i32 to u32: NOOP
reinterpret from i32 to f32: NOOP
reinterpret from u32 to i32: NOOP
reinterpret from u32 to u32: NOOP
reinterpret from u32 to f32: NOOP
reinterpret from i64 to i64: NOOP
reinterpret from i64 to u64: NOOP
reinterpret from i64 to f64: NOOP
reinterpret from u64 to i64: NOOP
reinterpret from u64 to u64: NOOP
reinterpret from u64 to f64: NOOP
reinterpret from f16 to i16: T
reinterpret from f16 to u16: T
reinterpret from f16 to f16: NOOP
reinterpret from f32 to i32: NOOP
reinterpret from f32 to u32: NOOP
reinterpret from f32 to f32: NOOP
reinterpret from f64 to i64: NOOP
reinterpret from f64 to u64: NOOP
reinterpret from f64 to f64: NOOP
reinterpretl from i8 to i8: NOOP
reinterpretl from i8 to u8: NOOP
reinterpretl from u8 to i8: NOOP
reinterpretl from u8 to u8: NOOP
reinterpretl from i16 to i16: NOOP
reinterpretl from i16 to u16: NOOP
reinterpretl from i16 to f16: T
reinterpretl from u16 to i16: NOOP
reinterpretl from u16 to u16: NOOP
reinterpretl from u16 to f16: T
reinterpretl from i32 to i32: NOOP
reinterpretl from i32 to u32: NOOP
reinterpretl from i32 to f32: NOOP
reinterpretl from u32 to i32: NOOP
reinterpretl from u32 to u32: NOOP
reinterpretl from u32 to f32: NOOP
reinterpretl from i64 to i64: NOOP
reinterpretl from i64 to u64: NOOP
reinterpretl from i64 to f64: NOOP
reinterpretl from u64 to i64: NOOP
reinterpretl from u64 to u64: NOOP
reinterpretl from u64 to f64: NOOP
reinterpretl from f16 to i16: T
reinterpretl from f16 to u16: T
reinterpretl from f16 to f16: NOOP
reinterpretl from f32 to i32: NOOP
reinterpretl from f32 to u32: NOOP
reinterpretl from f32 to f32: NOOP
reinterpretl from f64 to i64: NOOP
reinterpretl from f64 to u64: NOOP
reinterpretl from f64 to f64: NOOP
upcvt from i8 to i16: T
upcvt from i8 to u16: T
upcvt from i8 to f16: T
upcvt from u8 to i16: T
upcvt from u8 to u16: T
upcvt from u8 to f16: T
upcvt from i16 to i32: T
upcvt from i16 to u32: T
upcvt from i16 to f32: T
upcvt from u16 to i32: T
upcvt from u16 to u32: T
upcvt from u16 to f32: T
upcvt from i32 to i64: T
upcvt from i32 to u64: T
upcvt from i32 to f64: T
upcvt from u32 to i64: T
upcvt from u32 to u64: T
upcvt from u32 to f64: T
upcvt from f16 to i32: T
upcvt from f16 to u32: T
upcvt from f16 to f32: T
upcvt from f32 to i64: T
upcvt from f32 to u64: T
upcvt from f32 to f64: T
downcvt from i16 to i8: T
downcvt from i16 to u8: E
downcvt from u16 to i8: E
downcvt from u16 to u8: E
downcvt from i32 to i16: T
downcvt from i32 to u16: E
downcvt from i32 to f16: T
downcvt from u32 to i16: E
downcvt from u32 to u16: E
downcvt from u32 to f16: T
downcvt from i64 to i32: T
downcvt from i64 to u32: E
downcvt from i64 to f32: T
downcvt from u64 to i32: E
downcvt from u64 to u32: E
downcvt from u64 to f32: E
downcvt from f16 to i8: E
downcvt from f16 to u8: E
downcvt from f32 to i16: E
downcvt from f32 to u16: E
downcvt from f32 to f16: T
downcvt from f64 to i32: E
downcvt from f64 to u32: E
downcvt from f64 to f32: T