Notations are as follows:
- T: trick, usually implemented using other intrinsics
- E: scalar emulation
- NOOP: no operation needed
- NA: the operator does not exist for the given type
- an intrinsic name (e.g. vec_add): the actual wrapped intrinsic
- =: (used below for storeu) presumably a plain assignment/store with no intrinsic needed — not defined in the original legend
abs on i8: vec_abs
abs on u8: NOOP
abs on i16: vec_abs
abs on u16: NOOP
abs on i32: vec_abs
abs on u32: NOOP
abs on i64: T
abs on u64: NOOP
abs on f16: T
abs on f32: vec_abs
abs on f64: T
add on i8: vec_add
add on u8: vec_add
add on i16: vec_add
add on u16: vec_add
add on i32: vec_add
add on u32: vec_add
add on i64: T
add on u64: T
add on f16: T
add on f32: vec_add
add on f64: T
adds on i8: vec_adds
adds on u8: vec_adds
adds on i16: vec_adds
adds on u16: vec_adds
adds on i32: vec_adds
adds on u32: vec_adds
adds on i64: T
adds on u64: T
adds on f16: T
adds on f32: vec_add
adds on f64: T
addv on i8: NA
addv on u8: NA
addv on i16: NA
addv on u16: NA
addv on i32: NA
addv on u32: NA
addv on i64: NA
addv on u64: NA
addv on f16: T
addv on f32: T
addv on f64: T
all on i8: T
all on u8: T
all on i16: T
all on u16: T
all on i32: T
all on u32: T
all on i64: T
all on u64: T
all on f16: T
all on f32: T
all on f64: T
andb on i8: vec_and
andb on u8: vec_and
andb on i16: vec_and
andb on u16: vec_and
andb on i32: vec_and
andb on u32: vec_and
andb on i64: T
andb on u64: T
andb on f16: T
andb on f32: vec_and
andb on f64: T
andl on i8: vec_and
andl on u8: vec_and
andl on i16: vec_and
andl on u16: vec_and
andl on i32: vec_and
andl on u32: vec_and
andl on i64: T
andl on u64: T
andl on f16: T
andl on f32: vec_and
andl on f64: T
andnotb on i8: vec_andc
andnotb on u8: vec_andc
andnotb on i16: vec_andc
andnotb on u16: vec_andc
andnotb on i32: vec_andc
andnotb on u32: vec_andc
andnotb on i64: T
andnotb on u64: T
andnotb on f16: T
andnotb on f32: vec_andc
andnotb on f64: T
andnotl on i8: vec_andc
andnotl on u8: vec_andc
andnotl on i16: vec_andc
andnotl on u16: vec_andc
andnotl on i32: vec_andc
andnotl on u32: vec_andc
andnotl on i64: T
andnotl on u64: T
andnotl on f16: T
andnotl on f32: vec_andc
andnotl on f64: T
any on i8: T
any on u8: T
any on i16: T
any on u16: T
any on i32: T
any on u32: T
any on i64: T
any on u64: T
any on f16: T
any on f32: T
any on f64: T
ceil on i8: NOOP
ceil on u8: NOOP
ceil on i16: NOOP
ceil on u16: NOOP
ceil on i32: NOOP
ceil on u32: NOOP
ceil on i64: NOOP
ceil on u64: NOOP
ceil on f16: T
ceil on f32: vec_ceil
ceil on f64: T
div on i8: T
div on u8: T
div on i16: T
div on u16: T
div on i32: T
div on u32: T
div on i64: T
div on u64: T
div on f16: T
div on f32: vec_div
div on f64: T
eq on i8: vec_cmpeq
eq on u8: vec_cmpeq
eq on i16: vec_cmpeq
eq on u16: vec_cmpeq
eq on i32: vec_cmpeq
eq on u32: vec_cmpeq
eq on i64: T
eq on u64: T
eq on f16: T
eq on f32: T
eq on f64: T
floor on i8: NOOP
floor on u8: NOOP
floor on i16: NOOP
floor on u16: NOOP
floor on i32: NOOP
floor on u32: NOOP
floor on i64: NOOP
floor on u64: NOOP
floor on f16: T
floor on f32: vec_floor
floor on f64: T
fma on i8: T
fma on u8: T
fma on i16: T
fma on u16: T
fma on i32: T
fma on u32: T
fma on i64: T
fma on u64: T
fma on f16: T
fma on f32: vec_madd
fma on f64: T
fms on i8: T
fms on u8: T
fms on i16: T
fms on u16: T
fms on i32: T
fms on u32: T
fms on i64: T
fms on u64: T
fms on f16: T
fms on f32: vec_msub
fms on f64: T
fnma on i8: T
fnma on u8: T
fnma on i16: T
fnma on u16: T
fnma on i32: T
fnma on u32: T
fnma on i64: T
fnma on u64: T
fnma on f16: T
fnma on f32: vec_nmsub
fnma on f64: T
fnms on i8: T
fnms on u8: T
fnms on i16: T
fnms on u16: T
fnms on i32: T
fnms on u32: T
fnms on i64: T
fnms on u64: T
fnms on f16: T
fnms on f32: vec_nmadd
fnms on f64: T
gather on i8: NA
gather on u8: NA
gather on i16: T
gather on u16: T
gather on i32: T
gather on u32: T
gather on i64: T
gather on u64: T
gather on f16: T
gather on f32: T
gather on f64: T
gather_linear on i8: T
gather_linear on u8: T
gather_linear on i16: T
gather_linear on u16: T
gather_linear on i32: T
gather_linear on u32: T
gather_linear on i64: T
gather_linear on u64: T
gather_linear on f16: T
gather_linear on f32: T
gather_linear on f64: T
ge on i8: vec_cmpge
ge on u8: vec_cmpge
ge on i16: vec_cmpge
ge on u16: vec_cmpge
ge on i32: vec_cmpge
ge on u32: vec_cmpge
ge on i64: T
ge on u64: T
ge on f16: T
ge on f32: T
ge on f64: T
gt on i8: vec_cmpgt
gt on u8: vec_cmpgt
gt on i16: vec_cmpgt
gt on u16: vec_cmpgt
gt on i32: vec_cmpgt
gt on u32: vec_cmpgt
gt on i64: T
gt on u64: T
gt on f16: T
gt on f32: T
gt on f64: T
if_else1 on i8: vec_sel
if_else1 on u8: vec_sel
if_else1 on i16: vec_sel
if_else1 on u16: vec_sel
if_else1 on i32: vec_sel
if_else1 on u32: vec_sel
if_else1 on i64: T
if_else1 on u64: T
if_else1 on f16: T
if_else1 on f32: vec_sel
if_else1 on f64: T
iota on i8: T
iota on u8: T
iota on i16: T
iota on u16: T
iota on i32: T
iota on u32: T
iota on i64: T
iota on u64: T
iota on f16: T
iota on f32: T
iota on f64: T
le on i8: vec_cmple
le on u8: vec_cmple
le on i16: vec_cmple
le on u16: vec_cmple
le on i32: vec_cmple
le on u32: vec_cmple
le on i64: T
le on u64: T
le on f16: T
le on f32: T
le on f64: T
len on i8: NOOP
len on u8: NOOP
len on i16: NOOP
len on u16: NOOP
len on i32: NOOP
len on u32: NOOP
len on i64: NOOP
len on u64: NOOP
len on f16: NOOP
len on f32: NOOP
len on f64: NOOP
load2a on i8: T
load2a on u8: T
load2a on i16: T
load2a on u16: T
load2a on i32: T
load2a on u32: T
load2a on i64: T
load2a on u64: T
load2a on f16: T
load2a on f32: T
load2a on f64: T
load2u on i8: T
load2u on u8: T
load2u on i16: T
load2u on u16: T
load2u on i32: T
load2u on u32: T
load2u on i64: T
load2u on u64: T
load2u on f16: T
load2u on f32: T
load2u on f64: T
load3a on i8: T
load3a on u8: T
load3a on i16: T
load3a on u16: T
load3a on i32: T
load3a on u32: T
load3a on i64: T
load3a on u64: T
load3a on f16: T
load3a on f32: T
load3a on f64: T
load3u on i8: T
load3u on u8: T
load3u on i16: T
load3u on u16: T
load3u on i32: T
load3u on u32: T
load3u on i64: T
load3u on u64: T
load3u on f16: T
load3u on f32: T
load3u on f64: T
load4a on i8: T
load4a on u8: T
load4a on i16: T
load4a on u16: T
load4a on i32: T
load4a on u32: T
load4a on i64: T
load4a on u64: T
load4a on f16: T
load4a on f32: T
load4a on f64: T
load4u on i8: T
load4u on u8: T
load4u on i16: T
load4u on u16: T
load4u on i32: T
load4u on u32: T
load4u on i64: T
load4u on u64: T
load4u on f16: T
load4u on f32: T
load4u on f64: T
loada on i8: vec_ld
loada on u8: vec_ld
loada on i16: vec_ld
loada on u16: vec_ld
loada on i32: vec_ld
loada on u32: vec_ld
loada on i64: T
loada on u64: T
loada on f16: T
loada on f32: vec_ld
loada on f64: T
loadla on i8: T
loadla on u8: T
loadla on i16: T
loadla on u16: T
loadla on i32: T
loadla on u32: T
loadla on i64: T
loadla on u64: T
loadla on f16: T
loadla on f32: T
loadla on f64: T
loadlu on i8: T
loadlu on u8: T
loadlu on i16: T
loadlu on u16: T
loadlu on i32: T
loadlu on u32: T
loadlu on i64: T
loadlu on u64: T
loadlu on f16: T
loadlu on f32: T
loadlu on f64: T
loadu on i8: NOOP
loadu on u8: NOOP
loadu on i16: NOOP
loadu on u16: NOOP
loadu on i32: NOOP
loadu on u32: NOOP
loadu on i64: T
loadu on u64: T
loadu on f16: T
loadu on f32: NOOP
loadu on f64: T
lt on i8: vec_cmplt
lt on u8: vec_cmplt
lt on i16: vec_cmplt
lt on u16: vec_cmplt
lt on i32: vec_cmplt
lt on u32: vec_cmplt
lt on i64: T
lt on u64: T
lt on f16: T
lt on f32: T
lt on f64: T
mask_for_loop_tail on i8: T
mask_for_loop_tail on u8: T
mask_for_loop_tail on i16: T
mask_for_loop_tail on u16: T
mask_for_loop_tail on i32: T
mask_for_loop_tail on u32: T
mask_for_loop_tail on i64: T
mask_for_loop_tail on u64: T
mask_for_loop_tail on f16: T
mask_for_loop_tail on f32: T
mask_for_loop_tail on f64: T
mask_storea1 on i8: T
mask_storea1 on u8: T
mask_storea1 on i16: T
mask_storea1 on u16: T
mask_storea1 on i32: T
mask_storea1 on u32: T
mask_storea1 on i64: T
mask_storea1 on u64: T
mask_storea1 on f16: T
mask_storea1 on f32: T
mask_storea1 on f64: T
mask_storeu1 on i8: T
mask_storeu1 on u8: T
mask_storeu1 on i16: T
mask_storeu1 on u16: T
mask_storeu1 on i32: T
mask_storeu1 on u32: T
mask_storeu1 on i64: T
mask_storeu1 on u64: T
mask_storeu1 on f16: T
mask_storeu1 on f32: T
mask_storeu1 on f64: T
masko_loada1 on i8: T
masko_loada1 on u8: T
masko_loada1 on i16: T
masko_loada1 on u16: T
masko_loada1 on i32: T
masko_loada1 on u32: T
masko_loada1 on i64: T
masko_loada1 on u64: T
masko_loada1 on f16: T
masko_loada1 on f32: T
masko_loada1 on f64: T
masko_loadu1 on i8: T
masko_loadu1 on u8: T
masko_loadu1 on i16: T
masko_loadu1 on u16: T
masko_loadu1 on i32: T
masko_loadu1 on u32: T
masko_loadu1 on i64: T
masko_loadu1 on u64: T
masko_loadu1 on f16: T
masko_loadu1 on f32: T
masko_loadu1 on f64: T
maskz_loada1 on i8: T
maskz_loada1 on u8: T
maskz_loada1 on i16: T
maskz_loada1 on u16: T
maskz_loada1 on i32: T
maskz_loada1 on u32: T
maskz_loada1 on i64: T
maskz_loada1 on u64: T
maskz_loada1 on f16: T
maskz_loada1 on f32: T
maskz_loada1 on f64: T
maskz_loadu1 on i8: T
maskz_loadu1 on u8: T
maskz_loadu1 on i16: T
maskz_loadu1 on u16: T
maskz_loadu1 on i32: T
maskz_loadu1 on u32: T
maskz_loadu1 on i64: T
maskz_loadu1 on u64: T
maskz_loadu1 on f16: T
maskz_loadu1 on f32: T
maskz_loadu1 on f64: T
max on i8: vec_max
max on u8: vec_max
max on i16: vec_max
max on u16: vec_max
max on i32: vec_max
max on u32: vec_max
max on i64: T
max on u64: T
max on f16: T
max on f32: vec_max
max on f64: T
min on i8: vec_min
min on u8: vec_min
min on i16: vec_min
min on u16: vec_min
min on i32: vec_min
min on u32: vec_min
min on i64: T
min on u64: T
min on f16: T
min on f32: vec_min
min on f64: T
mul on i8: vec_mul
mul on u8: vec_mul
mul on i16: vec_mul
mul on u16: vec_mul
mul on i32: vec_mul
mul on u32: vec_mul
mul on i64: T
mul on u64: T
mul on f16: T
mul on f32: vec_mul
mul on f64: T
nbtrue on i8: T
nbtrue on u8: T
nbtrue on i16: T
nbtrue on u16: T
nbtrue on i32: T
nbtrue on u32: T
nbtrue on i64: T
nbtrue on u64: T
nbtrue on f16: T
nbtrue on f32: T
nbtrue on f64: T
ne on i8: T
ne on u8: T
ne on i16: T
ne on u16: T
ne on i32: T
ne on u32: T
ne on i64: T
ne on u64: T
ne on f16: T
ne on f32: T
ne on f64: T
neg on i8: vec_neg
neg on u8: T
neg on i16: vec_neg
neg on u16: T
neg on i32: vec_neg
neg on u32: T
neg on i64: T
neg on u64: T
neg on f16: T
neg on f32: vec_neg
neg on f64: T
notb on i8: vec_nor
notb on u8: vec_nor
notb on i16: vec_nor
notb on u16: vec_nor
notb on i32: vec_nor
notb on u32: vec_nor
notb on i64: T
notb on u64: T
notb on f16: T
notb on f32: vec_nor
notb on f64: T
notl on i8: vec_nor
notl on u8: vec_nor
notl on i16: vec_nor
notl on u16: vec_nor
notl on i32: vec_nor
notl on u32: vec_nor
notl on i64: T
notl on u64: T
notl on f16: T
notl on f32: vec_nor
notl on f64: T
orb on i8: vec_or
orb on u8: vec_or
orb on i16: vec_or
orb on u16: vec_or
orb on i32: vec_or
orb on u32: vec_or
orb on i64: T
orb on u64: T
orb on f16: T
orb on f32: vec_or
orb on f64: T
orl on i8: vec_or
orl on u8: vec_or
orl on i16: vec_or
orl on u16: vec_or
orl on i32: vec_or
orl on u32: vec_or
orl on i64: T
orl on u64: T
orl on f16: T
orl on f32: vec_or
orl on f64: T
rec on i8: NA
rec on u8: NA
rec on i16: NA
rec on u16: NA
rec on i32: NA
rec on u32: NA
rec on i64: NA
rec on u64: NA
rec on f16: T
rec on f32: T
rec on f64: T
rec11 on i8: NA
rec11 on u8: NA
rec11 on i16: NA
rec11 on u16: NA
rec11 on i32: NA
rec11 on u32: NA
rec11 on i64: NA
rec11 on u64: NA
rec11 on f16: T
rec11 on f32: vec_re
rec11 on f64: T
rec8 on i8: NA
rec8 on u8: NA
rec8 on i16: NA
rec8 on u16: NA
rec8 on i32: NA
rec8 on u32: NA
rec8 on i64: NA
rec8 on u64: NA
rec8 on f16: T
rec8 on f32: vec_re
rec8 on f64: T
round_to_even on i8: NOOP
round_to_even on u8: NOOP
round_to_even on i16: NOOP
round_to_even on u16: NOOP
round_to_even on i32: NOOP
round_to_even on u32: NOOP
round_to_even on i64: NOOP
round_to_even on u64: NOOP
round_to_even on f16: T
round_to_even on f32: T
round_to_even on f64: T
rsqrt11 on i8: NA
rsqrt11 on u8: NA
rsqrt11 on i16: NA
rsqrt11 on u16: NA
rsqrt11 on i32: NA
rsqrt11 on u32: NA
rsqrt11 on i64: NA
rsqrt11 on u64: NA
rsqrt11 on f16: T
rsqrt11 on f32: vec_rsqrte
rsqrt11 on f64: T
rsqrt8 on i8: NA
rsqrt8 on u8: NA
rsqrt8 on i16: NA
rsqrt8 on u16: NA
rsqrt8 on i32: NA
rsqrt8 on u32: NA
rsqrt8 on i64: NA
rsqrt8 on u64: NA
rsqrt8 on f16: T
rsqrt8 on f32: vec_rsqrte
rsqrt8 on f64: T
scatter on i8: NA
scatter on u8: NA
scatter on i16: T
scatter on u16: T
scatter on i32: T
scatter on u32: T
scatter on i64: T
scatter on u64: T
scatter on f16: T
scatter on f32: T
scatter on f64: T
scatter_linear on i8: T
scatter_linear on u8: T
scatter_linear on i16: T
scatter_linear on u16: T
scatter_linear on i32: T
scatter_linear on u32: T
scatter_linear on i64: T
scatter_linear on u64: T
scatter_linear on f16: T
scatter_linear on f32: T
scatter_linear on f64: T
set1 on i8: vec_splats
set1 on u8: vec_splats
set1 on i16: vec_splats
set1 on u16: vec_splats
set1 on i32: vec_splats
set1 on u32: vec_splats
set1 on i64: T
set1 on u64: T
set1 on f16: T
set1 on f32: vec_splats
set1 on f64: T
set1l on i8: T
set1l on u8: T
set1l on i16: T
set1l on u16: T
set1l on i32: T
set1l on u32: T
set1l on i64: T
set1l on u64: T
set1l on f16: T
set1l on f32: T
set1l on f64: T
shl on i8: T
shl on u8: T
shl on i16: T
shl on u16: T
shl on i32: T
shl on u32: T
shl on i64: T
shl on u64: T
shl on f16: NA
shl on f32: NA
shl on f64: NA
shr on i8: T
shr on u8: T
shr on i16: T
shr on u16: T
shr on i32: T
shr on u32: T
shr on i64: T
shr on u64: T
shr on f16: NA
shr on f32: NA
shr on f64: NA
shra on i8: T
shra on u8: T
shra on i16: T
shra on u16: T
shra on i32: T
shra on u32: T
shra on i64: T
shra on u64: T
shra on f16: NA
shra on f32: NA
shra on f64: NA
sqrt on i8: NA
sqrt on u8: NA
sqrt on i16: NA
sqrt on u16: NA
sqrt on i32: NA
sqrt on u32: NA
sqrt on i64: NA
sqrt on u64: NA
sqrt on f16: T
sqrt on f32: vec_sqrt
sqrt on f64: T
store2a on i8: T
store2a on u8: T
store2a on i16: T
store2a on u16: T
store2a on i32: T
store2a on u32: T
store2a on i64: T
store2a on u64: T
store2a on f16: T
store2a on f32: T
store2a on f64: T
store2u on i8: T
store2u on u8: T
store2u on i16: T
store2u on u16: T
store2u on i32: T
store2u on u32: T
store2u on i64: T
store2u on u64: T
store2u on f16: T
store2u on f32: T
store2u on f64: T
store3a on i8: T
store3a on u8: T
store3a on i16: T
store3a on u16: T
store3a on i32: T
store3a on u32: T
store3a on i64: T
store3a on u64: T
store3a on f16: T
store3a on f32: T
store3a on f64: T
store3u on i8: T
store3u on u8: T
store3u on i16: T
store3u on u16: T
store3u on i32: T
store3u on u32: T
store3u on i64: T
store3u on u64: T
store3u on f16: T
store3u on f32: T
store3u on f64: T
store4a on i8: T
store4a on u8: T
store4a on i16: T
store4a on u16: T
store4a on i32: T
store4a on u32: T
store4a on i64: T
store4a on u64: T
store4a on f16: T
store4a on f32: T
store4a on f64: T
store4u on i8: T
store4u on u8: T
store4u on i16: T
store4u on u16: T
store4u on i32: T
store4u on u32: T
store4u on i64: T
store4u on u64: T
store4u on f16: T
store4u on f32: T
store4u on f64: T
storea on i8: vec_st
storea on u8: vec_st
storea on i16: vec_st
storea on u16: vec_st
storea on i32: vec_st
storea on u32: vec_st
storea on i64: T
storea on u64: T
storea on f16: T
storea on f32: vec_st
storea on f64: T
storela on i8: T
storela on u8: T
storela on i16: T
storela on u16: T
storela on i32: T
storela on u32: T
storela on i64: T
storela on u64: T
storela on f16: T
storela on f32: T
storela on f64: T
storelu on i8: T
storelu on u8: T
storelu on i16: T
storelu on u16: T
storelu on i32: T
storelu on u32: T
storelu on i64: T
storelu on u64: T
storelu on f16: T
storelu on f32: T
storelu on f64: T
storeu on i8: =
storeu on u8: =
storeu on i16: =
storeu on u16: =
storeu on i32: =
storeu on u32: =
storeu on i64: T
storeu on u64: T
storeu on f16: T
storeu on f32: =
storeu on f64: T
sub on i8: vec_sub
sub on u8: vec_sub
sub on i16: vec_sub
sub on u16: vec_sub
sub on i32: vec_sub
sub on u32: vec_sub
sub on i64: T
sub on u64: T
sub on f16: T
sub on f32: vec_sub
sub on f64: T
subs on i8: vec_subs
subs on u8: vec_subs
subs on i16: vec_subs
subs on u16: vec_subs
subs on i32: vec_subs
subs on u32: vec_subs
subs on i64: T
subs on u64: T
subs on f16: T
subs on f32: vec_sub
subs on f64: T
to_logical on i8: T
to_logical on u8: T
to_logical on i16: T
to_logical on u16: T
to_logical on i32: T
to_logical on u32: T
to_logical on i64: T
to_logical on u64: T
to_logical on f16: T
to_logical on f32: T
to_logical on f64: T
to_mask on i8: NOOP
to_mask on u8: NOOP
to_mask on i16: NOOP
to_mask on u16: NOOP
to_mask on i32: NOOP
to_mask on u32: NOOP
to_mask on i64: T
to_mask on u64: T
to_mask on f16: T
to_mask on f32: NOOP
to_mask on f64: T
trunc on i8: NOOP
trunc on u8: NOOP
trunc on i16: NOOP
trunc on u16: NOOP
trunc on i32: NOOP
trunc on u32: NOOP
trunc on i64: NOOP
trunc on u64: NOOP
trunc on f16: T
trunc on f32: vec_trunc
trunc on f64: T
unzip on i8: T
unzip on u8: T
unzip on i16: T
unzip on u16: T
unzip on i32: T
unzip on u32: T
unzip on i64: T
unzip on u64: T
unzip on f16: T
unzip on f32: T
unzip on f64: T
unziphi on i8: T
unziphi on u8: T
unziphi on i16: T
unziphi on u16: T
unziphi on i32: T
unziphi on u32: T
unziphi on i64: T
unziphi on u64: T
unziphi on f16: T
unziphi on f32: T
unziphi on f64: T
unziplo on i8: T
unziplo on u8: T
unziplo on i16: T
unziplo on u16: T
unziplo on i32: T
unziplo on u32: T
unziplo on i64: T
unziplo on u64: T
unziplo on f16: T
unziplo on f32: T
unziplo on f64: T
xorb on i8: vec_xor
xorb on u8: vec_xor
xorb on i16: vec_xor
xorb on u16: vec_xor
xorb on i32: vec_xor
xorb on u32: vec_xor
xorb on i64: T
xorb on u64: T
xorb on f16: T
xorb on f32: vec_xor
xorb on f64: T
xorl on i8: vec_xor
xorl on u8: vec_xor
xorl on i16: vec_xor
xorl on u16: vec_xor
xorl on i32: vec_xor
xorl on u32: vec_xor
xorl on i64: T
xorl on u64: T
xorl on f16: T
xorl on f32: vec_xor
xorl on f64: T
zip on i8: T
zip on u8: T
zip on i16: T
zip on u16: T
zip on i32: T
zip on u32: T
zip on i64: T
zip on u64: T
zip on f16: T
zip on f32: T
zip on f64: T
ziphi on i8: vec_mergel
ziphi on u8: vec_mergel
ziphi on i16: vec_mergel
ziphi on u16: vec_mergel
ziphi on i32: vec_mergel
ziphi on u32: vec_mergel
ziphi on i64: T
ziphi on u64: T
ziphi on f16: T
ziphi on f32: vec_mergel
ziphi on f64: T
ziplo on i8: vec_mergeh
ziplo on u8: vec_mergeh
ziplo on i16: vec_mergeh
ziplo on u16: vec_mergeh
ziplo on i32: vec_mergeh
ziplo on u32: vec_mergeh
ziplo on i64: T
ziplo on u64: T
ziplo on f16: T
ziplo on f32: vec_mergeh
ziplo on f64: T
cvt from i8 to i8: NOOP
cvt from i8 to u8: NOOP
cvt from u8 to i8: NOOP
cvt from u8 to u8: NOOP
cvt from i16 to i16: NOOP
cvt from i16 to u16: NOOP
cvt from i16 to f16: T
cvt from u16 to i16: NOOP
cvt from u16 to u16: NOOP
cvt from u16 to f16: T
cvt from i32 to i32: NOOP
cvt from i32 to u32: NOOP
cvt from i32 to f32: vec_ctf
cvt from u32 to i32: NOOP
cvt from u32 to u32: NOOP
cvt from u32 to f32: vec_ctf
cvt from i64 to i64: NOOP
cvt from i64 to u64: T
cvt from i64 to f64: T
cvt from u64 to i64: T
cvt from u64 to u64: NOOP
cvt from u64 to f64: T
cvt from f16 to i16: T
cvt from f16 to u16: T
cvt from f16 to f16: NOOP
cvt from f32 to i32: vec_cts
cvt from f32 to u32: vec_ctu
cvt from f32 to f32: NOOP
cvt from f64 to i64: T
cvt from f64 to u64: T
cvt from f64 to f64: NOOP
reinterpret from i8 to i8: NOOP
reinterpret from i8 to u8: NOOP
reinterpret from u8 to i8: NOOP
reinterpret from u8 to u8: NOOP
reinterpret from i16 to i16: NOOP
reinterpret from i16 to u16: NOOP
reinterpret from i16 to f16: T
reinterpret from u16 to i16: NOOP
reinterpret from u16 to u16: NOOP
reinterpret from u16 to f16: T
reinterpret from i32 to i32: NOOP
reinterpret from i32 to u32: NOOP
reinterpret from i32 to f32: NOOP
reinterpret from u32 to i32: NOOP
reinterpret from u32 to u32: NOOP
reinterpret from u32 to f32: NOOP
reinterpret from i64 to i64: NOOP
reinterpret from i64 to u64: T
reinterpret from i64 to f64: T
reinterpret from u64 to i64: T
reinterpret from u64 to u64: NOOP
reinterpret from u64 to f64: T
reinterpret from f16 to i16: T
reinterpret from f16 to u16: T
reinterpret from f16 to f16: NOOP
reinterpret from f32 to i32: NOOP
reinterpret from f32 to u32: NOOP
reinterpret from f32 to f32: NOOP
reinterpret from f64 to i64: T
reinterpret from f64 to u64: T
reinterpret from f64 to f64: NOOP
reinterpretl from i8 to i8: NOOP
reinterpretl from i8 to u8: NOOP
reinterpretl from u8 to i8: NOOP
reinterpretl from u8 to u8: NOOP
reinterpretl from i16 to i16: NOOP
reinterpretl from i16 to u16: NOOP
reinterpretl from i16 to f16: T
reinterpretl from u16 to i16: NOOP
reinterpretl from u16 to u16: NOOP
reinterpretl from u16 to f16: T
reinterpretl from i32 to i32: NOOP
reinterpretl from i32 to u32: NOOP
reinterpretl from i32 to f32: NOOP
reinterpretl from u32 to i32: NOOP
reinterpretl from u32 to u32: NOOP
reinterpretl from u32 to f32: NOOP
reinterpretl from i64 to i64: NOOP
reinterpretl from i64 to u64: T
reinterpretl from i64 to f64: T
reinterpretl from u64 to i64: T
reinterpretl from u64 to u64: NOOP
reinterpretl from u64 to f64: T
reinterpretl from f16 to i16: T
reinterpretl from f16 to u16: T
reinterpretl from f16 to f16: NOOP
reinterpretl from f32 to i32: NOOP
reinterpretl from f32 to u32: NOOP
reinterpretl from f32 to f32: NOOP
reinterpretl from f64 to i64: T
reinterpretl from f64 to u64: T
reinterpretl from f64 to f64: NOOP
upcvt from i8 to i16: T
upcvt from i8 to u16: T
upcvt from i8 to f16: T
upcvt from u8 to i16: T
upcvt from u8 to u16: T
upcvt from u8 to f16: T
upcvt from i16 to i32: T
upcvt from i16 to u32: T
upcvt from i16 to f32: T
upcvt from u16 to i32: T
upcvt from u16 to u32: T
upcvt from u16 to f32: T
upcvt from i32 to i64: T
upcvt from i32 to u64: T
upcvt from i32 to f64: T
upcvt from u32 to i64: T
upcvt from u32 to u64: T
upcvt from u32 to f64: T
upcvt from f16 to i32: T
upcvt from f16 to u32: T
upcvt from f16 to f32: T
upcvt from f32 to i64: T
upcvt from f32 to u64: T
upcvt from f32 to f64: T
downcvt from i16 to i8: vec_pack
downcvt from i16 to u8: vec_pack
downcvt from u16 to i8: vec_pack
downcvt from u16 to u8: vec_pack
downcvt from i32 to i16: vec_pack
downcvt from i32 to u16: vec_pack
downcvt from i32 to f16: T
downcvt from u32 to i16: vec_pack
downcvt from u32 to u16: vec_pack
downcvt from u32 to f16: T
downcvt from i64 to i32: T
downcvt from i64 to u32: T
downcvt from i64 to f32: T
downcvt from u64 to i32: T
downcvt from u64 to u32: T
downcvt from u64 to f32: T
downcvt from f16 to i8: T
downcvt from f16 to u8: T
downcvt from f32 to i16: T
downcvt from f32 to u16: T
downcvt from f32 to f16: T
downcvt from f64 to i32: T
downcvt from f64 to u32: T
downcvt from f64 to f32: T