Notations are as follows:
T
for a trick, usually implemented using other intrinsics
E
for scalar emulation
NOOP
for no operation
NA
means the operator does not exist for the given type
intrinsic name (e.g. _mm_add_ps)
for a direct wrapping of the named intrinsic
abs on i8: _mm_abs_epi8
abs on u8: NOOP
abs on i16: _mm_abs_epi16
abs on u16: NOOP
abs on i32: _mm_abs_epi32
abs on u32: NOOP
abs on i64: T
abs on u64: NOOP
abs on f16: T
abs on f32: T
abs on f64: T
add on i8: _mm_add_epi8
add on u8: _mm_add_epi8
add on i16: _mm_add_epi16
add on u16: _mm_add_epi16
add on i32: _mm_add_epi32
add on u32: _mm_add_epi32
add on i64: _mm_add_epi64
add on u64: _mm_add_epi64
add on f16: T
add on f32: _mm_add_ps
add on f64: _mm_add_pd
adds on i8: _mm_adds_epi8
adds on u8: _mm_adds_epu8
adds on i16: _mm_adds_epi16
adds on u16: _mm_adds_epu16
adds on i32: T
adds on u32: T
adds on i64: T
adds on u64: T
adds on f16: T
adds on f32: T
adds on f64: T
addv on i8: NA
addv on u8: NA
addv on i16: NA
addv on u16: NA
addv on i32: NA
addv on u32: NA
addv on i64: NA
addv on u64: NA
addv on f16: T
addv on f32: T
addv on f64: T
all on i8: T
all on u8: T
all on i16: T
all on u16: T
all on i32: T
all on u32: T
all on i64: T
all on u64: T
all on f16: T
all on f32: T
all on f64: T
andb on i8: _mm_and_si128
andb on u8: _mm_and_si128
andb on i16: _mm_and_si128
andb on u16: _mm_and_si128
andb on i32: _mm_and_si128
andb on u32: _mm_and_si128
andb on i64: _mm_and_si128
andb on u64: _mm_and_si128
andb on f16: T
andb on f32: _mm_and_ps
andb on f64: _mm_and_pd
andl on i8: _mm_and_si128
andl on u8: _mm_and_si128
andl on i16: _mm_and_si128
andl on u16: _mm_and_si128
andl on i32: _mm_and_si128
andl on u32: _mm_and_si128
andl on i64: _mm_and_si128
andl on u64: _mm_and_si128
andl on f16: T
andl on f32: _mm_and_ps
andl on f64: _mm_and_pd
andnotb on i8: _mm_andnot_si128
andnotb on u8: _mm_andnot_si128
andnotb on i16: _mm_andnot_si128
andnotb on u16: _mm_andnot_si128
andnotb on i32: _mm_andnot_si128
andnotb on u32: _mm_andnot_si128
andnotb on i64: _mm_andnot_si128
andnotb on u64: _mm_andnot_si128
andnotb on f16: T
andnotb on f32: _mm_andnot_ps
andnotb on f64: _mm_andnot_pd
andnotl on i8: _mm_andnot_si128
andnotl on u8: _mm_andnot_si128
andnotl on i16: _mm_andnot_si128
andnotl on u16: _mm_andnot_si128
andnotl on i32: _mm_andnot_si128
andnotl on u32: _mm_andnot_si128
andnotl on i64: _mm_andnot_si128
andnotl on u64: _mm_andnot_si128
andnotl on f16: T
andnotl on f32: _mm_andnot_ps
andnotl on f64: _mm_andnot_pd
any on i8: T
any on u8: T
any on i16: T
any on u16: T
any on i32: T
any on u32: T
any on i64: T
any on u64: T
any on f16: T
any on f32: T
any on f64: T
ceil on i8: NOOP
ceil on u8: NOOP
ceil on i16: NOOP
ceil on u16: NOOP
ceil on i32: NOOP
ceil on u32: NOOP
ceil on i64: NOOP
ceil on u64: NOOP
ceil on f16: T
ceil on f32: _mm_ceil_ps
ceil on f64: _mm_ceil_pd
div on i8: T
div on u8: T
div on i16: T
div on u16: T
div on i32: T
div on u32: T
div on i64: E
div on u64: E
div on f16: T
div on f32: _mm_div_ps
div on f64: _mm_div_pd
eq on i8: _mm_cmpeq_epi8
eq on u8: _mm_cmpeq_epi8
eq on i16: _mm_cmpeq_epi16
eq on u16: _mm_cmpeq_epi16
eq on i32: _mm_cmpeq_epi32
eq on u32: _mm_cmpeq_epi32
eq on i64: _mm_cmpeq_epi64
eq on u64: _mm_cmpeq_epi64
eq on f16: T
eq on f32: _mm_cmpeq_ps
eq on f64: _mm_cmpeq_pd
floor on i8: NOOP
floor on u8: NOOP
floor on i16: NOOP
floor on u16: NOOP
floor on i32: NOOP
floor on u32: NOOP
floor on i64: NOOP
floor on u64: NOOP
floor on f16: T
floor on f32: _mm_floor_ps
floor on f64: _mm_floor_pd
fma on i8: T
fma on u8: T
fma on i16: T
fma on u16: T
fma on i32: T
fma on u32: T
fma on i64: T
fma on u64: T
fma on f16: T
fma on f32: T
fma on f64: T
fms on i8: T
fms on u8: T
fms on i16: T
fms on u16: T
fms on i32: T
fms on u32: T
fms on i64: T
fms on u64: T
fms on f16: T
fms on f32: T
fms on f64: T
fnma on i8: T
fnma on u8: T
fnma on i16: T
fnma on u16: T
fnma on i32: T
fnma on u32: T
fnma on i64: T
fnma on u64: T
fnma on f16: T
fnma on f32: T
fnma on f64: T
fnms on i8: T
fnms on u8: T
fnms on i16: T
fnms on u16: T
fnms on i32: T
fnms on u32: T
fnms on i64: T
fnms on u64: T
fnms on f16: T
fnms on f32: T
fnms on f64: T
gather on i8: NA
gather on u8: NA
gather on i16: E
gather on u16: E
gather on i32: E
gather on u32: E
gather on i64: E
gather on u64: E
gather on f16: E
gather on f32: E
gather on f64: E
gather_linear on i8: E
gather_linear on u8: E
gather_linear on i16: E
gather_linear on u16: E
gather_linear on i32: E
gather_linear on u32: E
gather_linear on i64: E
gather_linear on u64: E
gather_linear on f16: E
gather_linear on f32: E
gather_linear on f64: E
ge on i8: T
ge on u8: T
ge on i16: T
ge on u16: T
ge on i32: T
ge on u32: T
ge on i64: T
ge on u64: T
ge on f16: T
ge on f32: _mm_cmpge_ps
ge on f64: _mm_cmpge_pd
gt on i8: _mm_cmpgt_epi8
gt on u8: T
gt on i16: _mm_cmpgt_epi16
gt on u16: T
gt on i32: _mm_cmpgt_epi32
gt on u32: T
gt on i64: _mm_cmpgt_epi64
gt on u64: T
gt on f16: T
gt on f32: _mm_cmpgt_ps
gt on f64: _mm_cmpgt_pd
if_else1 on i8: _mm_blendv_epi8
if_else1 on u8: _mm_blendv_epi8
if_else1 on i16: _mm_blendv_epi8
if_else1 on u16: _mm_blendv_epi8
if_else1 on i32: _mm_blendv_epi8
if_else1 on u32: _mm_blendv_epi8
if_else1 on i64: _mm_blendv_epi8
if_else1 on u64: _mm_blendv_epi8
if_else1 on f16: T
if_else1 on f32: _mm_blendv_ps
if_else1 on f64: _mm_blendv_pd
iota on i8: T
iota on u8: T
iota on i16: T
iota on u16: T
iota on i32: T
iota on u32: T
iota on i64: T
iota on u64: T
iota on f16: T
iota on f32: T
iota on f64: T
le on i8: T
le on u8: T
le on i16: T
le on u16: T
le on i32: T
le on u32: T
le on i64: T
le on u64: T
le on f16: T
le on f32: _mm_cmple_ps
le on f64: _mm_cmple_pd
len on i8: NOOP
len on u8: NOOP
len on i16: NOOP
len on u16: NOOP
len on i32: NOOP
len on u32: NOOP
len on i64: NOOP
len on u64: NOOP
len on f16: NOOP
len on f32: NOOP
len on f64: NOOP
load2a on i8: T
load2a on u8: T
load2a on i16: T
load2a on u16: T
load2a on i32: T
load2a on u32: T
load2a on i64: T
load2a on u64: T
load2a on f16: T
load2a on f32: T
load2a on f64: T
load2u on i8: T
load2u on u8: T
load2u on i16: T
load2u on u16: T
load2u on i32: T
load2u on u32: T
load2u on i64: T
load2u on u64: T
load2u on f16: T
load2u on f32: T
load2u on f64: T
load3a on i8: T
load3a on u8: T
load3a on i16: E
load3a on u16: T
load3a on i32: T
load3a on u32: T
load3a on i64: T
load3a on u64: T
load3a on f16: T
load3a on f32: T
load3a on f64: T
load3u on i8: T
load3u on u8: T
load3u on i16: E
load3u on u16: T
load3u on i32: T
load3u on u32: T
load3u on i64: T
load3u on u64: T
load3u on f16: T
load3u on f32: T
load3u on f64: T
load4a on i8: T
load4a on u8: T
load4a on i16: T
load4a on u16: T
load4a on i32: T
load4a on u32: T
load4a on i64: T
load4a on u64: T
load4a on f16: T
load4a on f32: T
load4a on f64: T
load4u on i8: T
load4u on u8: T
load4u on i16: T
load4u on u16: T
load4u on i32: T
load4u on u32: T
load4u on i64: T
load4u on u64: T
load4u on f16: T
load4u on f32: T
load4u on f64: T
loada on i8: T
loada on u8: T
loada on i16: T
loada on u16: T
loada on i32: T
loada on u32: T
loada on i64: T
loada on u64: T
loada on f16: T
loada on f32: _mm_load_ps
loada on f64: _mm_load_pd
loadla on i8: T
loadla on u8: T
loadla on i16: T
loadla on u16: T
loadla on i32: T
loadla on u32: T
loadla on i64: T
loadla on u64: T
loadla on f16: T
loadla on f32: T
loadla on f64: T
loadlu on i8: T
loadlu on u8: T
loadlu on i16: T
loadlu on u16: T
loadlu on i32: T
loadlu on u32: T
loadlu on i64: T
loadlu on u64: T
loadlu on f16: T
loadlu on f32: T
loadlu on f64: T
loadu on i8: T
loadu on u8: T
loadu on i16: T
loadu on u16: T
loadu on i32: T
loadu on u32: T
loadu on i64: T
loadu on u64: T
loadu on f16: T
loadu on f32: _mm_loadu_ps
loadu on f64: _mm_loadu_pd
lt on i8: T
lt on u8: T
lt on i16: T
lt on u16: T
lt on i32: T
lt on u32: T
lt on i64: T
lt on u64: T
lt on f16: T
lt on f32: T
lt on f64: T
mask_for_loop_tail on i8: T
mask_for_loop_tail on u8: T
mask_for_loop_tail on i16: T
mask_for_loop_tail on u16: T
mask_for_loop_tail on i32: T
mask_for_loop_tail on u32: T
mask_for_loop_tail on i64: T
mask_for_loop_tail on u64: T
mask_for_loop_tail on f16: T
mask_for_loop_tail on f32: T
mask_for_loop_tail on f64: T
mask_storea1 on i8: _mm_maskmoveu_si128
mask_storea1 on u8: _mm_maskmoveu_si128
mask_storea1 on i16: _mm_maskmoveu_si128
mask_storea1 on u16: _mm_maskmoveu_si128
mask_storea1 on i32: _mm_maskmoveu_si128
mask_storea1 on u32: _mm_maskmoveu_si128
mask_storea1 on i64: _mm_maskmoveu_si128
mask_storea1 on u64: _mm_maskmoveu_si128
mask_storea1 on f16: E
mask_storea1 on f32: _mm_maskmoveu_si128
mask_storea1 on f64: _mm_maskmoveu_si128
mask_storeu1 on i8: _mm_maskmoveu_si128
mask_storeu1 on u8: _mm_maskmoveu_si128
mask_storeu1 on i16: _mm_maskmoveu_si128
mask_storeu1 on u16: _mm_maskmoveu_si128
mask_storeu1 on i32: _mm_maskmoveu_si128
mask_storeu1 on u32: _mm_maskmoveu_si128
mask_storeu1 on i64: _mm_maskmoveu_si128
mask_storeu1 on u64: _mm_maskmoveu_si128
mask_storeu1 on f16: E
mask_storeu1 on f32: _mm_maskmoveu_si128
mask_storeu1 on f64: _mm_maskmoveu_si128
masko_loada1 on i8: E
masko_loada1 on u8: E
masko_loada1 on i16: E
masko_loada1 on u16: E
masko_loada1 on i32: E
masko_loada1 on u32: E
masko_loada1 on i64: E
masko_loada1 on u64: E
masko_loada1 on f16: E
masko_loada1 on f32: E
masko_loada1 on f64: E
masko_loadu1 on i8: E
masko_loadu1 on u8: E
masko_loadu1 on i16: E
masko_loadu1 on u16: E
masko_loadu1 on i32: E
masko_loadu1 on u32: E
masko_loadu1 on i64: E
masko_loadu1 on u64: E
masko_loadu1 on f16: E
masko_loadu1 on f32: E
masko_loadu1 on f64: E
maskz_loada1 on i8: E
maskz_loada1 on u8: E
maskz_loada1 on i16: E
maskz_loada1 on u16: E
maskz_loada1 on i32: E
maskz_loada1 on u32: E
maskz_loada1 on i64: E
maskz_loada1 on u64: E
maskz_loada1 on f16: E
maskz_loada1 on f32: E
maskz_loada1 on f64: E
maskz_loadu1 on i8: E
maskz_loadu1 on u8: E
maskz_loadu1 on i16: E
maskz_loadu1 on u16: E
maskz_loadu1 on i32: E
maskz_loadu1 on u32: E
maskz_loadu1 on i64: E
maskz_loadu1 on u64: E
maskz_loadu1 on f16: E
maskz_loadu1 on f32: E
maskz_loadu1 on f64: E
max on i8: _mm_max_epi8
max on u8: _mm_max_epu8
max on i16: _mm_max_epi16
max on u16: _mm_max_epu16
max on i32: _mm_max_epi32
max on u32: _mm_max_epu32
max on i64: T
max on u64: T
max on f16: T
max on f32: _mm_max_ps
max on f64: _mm_max_pd
min on i8: _mm_min_epi8
min on u8: _mm_min_epu8
min on i16: _mm_min_epi16
min on u16: _mm_min_epu16
min on i32: _mm_min_epi32
min on u32: _mm_min_epu32
min on i64: T
min on u64: T
min on f16: T
min on f32: _mm_min_ps
min on f64: _mm_min_pd
mul on i8: T
mul on u8: T
mul on i16: _mm_mullo_epi16
mul on u16: _mm_mullo_epi16
mul on i32: _mm_mullo_epi32
mul on u32: _mm_mullo_epi32
mul on i64: E
mul on u64: E
mul on f16: T
mul on f32: _mm_mul_ps
mul on f64: _mm_mul_pd
nbtrue on i8: T
nbtrue on u8: T
nbtrue on i16: T
nbtrue on u16: T
nbtrue on i32: T
nbtrue on u32: T
nbtrue on i64: T
nbtrue on u64: T
nbtrue on f16: T
nbtrue on f32: T
nbtrue on f64: T
ne on i8: T
ne on u8: T
ne on i16: T
ne on u16: T
ne on i32: T
ne on u32: T
ne on i64: T
ne on u64: T
ne on f16: T
ne on f32: _mm_cmpneq_ps
ne on f64: _mm_cmpneq_pd
neg on i8: T
neg on u8: T
neg on i16: T
neg on u16: T
neg on i32: T
neg on u32: T
neg on i64: T
neg on u64: T
neg on f16: T
neg on f32: T
neg on f64: T
notb on i8: T
notb on u8: T
notb on i16: T
notb on u16: T
notb on i32: T
notb on u32: T
notb on i64: T
notb on u64: T
notb on f16: T
notb on f32: T
notb on f64: T
notl on i8: T
notl on u8: T
notl on i16: T
notl on u16: T
notl on i32: T
notl on u32: T
notl on i64: T
notl on u64: T
notl on f16: T
notl on f32: T
notl on f64: T
orb on i8: _mm_or_si128
orb on u8: _mm_or_si128
orb on i16: _mm_or_si128
orb on u16: _mm_or_si128
orb on i32: _mm_or_si128
orb on u32: _mm_or_si128
orb on i64: _mm_or_si128
orb on u64: _mm_or_si128
orb on f16: T
orb on f32: _mm_or_ps
orb on f64: _mm_or_pd
orl on i8: _mm_or_si128
orl on u8: _mm_or_si128
orl on i16: _mm_or_si128
orl on u16: _mm_or_si128
orl on i32: _mm_or_si128
orl on u32: _mm_or_si128
orl on i64: _mm_or_si128
orl on u64: _mm_or_si128
orl on f16: T
orl on f32: _mm_or_ps
orl on f64: _mm_or_pd
rec on i8: NA
rec on u8: NA
rec on i16: NA
rec on u16: NA
rec on i32: NA
rec on u32: NA
rec on i64: NA
rec on u64: NA
rec on f16: T
rec on f32: T
rec on f64: T
rec11 on i8: NA
rec11 on u8: NA
rec11 on i16: NA
rec11 on u16: NA
rec11 on i32: NA
rec11 on u32: NA
rec11 on i64: NA
rec11 on u64: NA
rec11 on f16: T
rec11 on f32: _mm_rcp_ps
rec11 on f64: T
rec8 on i8: NA
rec8 on u8: NA
rec8 on i16: NA
rec8 on u16: NA
rec8 on i32: NA
rec8 on u32: NA
rec8 on i64: NA
rec8 on u64: NA
rec8 on f16: T
rec8 on f32: _mm_rcp_ps
rec8 on f64: T
round_to_even on i8: NOOP
round_to_even on u8: NOOP
round_to_even on i16: NOOP
round_to_even on u16: NOOP
round_to_even on i32: NOOP
round_to_even on u32: NOOP
round_to_even on i64: NOOP
round_to_even on u64: NOOP
round_to_even on f16: T
round_to_even on f32: T
round_to_even on f64: T
rsqrt11 on i8: NA
rsqrt11 on u8: NA
rsqrt11 on i16: NA
rsqrt11 on u16: NA
rsqrt11 on i32: NA
rsqrt11 on u32: NA
rsqrt11 on i64: NA
rsqrt11 on u64: NA
rsqrt11 on f16: T
rsqrt11 on f32: _mm_rsqrt_ps
rsqrt11 on f64: T
rsqrt8 on i8: NA
rsqrt8 on u8: NA
rsqrt8 on i16: NA
rsqrt8 on u16: NA
rsqrt8 on i32: NA
rsqrt8 on u32: NA
rsqrt8 on i64: NA
rsqrt8 on u64: NA
rsqrt8 on f16: T
rsqrt8 on f32: _mm_rsqrt_ps
rsqrt8 on f64: T
scatter on i8: NA
scatter on u8: NA
scatter on i16: E
scatter on u16: E
scatter on i32: E
scatter on u32: E
scatter on i64: E
scatter on u64: E
scatter on f16: E
scatter on f32: E
scatter on f64: E
scatter_linear on i8: E
scatter_linear on u8: E
scatter_linear on i16: E
scatter_linear on u16: E
scatter_linear on i32: E
scatter_linear on u32: E
scatter_linear on i64: E
scatter_linear on u64: E
scatter_linear on f16: E
scatter_linear on f32: E
scatter_linear on f64: E
set1 on i8: _mm_set1_epi8
set1 on u8: T
set1 on i16: _mm_set1_epi16
set1 on u16: T
set1 on i32: _mm_set1_epi32
set1 on u32: T
set1 on i64: _mm_set1_epi64x
set1 on u64: T
set1 on f16: T
set1 on f32: _mm_set1_ps
set1 on f64: _mm_set1_pd
set1l on i8: T
set1l on u8: T
set1l on i16: T
set1l on u16: T
set1l on i32: T
set1l on u32: T
set1l on i64: T
set1l on u64: T
set1l on f16: T
set1l on f32: T
set1l on f64: T
shl on i8: T
shl on u8: T
shl on i16: T
shl on u16: T
shl on i32: T
shl on u32: T
shl on i64: T
shl on u64: T
shl on f16: NA
shl on f32: NA
shl on f64: NA
shr on i8: T
shr on u8: T
shr on i16: T
shr on u16: T
shr on i32: T
shr on u32: T
shr on i64: T
shr on u64: T
shr on f16: NA
shr on f32: NA
shr on f64: NA
shra on i8: T
shra on u8: T
shra on i16: T
shra on u16: T
shra on i32: T
shra on u32: T
shra on i64: E
shra on u64: T
shra on f16: NA
shra on f32: NA
shra on f64: NA
sqrt on i8: NA
sqrt on u8: NA
sqrt on i16: NA
sqrt on u16: NA
sqrt on i32: NA
sqrt on u32: NA
sqrt on i64: NA
sqrt on u64: NA
sqrt on f16: T
sqrt on f32: _mm_sqrt_ps
sqrt on f64: _mm_sqrt_pd
store2a on i8: T
store2a on u8: T
store2a on i16: T
store2a on u16: T
store2a on i32: T
store2a on u32: T
store2a on i64: T
store2a on u64: T
store2a on f16: T
store2a on f32: T
store2a on f64: T
store2u on i8: T
store2u on u8: T
store2u on i16: T
store2u on u16: T
store2u on i32: T
store2u on u32: T
store2u on i64: T
store2u on u64: T
store2u on f16: T
store2u on f32: T
store2u on f64: T
store3a on i8: T
store3a on u8: T
store3a on i16: E
store3a on u16: T
store3a on i32: T
store3a on u32: T
store3a on i64: T
store3a on u64: T
store3a on f16: T
store3a on f32: T
store3a on f64: T
store3u on i8: T
store3u on u8: T
store3u on i16: E
store3u on u16: T
store3u on i32: T
store3u on u32: T
store3u on i64: T
store3u on u64: T
store3u on f16: T
store3u on f32: T
store3u on f64: T
store4a on i8: T
store4a on u8: T
store4a on i16: T
store4a on u16: T
store4a on i32: T
store4a on u32: T
store4a on i64: T
store4a on u64: T
store4a on f16: T
store4a on f32: T
store4a on f64: T
store4u on i8: T
store4u on u8: T
store4u on i16: T
store4u on u16: T
store4u on i32: T
store4u on u32: T
store4u on i64: T
store4u on u64: T
store4u on f16: T
store4u on f32: T
store4u on f64: T
storea on i8: T
storea on u8: T
storea on i16: T
storea on u16: T
storea on i32: T
storea on u32: T
storea on i64: T
storea on u64: T
storea on f16: T
storea on f32: _mm_store_ps
storea on f64: _mm_store_pd
storela on i8: T
storela on u8: T
storela on i16: T
storela on u16: T
storela on i32: T
storela on u32: T
storela on i64: T
storela on u64: T
storela on f16: T
storela on f32: T
storela on f64: T
storelu on i8: T
storelu on u8: T
storelu on i16: T
storelu on u16: T
storelu on i32: T
storelu on u32: T
storelu on i64: T
storelu on u64: T
storelu on f16: T
storelu on f32: T
storelu on f64: T
storeu on i8: T
storeu on u8: T
storeu on i16: T
storeu on u16: T
storeu on i32: T
storeu on u32: T
storeu on i64: T
storeu on u64: T
storeu on f16: T
storeu on f32: _mm_storeu_ps
storeu on f64: _mm_storeu_pd
sub on i8: _mm_sub_epi8
sub on u8: _mm_sub_epi8
sub on i16: _mm_sub_epi16
sub on u16: _mm_sub_epi16
sub on i32: _mm_sub_epi32
sub on u32: _mm_sub_epi32
sub on i64: _mm_sub_epi64
sub on u64: _mm_sub_epi64
sub on f16: T
sub on f32: _mm_sub_ps
sub on f64: _mm_sub_pd
subs on i8: _mm_subs_epi8
subs on u8: _mm_subs_epu8
subs on i16: _mm_subs_epi16
subs on u16: _mm_subs_epu16
subs on i32: T
subs on u32: E
subs on i64: T
subs on u64: E
subs on f16: T
subs on f32: T
subs on f64: T
to_logical on i8: T
to_logical on u8: T
to_logical on i16: T
to_logical on u16: T
to_logical on i32: T
to_logical on u32: T
to_logical on i64: T
to_logical on u64: T
to_logical on f16: T
to_logical on f32: T
to_logical on f64: T
to_mask on i8: NOOP
to_mask on u8: NOOP
to_mask on i16: NOOP
to_mask on u16: NOOP
to_mask on i32: NOOP
to_mask on u32: NOOP
to_mask on i64: NOOP
to_mask on u64: NOOP
to_mask on f16: T
to_mask on f32: NOOP
to_mask on f64: NOOP
trunc on i8: NOOP
trunc on u8: NOOP
trunc on i16: NOOP
trunc on u16: NOOP
trunc on i32: NOOP
trunc on u32: NOOP
trunc on i64: NOOP
trunc on u64: NOOP
trunc on f16: T
trunc on f32: T
trunc on f64: T
unzip on i8: T
unzip on u8: T
unzip on i16: T
unzip on u16: T
unzip on i32: T
unzip on u32: T
unzip on i64: T
unzip on u64: T
unzip on f16: T
unzip on f32: T
unzip on f64: T
unziphi on i8: E
unziphi on u8: E
unziphi on i16: T
unziphi on u16: T
unziphi on i32: T
unziphi on u32: T
unziphi on i64: T
unziphi on u64: T
unziphi on f16: T
unziphi on f32: T
unziphi on f64: T
unziplo on i8: E
unziplo on u8: E
unziplo on i16: T
unziplo on u16: T
unziplo on i32: T
unziplo on u32: T
unziplo on i64: T
unziplo on u64: T
unziplo on f16: T
unziplo on f32: T
unziplo on f64: T
xorb on i8: _mm_xor_si128
xorb on u8: _mm_xor_si128
xorb on i16: _mm_xor_si128
xorb on u16: _mm_xor_si128
xorb on i32: _mm_xor_si128
xorb on u32: _mm_xor_si128
xorb on i64: _mm_xor_si128
xorb on u64: _mm_xor_si128
xorb on f16: T
xorb on f32: _mm_xor_ps
xorb on f64: _mm_xor_pd
xorl on i8: _mm_xor_si128
xorl on u8: _mm_xor_si128
xorl on i16: _mm_xor_si128
xorl on u16: _mm_xor_si128
xorl on i32: _mm_xor_si128
xorl on u32: _mm_xor_si128
xorl on i64: _mm_xor_si128
xorl on u64: _mm_xor_si128
xorl on f16: T
xorl on f32: _mm_xor_ps
xorl on f64: _mm_xor_pd
zip on i8: T
zip on u8: T
zip on i16: T
zip on u16: T
zip on i32: T
zip on u32: T
zip on i64: T
zip on u64: T
zip on f16: T
zip on f32: T
zip on f64: T
ziphi on i8: _mm_unpackhi_epi8
ziphi on u8: _mm_unpackhi_epi8
ziphi on i16: _mm_unpackhi_epi16
ziphi on u16: _mm_unpackhi_epi16
ziphi on i32: _mm_unpackhi_epi32
ziphi on u32: _mm_unpackhi_epi32
ziphi on i64: _mm_unpackhi_epi64
ziphi on u64: _mm_unpackhi_epi64
ziphi on f16: T
ziphi on f32: _mm_unpackhi_ps
ziphi on f64: _mm_unpackhi_pd
ziplo on i8: _mm_unpacklo_epi8
ziplo on u8: _mm_unpacklo_epi8
ziplo on i16: _mm_unpacklo_epi16
ziplo on u16: _mm_unpacklo_epi16
ziplo on i32: _mm_unpacklo_epi32
ziplo on u32: _mm_unpacklo_epi32
ziplo on i64: _mm_unpacklo_epi64
ziplo on u64: _mm_unpacklo_epi64
ziplo on f16: T
ziplo on f32: _mm_unpacklo_ps
ziplo on f64: _mm_unpacklo_pd
cvt from i8 to i8: NOOP
cvt from i8 to u8: NOOP
cvt from u8 to i8: NOOP
cvt from u8 to u8: NOOP
cvt from i16 to i16: NOOP
cvt from i16 to u16: NOOP
cvt from i16 to f16: T
cvt from u16 to i16: NOOP
cvt from u16 to u16: NOOP
cvt from u16 to f16: T
cvt from i32 to i32: NOOP
cvt from i32 to u32: NOOP
cvt from i32 to f32: _mm_cvtepi32_ps
cvt from u32 to i32: NOOP
cvt from u32 to u32: NOOP
cvt from u32 to f32: E
cvt from i64 to i64: NOOP
cvt from i64 to u64: NOOP
cvt from i64 to f64: E
cvt from u64 to i64: NOOP
cvt from u64 to u64: NOOP
cvt from u64 to f64: E
cvt from f16 to i16: E
cvt from f16 to u16: E
cvt from f16 to f16: NOOP
cvt from f32 to i32: _mm_cvtps_epi32
cvt from f32 to u32: E
cvt from f32 to f32: NOOP
cvt from f64 to i64: E
cvt from f64 to u64: E
cvt from f64 to f64: NOOP
reinterpret from i8 to i8: NOOP
reinterpret from i8 to u8: NOOP
reinterpret from u8 to i8: NOOP
reinterpret from u8 to u8: NOOP
reinterpret from i16 to i16: NOOP
reinterpret from i16 to u16: NOOP
reinterpret from i16 to f16: T
reinterpret from u16 to i16: NOOP
reinterpret from u16 to u16: NOOP
reinterpret from u16 to f16: T
reinterpret from i32 to i32: NOOP
reinterpret from i32 to u32: NOOP
reinterpret from i32 to f32: NOOP
reinterpret from u32 to i32: NOOP
reinterpret from u32 to u32: NOOP
reinterpret from u32 to f32: NOOP
reinterpret from i64 to i64: NOOP
reinterpret from i64 to u64: NOOP
reinterpret from i64 to f64: NOOP
reinterpret from u64 to i64: NOOP
reinterpret from u64 to u64: NOOP
reinterpret from u64 to f64: NOOP
reinterpret from f16 to i16: T
reinterpret from f16 to u16: T
reinterpret from f16 to f16: NOOP
reinterpret from f32 to i32: NOOP
reinterpret from f32 to u32: NOOP
reinterpret from f32 to f32: NOOP
reinterpret from f64 to i64: NOOP
reinterpret from f64 to u64: NOOP
reinterpret from f64 to f64: NOOP
reinterpretl from i8 to i8: NOOP
reinterpretl from i8 to u8: NOOP
reinterpretl from u8 to i8: NOOP
reinterpretl from u8 to u8: NOOP
reinterpretl from i16 to i16: NOOP
reinterpretl from i16 to u16: NOOP
reinterpretl from i16 to f16: T
reinterpretl from u16 to i16: NOOP
reinterpretl from u16 to u16: NOOP
reinterpretl from u16 to f16: T
reinterpretl from i32 to i32: NOOP
reinterpretl from i32 to u32: NOOP
reinterpretl from i32 to f32: NOOP
reinterpretl from u32 to i32: NOOP
reinterpretl from u32 to u32: NOOP
reinterpretl from u32 to f32: NOOP
reinterpretl from i64 to i64: NOOP
reinterpretl from i64 to u64: NOOP
reinterpretl from i64 to f64: NOOP
reinterpretl from u64 to i64: NOOP
reinterpretl from u64 to u64: NOOP
reinterpretl from u64 to f64: NOOP
reinterpretl from f16 to i16: E
reinterpretl from f16 to u16: E
reinterpretl from f16 to f16: NOOP
reinterpretl from f32 to i32: NOOP
reinterpretl from f32 to u32: NOOP
reinterpretl from f32 to f32: NOOP
reinterpretl from f64 to i64: NOOP
reinterpretl from f64 to u64: NOOP
reinterpretl from f64 to f64: NOOP
upcvt from i8 to i16: T
upcvt from i8 to u16: T
upcvt from i8 to f16: T
upcvt from u8 to i16: T
upcvt from u8 to u16: T
upcvt from u8 to f16: T
upcvt from i16 to i32: T
upcvt from i16 to u32: T
upcvt from i16 to f32: T
upcvt from u16 to i32: T
upcvt from u16 to u32: T
upcvt from u16 to f32: T
upcvt from i32 to i64: T
upcvt from i32 to u64: T
upcvt from i32 to f64: T
upcvt from u32 to i64: T
upcvt from u32 to u64: T
upcvt from u32 to f64: T
upcvt from f16 to i32: T
upcvt from f16 to u32: T
upcvt from f16 to f32: T
upcvt from f32 to i64: T
upcvt from f32 to u64: T
upcvt from f32 to f64: T
downcvt from i16 to i8: E
downcvt from i16 to u8: E
downcvt from u16 to i8: E
downcvt from u16 to u8: E
downcvt from i32 to i16: E
downcvt from i32 to u16: E
downcvt from i32 to f16: T
downcvt from u32 to i16: E
downcvt from u32 to u16: E
downcvt from u32 to f16: T
downcvt from i64 to i32: E
downcvt from i64 to u32: E
downcvt from i64 to f32: E
downcvt from u64 to i32: E
downcvt from u64 to u32: E
downcvt from u64 to f32: E
downcvt from f16 to i8: E
downcvt from f16 to u8: E
downcvt from f32 to i16: E
downcvt from f32 to u16: E
downcvt from f32 to f16: T
downcvt from f64 to i32: E
downcvt from f64 to u32: E
downcvt from f64 to f32: T