Notations are as follows:
  T          trick, usually implemented using other intrinsics
  E          scalar emulation
  NOOP       no operation
  NA         the operator does not exist for the given type
  intrinsic  the actual wrapped intrinsic (its name is given)
abs on i8: vabsq_s8
abs on u8: NOOP
abs on i16: vabsq_s16
abs on u16: NOOP
abs on i32: vabsq_s32
abs on u32: NOOP
abs on i64: vabsq_s64
abs on u64: NOOP
abs on f16: T
abs on f32: vabsq_f32
abs on f64: vabsq_f64
add on i8: vaddq_s8
add on u8: vaddq_u8
add on i16: vaddq_s16
add on u16: vaddq_u16
add on i32: vaddq_s32
add on u32: vaddq_u32
add on i64: vaddq_s64
add on u64: vaddq_u64
add on f16: T
add on f32: vaddq_f32
add on f64: vaddq_f64
adds on i8: vqaddq_s8
adds on u8: vqaddq_u8
adds on i16: vqaddq_s16
adds on u16: vqaddq_u16
adds on i32: vqaddq_s32
adds on u32: vqaddq_u32
adds on i64: vqaddq_s64
adds on u64: vqaddq_u64
adds on f16: T
adds on f32: T
adds on f64: T
addv on i8: NA
addv on u8: NA
addv on i16: NA
addv on u16: NA
addv on i32: NA
addv on u32: NA
addv on i64: NA
addv on u64: NA
addv on f16: T
addv on f32: vaddvq_f32
addv on f64: vaddvq_f64
all on i8: T
all on u8: T
all on i16: T
all on u16: T
all on i32: T
all on u32: T
all on i64: T
all on u64: T
all on f16: T
all on f32: T
all on f64: T
andb on i8: vandq_s8
andb on u8: vandq_u8
andb on i16: vandq_s16
andb on u16: vandq_u16
andb on i32: vandq_s32
andb on u32: vandq_u32
andb on i64: vandq_s64
andb on u64: vandq_u64
andb on f16: T
andb on f32: vandq_u32
andb on f64: vandq_u64
andl on i8: vandq_u8
andl on u8: vandq_u8
andl on i16: vandq_u16
andl on u16: vandq_u16
andl on i32: vandq_u32
andl on u32: vandq_u32
andl on i64: vandq_u64
andl on u64: vandq_u64
andl on f16: T
andl on f32: vandq_u32
andl on f64: vandq_u64
andnotb on i8: vbicq_s8
andnotb on u8: vbicq_u8
andnotb on i16: vbicq_s16
andnotb on u16: vbicq_u16
andnotb on i32: vbicq_s32
andnotb on u32: vbicq_u32
andnotb on i64: vbicq_s64
andnotb on u64: vbicq_u64
andnotb on f16: T
andnotb on f32: vbicq_u32
andnotb on f64: vbicq_u64
andnotl on i8: vbicq_u8
andnotl on u8: vbicq_u8
andnotl on i16: vbicq_u16
andnotl on u16: vbicq_u16
andnotl on i32: vbicq_u32
andnotl on u32: vbicq_u32
andnotl on i64: vbicq_u64
andnotl on u64: vbicq_u64
andnotl on f16: T
andnotl on f32: vbicq_u32
andnotl on f64: vbicq_u64
any on i8: T
any on u8: T
any on i16: T
any on u16: T
any on i32: T
any on u32: T
any on i64: T
any on u64: T
any on f16: T
any on f32: T
any on f64: T
ceil on i8: NOOP
ceil on u8: NOOP
ceil on i16: NOOP
ceil on u16: NOOP
ceil on i32: NOOP
ceil on u32: NOOP
ceil on i64: NOOP
ceil on u64: NOOP
ceil on f16: T
ceil on f32: vrndpq_f32
ceil on f64: vrndpq_f64
div on i8: E
div on u8: E
div on i16: E
div on u16: E
div on i32: E
div on u32: E
div on i64: E
div on u64: E
div on f16: T
div on f32: vdivq_f32
div on f64: vdivq_f64
eq on i8: vceqq_s8
eq on u8: vceqq_u8
eq on i16: vceqq_s16
eq on u16: vceqq_u16
eq on i32: vceqq_s32
eq on u32: vceqq_u32
eq on i64: vceqq_s64
eq on u64: vceqq_u64
eq on f16: T
eq on f32: vceqq_f32
eq on f64: vceqq_f64
floor on i8: NOOP
floor on u8: NOOP
floor on i16: NOOP
floor on u16: NOOP
floor on i32: NOOP
floor on u32: NOOP
floor on i64: NOOP
floor on u64: NOOP
floor on f16: T
floor on f32: vrndmq_f32
floor on f64: vrndmq_f64
fma on i8: vmlaq_s8
fma on u8: vmlaq_u8
fma on i16: vmlaq_s16
fma on u16: vmlaq_u16
fma on i32: vmlaq_s32
fma on u32: vmlaq_u32
fma on i64: E
fma on u64: E
fma on f16: E
fma on f32: vfmaq_f32
fma on f64: vfmaq_f64
fms on i8: T
fms on u8: T
fms on i16: T
fms on u16: T
fms on i32: T
fms on u32: T
fms on i64: T
fms on u64: T
fms on f16: T
fms on f32: T
fms on f64: T
fnma on i8: vmlsq_s8
fnma on u8: vmlsq_u8
fnma on i16: vmlsq_s16
fnma on u16: vmlsq_u16
fnma on i32: vmlsq_s32
fnma on u32: vmlsq_u32
fnma on i64: E
fnma on u64: E
fnma on f16: E
fnma on f32: vfmsq_f32
fnma on f64: vfmsq_f64
fnms on i8: T
fnms on u8: T
fnms on i16: T
fnms on u16: T
fnms on i32: T
fnms on u32: T
fnms on i64: T
fnms on u64: T
fnms on f16: T
fnms on f32: T
fnms on f64: T
gather on i8: NA
gather on u8: NA
gather on i16: T
gather on u16: T
gather on i32: T
gather on u32: T
gather on i64: T
gather on u64: T
gather on f16: T
gather on f32: T
gather on f64: T
gather_linear on i8: T
gather_linear on u8: T
gather_linear on i16: T
gather_linear on u16: T
gather_linear on i32: T
gather_linear on u32: T
gather_linear on i64: T
gather_linear on u64: T
gather_linear on f16: E
gather_linear on f32: T
gather_linear on f64: T
ge on i8: vcgeq_s8
ge on u8: vcgeq_u8
ge on i16: vcgeq_s16
ge on u16: vcgeq_u16
ge on i32: vcgeq_s32
ge on u32: vcgeq_u32
ge on i64: vcgeq_s64
ge on u64: vcgeq_u64
ge on f16: T
ge on f32: vcgeq_f32
ge on f64: vcgeq_f64
gt on i8: vcgtq_s8
gt on u8: vcgtq_u8
gt on i16: vcgtq_s16
gt on u16: vcgtq_u16
gt on i32: vcgtq_s32
gt on u32: vcgtq_u32
gt on i64: vcgtq_s64
gt on u64: vcgtq_u64
gt on f16: T
gt on f32: vcgtq_f32
gt on f64: vcgtq_f64
if_else1 on i8: vbslq_s8
if_else1 on u8: vbslq_u8
if_else1 on i16: vbslq_s16
if_else1 on u16: vbslq_u16
if_else1 on i32: vbslq_s32
if_else1 on u32: vbslq_u32
if_else1 on i64: vbslq_s64
if_else1 on u64: vbslq_u64
if_else1 on f16: T
if_else1 on f32: vbslq_f32
if_else1 on f64: vbslq_f64
iota on i8: T
iota on u8: T
iota on i16: T
iota on u16: T
iota on i32: T
iota on u32: T
iota on i64: T
iota on u64: T
iota on f16: T
iota on f32: T
iota on f64: T
le on i8: vcleq_s8
le on u8: vcleq_u8
le on i16: vcleq_s16
le on u16: vcleq_u16
le on i32: vcleq_s32
le on u32: vcleq_u32
le on i64: vcleq_s64
le on u64: vcleq_u64
le on f16: T
le on f32: vcleq_f32
le on f64: vcleq_f64
len on i8: NOOP
len on u8: NOOP
len on i16: NOOP
len on u16: NOOP
len on i32: NOOP
len on u32: NOOP
len on i64: NOOP
len on u64: NOOP
len on f16: NOOP
len on f32: NOOP
len on f64: NOOP
load2a on i8: T
load2a on u8: T
load2a on i16: T
load2a on u16: T
load2a on i32: T
load2a on u32: T
load2a on i64: T
load2a on u64: T
load2a on f16: T
load2a on f32: T
load2a on f64: T
load2u on i8: T
load2u on u8: T
load2u on i16: T
load2u on u16: T
load2u on i32: T
load2u on u32: T
load2u on i64: T
load2u on u64: T
load2u on f16: T
load2u on f32: T
load2u on f64: T
load3a on i8: T
load3a on u8: T
load3a on i16: T
load3a on u16: T
load3a on i32: T
load3a on u32: T
load3a on i64: T
load3a on u64: T
load3a on f16: T
load3a on f32: T
load3a on f64: T
load3u on i8: T
load3u on u8: T
load3u on i16: T
load3u on u16: T
load3u on i32: T
load3u on u32: T
load3u on i64: T
load3u on u64: T
load3u on f16: T
load3u on f32: T
load3u on f64: T
load4a on i8: T
load4a on u8: T
load4a on i16: T
load4a on u16: T
load4a on i32: T
load4a on u32: T
load4a on i64: T
load4a on u64: T
load4a on f16: T
load4a on f32: T
load4a on f64: T
load4u on i8: T
load4u on u8: T
load4u on i16: T
load4u on u16: T
load4u on i32: T
load4u on u32: T
load4u on i64: T
load4u on u64: T
load4u on f16: T
load4u on f32: T
load4u on f64: T
loada on i8: vld1q_s8
loada on u8: vld1q_u8
loada on i16: vld1q_s16
loada on u16: vld1q_u16
loada on i32: vld1q_s32
loada on u32: vld1q_u32
loada on i64: vld1q_s64
loada on u64: vld1q_u64
loada on f16: T
loada on f32: vld1q_f32
loada on f64: vld1q_f64
loadla on i8: T
loadla on u8: T
loadla on i16: T
loadla on u16: T
loadla on i32: T
loadla on u32: T
loadla on i64: T
loadla on u64: T
loadla on f16: T
loadla on f32: T
loadla on f64: T
loadlu on i8: T
loadlu on u8: T
loadlu on i16: T
loadlu on u16: T
loadlu on i32: T
loadlu on u32: T
loadlu on i64: T
loadlu on u64: T
loadlu on f16: T
loadlu on f32: T
loadlu on f64: T
loadu on i8: vld1q_s8
loadu on u8: vld1q_u8
loadu on i16: vld1q_s16
loadu on u16: vld1q_u16
loadu on i32: vld1q_s32
loadu on u32: vld1q_u32
loadu on i64: vld1q_s64
loadu on u64: vld1q_u64
loadu on f16: T
loadu on f32: vld1q_f32
loadu on f64: vld1q_f64
lt on i8: vcltq_s8
lt on u8: vcltq_u8
lt on i16: vcltq_s16
lt on u16: vcltq_u16
lt on i32: vcltq_s32
lt on u32: vcltq_u32
lt on i64: vcltq_s64
lt on u64: vcltq_u64
lt on f16: T
lt on f32: vcltq_f32
lt on f64: vcltq_f64
mask_for_loop_tail on i8: T
mask_for_loop_tail on u8: T
mask_for_loop_tail on i16: T
mask_for_loop_tail on u16: T
mask_for_loop_tail on i32: T
mask_for_loop_tail on u32: T
mask_for_loop_tail on i64: T
mask_for_loop_tail on u64: T
mask_for_loop_tail on f16: T
mask_for_loop_tail on f32: T
mask_for_loop_tail on f64: T
mask_storea1 on i8: E
mask_storea1 on u8: E
mask_storea1 on i16: E
mask_storea1 on u16: E
mask_storea1 on i32: E
mask_storea1 on u32: E
mask_storea1 on i64: E
mask_storea1 on u64: E
mask_storea1 on f16: E
mask_storea1 on f32: E
mask_storea1 on f64: E
mask_storeu1 on i8: E
mask_storeu1 on u8: E
mask_storeu1 on i16: E
mask_storeu1 on u16: E
mask_storeu1 on i32: E
mask_storeu1 on u32: E
mask_storeu1 on i64: E
mask_storeu1 on u64: E
mask_storeu1 on f16: E
mask_storeu1 on f32: E
mask_storeu1 on f64: E
masko_loada1 on i8: E
masko_loada1 on u8: E
masko_loada1 on i16: E
masko_loada1 on u16: E
masko_loada1 on i32: E
masko_loada1 on u32: E
masko_loada1 on i64: E
masko_loada1 on u64: E
masko_loada1 on f16: E
masko_loada1 on f32: E
masko_loada1 on f64: E
masko_loadu1 on i8: E
masko_loadu1 on u8: E
masko_loadu1 on i16: E
masko_loadu1 on u16: E
masko_loadu1 on i32: E
masko_loadu1 on u32: E
masko_loadu1 on i64: E
masko_loadu1 on u64: E
masko_loadu1 on f16: E
masko_loadu1 on f32: E
masko_loadu1 on f64: E
maskz_loada1 on i8: E
maskz_loada1 on u8: E
maskz_loada1 on i16: E
maskz_loada1 on u16: E
maskz_loada1 on i32: E
maskz_loada1 on u32: E
maskz_loada1 on i64: E
maskz_loada1 on u64: E
maskz_loada1 on f16: E
maskz_loada1 on f32: E
maskz_loada1 on f64: E
maskz_loadu1 on i8: E
maskz_loadu1 on u8: E
maskz_loadu1 on i16: E
maskz_loadu1 on u16: E
maskz_loadu1 on i32: E
maskz_loadu1 on u32: E
maskz_loadu1 on i64: E
maskz_loadu1 on u64: E
maskz_loadu1 on f16: E
maskz_loadu1 on f32: E
maskz_loadu1 on f64: E
max on i8: vmaxq_s8
max on u8: vmaxq_u8
max on i16: vmaxq_s16
max on u16: vmaxq_u16
max on i32: vmaxq_s32
max on u32: vmaxq_u32
max on i64: T
max on u64: T
max on f16: T
max on f32: vmaxq_f32
max on f64: vmaxq_f64
min on i8: vminq_s8
min on u8: vminq_u8
min on i16: vminq_s16
min on u16: vminq_u16
min on i32: vminq_s32
min on u32: vminq_u32
min on i64: T
min on u64: T
min on f16: T
min on f32: vminq_f32
min on f64: vminq_f64
mul on i8: vmulq_s8
mul on u8: vmulq_u8
mul on i16: vmulq_s16
mul on u16: vmulq_u16
mul on i32: vmulq_s32
mul on u32: vmulq_u32
mul on i64: E
mul on u64: E
mul on f16: T
mul on f32: vmulq_f32
mul on f64: vmulq_f64
nbtrue on i8: vaddvq_s8
nbtrue on u8: vaddvq_s8
nbtrue on i16: vaddvq_s16
nbtrue on u16: vaddvq_s16
nbtrue on i32: vaddvq_s32
nbtrue on u32: vaddvq_s32
nbtrue on i64: T
nbtrue on u64: T
nbtrue on f16: T
nbtrue on f32: vaddvq_s32
nbtrue on f64: T
ne on i8: T
ne on u8: T
ne on i16: T
ne on u16: T
ne on i32: T
ne on u32: T
ne on i64: T
ne on u64: T
ne on f16: T
ne on f32: T
ne on f64: T
neg on i8: vnegq_s8
neg on u8: vnegq_s8
neg on i16: vnegq_s16
neg on u16: vnegq_s16
neg on i32: vnegq_s32
neg on u32: vnegq_s32
neg on i64: vnegq_s64
neg on u64: vnegq_s64
neg on f16: T
neg on f32: vnegq_f32
neg on f64: vnegq_f64
notb on i8: vmvnq_s8
notb on u8: vmvnq_u8
notb on i16: vmvnq_s16
notb on u16: vmvnq_u16
notb on i32: vmvnq_s32
notb on u32: vmvnq_u32
notb on i64: vmvnq_u32
notb on u64: vmvnq_u32
notb on f16: T
notb on f32: vmvnq_u32
notb on f64: vmvnq_u32
notl on i8: vmvnq_u8
notl on u8: vmvnq_u8
notl on i16: vmvnq_u16
notl on u16: vmvnq_u16
notl on i32: vmvnq_u32
notl on u32: vmvnq_u32
notl on i64: vmvnq_u32
notl on u64: vmvnq_u32
notl on f16: T
notl on f32: vmvnq_u32
notl on f64: vmvnq_u32
orb on i8: vorrq_s8
orb on u8: vorrq_u8
orb on i16: vorrq_s16
orb on u16: vorrq_u16
orb on i32: vorrq_s32
orb on u32: vorrq_u32
orb on i64: vorrq_s64
orb on u64: vorrq_u64
orb on f16: T
orb on f32: vorrq_u32
orb on f64: vorrq_u64
orl on i8: vorrq_u8
orl on u8: vorrq_u8
orl on i16: vorrq_u16
orl on u16: vorrq_u16
orl on i32: vorrq_u32
orl on u32: vorrq_u32
orl on i64: vorrq_u64
orl on u64: vorrq_u64
orl on f16: T
orl on f32: vorrq_u32
orl on f64: vorrq_u64
rec on i8: NA
rec on u8: NA
rec on i16: NA
rec on u16: NA
rec on i32: NA
rec on u32: NA
rec on i64: NA
rec on u64: NA
rec on f16: T
rec on f32: T
rec on f64: T
rec11 on i8: NA
rec11 on u8: NA
rec11 on i16: NA
rec11 on u16: NA
rec11 on i32: NA
rec11 on u32: NA
rec11 on i64: NA
rec11 on u64: NA
rec11 on f16: T
rec11 on f32: T
rec11 on f64: T
rec8 on i8: NA
rec8 on u8: NA
rec8 on i16: NA
rec8 on u16: NA
rec8 on i32: NA
rec8 on u32: NA
rec8 on i64: NA
rec8 on u64: NA
rec8 on f16: T
rec8 on f32: vrecpeq_f32
rec8 on f64: vrecpeq_f64
round_to_even on i8: NOOP
round_to_even on u8: NOOP
round_to_even on i16: NOOP
round_to_even on u16: NOOP
round_to_even on i32: NOOP
round_to_even on u32: NOOP
round_to_even on i64: NOOP
round_to_even on u64: NOOP
round_to_even on f16: T
round_to_even on f32: vrndnq_f32
round_to_even on f64: vrndnq_f64
rsqrt11 on i8: NA
rsqrt11 on u8: NA
rsqrt11 on i16: NA
rsqrt11 on u16: NA
rsqrt11 on i32: NA
rsqrt11 on u32: NA
rsqrt11 on i64: NA
rsqrt11 on u64: NA
rsqrt11 on f16: T
rsqrt11 on f32: T
rsqrt11 on f64: T
rsqrt8 on i8: NA
rsqrt8 on u8: NA
rsqrt8 on i16: NA
rsqrt8 on u16: NA
rsqrt8 on i32: NA
rsqrt8 on u32: NA
rsqrt8 on i64: NA
rsqrt8 on u64: NA
rsqrt8 on f16: T
rsqrt8 on f32: vrsqrteq_f32
rsqrt8 on f64: vrsqrteq_f64
scatter on i8: NA
scatter on u8: NA
scatter on i16: T
scatter on u16: T
scatter on i32: T
scatter on u32: T
scatter on i64: T
scatter on u64: T
scatter on f16: T
scatter on f32: T
scatter on f64: T
scatter_linear on i8: T
scatter_linear on u8: T
scatter_linear on i16: T
scatter_linear on u16: T
scatter_linear on i32: T
scatter_linear on u32: T
scatter_linear on i64: T
scatter_linear on u64: T
scatter_linear on f16: E
scatter_linear on f32: T
scatter_linear on f64: T
set1 on i8: vdupq_n_s8
set1 on u8: vdupq_n_u8
set1 on i16: vdupq_n_s16
set1 on u16: vdupq_n_u16
set1 on i32: vdupq_n_s32
set1 on u32: vdupq_n_u32
set1 on i64: vdupq_n_s64
set1 on u64: vdupq_n_u64
set1 on f16: T
set1 on f32: vdupq_n_f32
set1 on f64: vdupq_n_f64
set1l on i8: T
set1l on u8: T
set1l on i16: T
set1l on u16: T
set1l on i32: T
set1l on u32: T
set1l on i64: T
set1l on u64: T
set1l on f16: T
set1l on f32: T
set1l on f64: T
shl on i8: T
shl on u8: T
shl on i16: T
shl on u16: T
shl on i32: T
shl on u32: T
shl on i64: T
shl on u64: T
shl on f16: NA
shl on f32: NA
shl on f64: NA
shr on i8: T
shr on u8: T
shr on i16: T
shr on u16: T
shr on i32: T
shr on u32: T
shr on i64: T
shr on u64: T
shr on f16: NA
shr on f32: NA
shr on f64: NA
shra on i8: T
shra on u8: T
shra on i16: T
shra on u16: T
shra on i32: T
shra on u32: T
shra on i64: T
shra on u64: T
shra on f16: NA
shra on f32: NA
shra on f64: NA
sqrt on i8: NA
sqrt on u8: NA
sqrt on i16: NA
sqrt on u16: NA
sqrt on i32: NA
sqrt on u32: NA
sqrt on i64: NA
sqrt on u64: NA
sqrt on f16: T
sqrt on f32: vsqrtq_f32
sqrt on f64: vsqrtq_f64
store2a on i8: T
store2a on u8: T
store2a on i16: T
store2a on u16: T
store2a on i32: T
store2a on u32: T
store2a on i64: T
store2a on u64: T
store2a on f16: T
store2a on f32: T
store2a on f64: T
store2u on i8: T
store2u on u8: T
store2u on i16: T
store2u on u16: T
store2u on i32: T
store2u on u32: T
store2u on i64: T
store2u on u64: T
store2u on f16: T
store2u on f32: T
store2u on f64: T
store3a on i8: T
store3a on u8: T
store3a on i16: T
store3a on u16: T
store3a on i32: T
store3a on u32: T
store3a on i64: T
store3a on u64: T
store3a on f16: T
store3a on f32: T
store3a on f64: T
store3u on i8: T
store3u on u8: T
store3u on i16: T
store3u on u16: T
store3u on i32: T
store3u on u32: T
store3u on i64: T
store3u on u64: T
store3u on f16: T
store3u on f32: T
store3u on f64: T
store4a on i8: T
store4a on u8: T
store4a on i16: T
store4a on u16: T
store4a on i32: T
store4a on u32: T
store4a on i64: T
store4a on u64: T
store4a on f16: T
store4a on f32: T
store4a on f64: T
store4u on i8: T
store4u on u8: T
store4u on i16: T
store4u on u16: T
store4u on i32: T
store4u on u32: T
store4u on i64: T
store4u on u64: T
store4u on f16: T
store4u on f32: T
store4u on f64: T
storea on i8: vst1q_s8
storea on u8: vst1q_u8
storea on i16: vst1q_s16
storea on u16: vst1q_u16
storea on i32: vst1q_s32
storea on u32: vst1q_u32
storea on i64: vst1q_s64
storea on u64: vst1q_u64
storea on f16: T
storea on f32: vst1q_f32
storea on f64: vst1q_f64
storela on i8: T
storela on u8: T
storela on i16: T
storela on u16: T
storela on i32: T
storela on u32: T
storela on i64: T
storela on u64: T
storela on f16: T
storela on f32: T
storela on f64: T
storelu on i8: T
storelu on u8: T
storelu on i16: T
storelu on u16: T
storelu on i32: T
storelu on u32: T
storelu on i64: T
storelu on u64: T
storelu on f16: T
storelu on f32: T
storelu on f64: T
storeu on i8: vst1q_s8
storeu on u8: vst1q_u8
storeu on i16: vst1q_s16
storeu on u16: vst1q_u16
storeu on i32: vst1q_s32
storeu on u32: vst1q_u32
storeu on i64: vst1q_s64
storeu on u64: vst1q_u64
storeu on f16: T
storeu on f32: vst1q_f32
storeu on f64: vst1q_f64
sub on i8: vsubq_s8
sub on u8: vsubq_u8
sub on i16: vsubq_s16
sub on u16: vsubq_u16
sub on i32: vsubq_s32
sub on u32: vsubq_u32
sub on i64: vsubq_s64
sub on u64: vsubq_u64
sub on f16: T
sub on f32: vsubq_f32
sub on f64: vsubq_f64
subs on i8: vqsubq_s8
subs on u8: vqsubq_u8
subs on i16: vqsubq_s16
subs on u16: vqsubq_u16
subs on i32: vqsubq_s32
subs on u32: vqsubq_u32
subs on i64: vqsubq_s64
subs on u64: vqsubq_u64
subs on f16: T
subs on f32: T
subs on f64: T
to_logical on i8: T
to_logical on u8: T
to_logical on i16: T
to_logical on u16: T
to_logical on i32: T
to_logical on u32: T
to_logical on i64: T
to_logical on u64: T
to_logical on f16: T
to_logical on f32: T
to_logical on f64: T
to_mask on i8: NOOP
to_mask on u8: NOOP
to_mask on i16: NOOP
to_mask on u16: NOOP
to_mask on i32: NOOP
to_mask on u32: NOOP
to_mask on i64: NOOP
to_mask on u64: NOOP
to_mask on f16: T
to_mask on f32: NOOP
to_mask on f64: NOOP
trunc on i8: NOOP
trunc on u8: NOOP
trunc on i16: NOOP
trunc on u16: NOOP
trunc on i32: NOOP
trunc on u32: NOOP
trunc on i64: NOOP
trunc on u64: NOOP
trunc on f16: T
trunc on f32: vrndq_f32
trunc on f64: vrndq_f64
unzip on i8: T
unzip on u8: T
unzip on i16: T
unzip on u16: T
unzip on i32: T
unzip on u32: T
unzip on i64: T
unzip on u64: T
unzip on f16: T
unzip on f32: T
unzip on f64: T
unziphi on i8: vuzp2q_s8
unziphi on u8: vuzp2q_u8
unziphi on i16: vuzp2q_s16
unziphi on u16: vuzp2q_u16
unziphi on i32: vuzp2q_s32
unziphi on u32: vuzp2q_u32
unziphi on i64: vuzp2q_s64
unziphi on u64: vuzp2q_u64
unziphi on f16: T
unziphi on f32: vuzp2q_f32
unziphi on f64: vuzp2q_f64
unziplo on i8: vuzp1q_s8
unziplo on u8: vuzp1q_u8
unziplo on i16: vuzp1q_s16
unziplo on u16: vuzp1q_u16
unziplo on i32: vuzp1q_s32
unziplo on u32: vuzp1q_u32
unziplo on i64: vuzp1q_s64
unziplo on u64: vuzp1q_u64
unziplo on f16: T
unziplo on f32: vuzp1q_f32
unziplo on f64: vuzp1q_f64
xorb on i8: veorq_s8
xorb on u8: veorq_u8
xorb on i16: veorq_s16
xorb on u16: veorq_u16
xorb on i32: veorq_s32
xorb on u32: veorq_u32
xorb on i64: veorq_s64
xorb on u64: veorq_u64
xorb on f16: T
xorb on f32: veorq_u32
xorb on f64: veorq_u64
xorl on i8: veorq_u8
xorl on u8: veorq_u8
xorl on i16: veorq_u16
xorl on u16: veorq_u16
xorl on i32: veorq_u32
xorl on u32: veorq_u32
xorl on i64: veorq_u64
xorl on u64: veorq_u64
xorl on f16: T
xorl on f32: veorq_u32
xorl on f64: veorq_u64
zip on i8: T
zip on u8: T
zip on i16: T
zip on u16: T
zip on i32: T
zip on u32: T
zip on i64: T
zip on u64: T
zip on f16: T
zip on f32: T
zip on f64: T
ziphi on i8: vzip2q_s8
ziphi on u8: vzip2q_u8
ziphi on i16: vzip2q_s16
ziphi on u16: vzip2q_u16
ziphi on i32: vzip2q_s32
ziphi on u32: vzip2q_u32
ziphi on i64: vzip2q_s64
ziphi on u64: vzip2q_u64
ziphi on f16: T
ziphi on f32: vzip2q_f32
ziphi on f64: vzip2q_f64
ziplo on i8: vzip1q_s8
ziplo on u8: vzip1q_u8
ziplo on i16: vzip1q_s16
ziplo on u16: vzip1q_u16
ziplo on i32: vzip1q_s32
ziplo on u32: vzip1q_u32
ziplo on i64: vzip1q_s64
ziplo on u64: vzip1q_u64
ziplo on f16: T
ziplo on f32: vzip1q_f32
ziplo on f64: vzip1q_f64
cvt from i8 to i8: NOOP
cvt from i8 to u8: NOOP
cvt from u8 to i8: NOOP
cvt from u8 to u8: NOOP
cvt from i16 to i16: NOOP
cvt from i16 to u16: NOOP
cvt from i16 to f16: T
cvt from u16 to i16: NOOP
cvt from u16 to u16: NOOP
cvt from u16 to f16: T
cvt from i32 to i32: NOOP
cvt from i32 to u32: NOOP
cvt from i32 to f32: vcvtq_f32_s32
cvt from u32 to i32: NOOP
cvt from u32 to u32: NOOP
cvt from u32 to f32: vcvtq_f32_u32
cvt from i64 to i64: NOOP
cvt from i64 to u64: NOOP
cvt from i64 to f64: vcvtq_f64_s64
cvt from u64 to i64: NOOP
cvt from u64 to u64: NOOP
cvt from u64 to f64: vcvtq_f64_u64
cvt from f16 to i16: T
cvt from f16 to u16: T
cvt from f16 to f16: NOOP
cvt from f32 to i32: vcvtq_s32_f32
cvt from f32 to u32: vcvtq_u32_f32
cvt from f32 to f32: NOOP
cvt from f64 to i64: vcvtq_s64_f64
cvt from f64 to u64: vcvtq_u64_f64
cvt from f64 to f64: NOOP
reinterpret from i8 to i8: NOOP
reinterpret from i8 to u8: NOOP
reinterpret from u8 to i8: NOOP
reinterpret from u8 to u8: NOOP
reinterpret from i16 to i16: NOOP
reinterpret from i16 to u16: NOOP
reinterpret from i16 to f16: T
reinterpret from u16 to i16: NOOP
reinterpret from u16 to u16: NOOP
reinterpret from u16 to f16: T
reinterpret from i32 to i32: NOOP
reinterpret from i32 to u32: NOOP
reinterpret from i32 to f32: NOOP
reinterpret from u32 to i32: NOOP
reinterpret from u32 to u32: NOOP
reinterpret from u32 to f32: NOOP
reinterpret from i64 to i64: NOOP
reinterpret from i64 to u64: NOOP
reinterpret from i64 to f64: NOOP
reinterpret from u64 to i64: NOOP
reinterpret from u64 to u64: NOOP
reinterpret from u64 to f64: NOOP
reinterpret from f16 to i16: T
reinterpret from f16 to u16: T
reinterpret from f16 to f16: NOOP
reinterpret from f32 to i32: NOOP
reinterpret from f32 to u32: NOOP
reinterpret from f32 to f32: NOOP
reinterpret from f64 to i64: NOOP
reinterpret from f64 to u64: NOOP
reinterpret from f64 to f64: NOOP
reinterpretl from i8 to i8: NOOP
reinterpretl from i8 to u8: NOOP
reinterpretl from u8 to i8: NOOP
reinterpretl from u8 to u8: NOOP
reinterpretl from i16 to i16: NOOP
reinterpretl from i16 to u16: NOOP
reinterpretl from i16 to f16: T
reinterpretl from u16 to i16: NOOP
reinterpretl from u16 to u16: NOOP
reinterpretl from u16 to f16: T
reinterpretl from i32 to i32: NOOP
reinterpretl from i32 to u32: NOOP
reinterpretl from i32 to f32: NOOP
reinterpretl from u32 to i32: NOOP
reinterpretl from u32 to u32: NOOP
reinterpretl from u32 to f32: NOOP
reinterpretl from i64 to i64: NOOP
reinterpretl from i64 to u64: NOOP
reinterpretl from i64 to f64: NOOP
reinterpretl from u64 to i64: NOOP
reinterpretl from u64 to u64: NOOP
reinterpretl from u64 to f64: NOOP
reinterpretl from f16 to i16: T
reinterpretl from f16 to u16: T
reinterpretl from f16 to f16: NOOP
reinterpretl from f32 to i32: NOOP
reinterpretl from f32 to u32: NOOP
reinterpretl from f32 to f32: NOOP
reinterpretl from f64 to i64: NOOP
reinterpretl from f64 to u64: NOOP
reinterpretl from f64 to f64: NOOP
upcvt from i8 to i16: T
upcvt from i8 to u16: T
upcvt from i8 to f16: T
upcvt from u8 to i16: T
upcvt from u8 to u16: T
upcvt from u8 to f16: T
upcvt from i16 to i32: T
upcvt from i16 to u32: T
upcvt from i16 to f32: T
upcvt from u16 to i32: T
upcvt from u16 to u32: T
upcvt from u16 to f32: T
upcvt from i32 to i64: T
upcvt from i32 to u64: T
upcvt from i32 to f64: T
upcvt from u32 to i64: T
upcvt from u32 to u64: T
upcvt from u32 to f64: T
upcvt from f16 to i32: T
upcvt from f16 to u32: T
upcvt from f16 to f32: T
upcvt from f32 to i64: T
upcvt from f32 to u64: T
upcvt from f32 to f64: T
downcvt from i16 to i8: T
downcvt from i16 to u8: T
downcvt from u16 to i8: T
downcvt from u16 to u8: T
downcvt from i32 to i16: T
downcvt from i32 to u16: T
downcvt from i32 to f16: T
downcvt from u32 to i16: T
downcvt from u32 to u16: T
downcvt from u32 to f16: T
downcvt from i64 to i32: T
downcvt from i64 to u32: T
downcvt from i64 to f32: T
downcvt from u64 to i32: T
downcvt from u64 to u32: T
downcvt from u64 to f32: T
downcvt from f16 to i8: T
downcvt from f16 to u8: T
downcvt from f32 to i16: T
downcvt from f32 to u16: T
downcvt from f32 to f16: T
downcvt from f64 to i32: T
downcvt from f64 to u32: T
downcvt from f64 to f32: T