diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 9fc011f6f8ee32..e4910526fdd92d 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -14,6 +14,7 @@ enum CORINFO_InstructionSet { InstructionSet_ILLEGAL = 0, + InstructionSet_Vector = 126, InstructionSet_NONE = 127, #ifdef TARGET_ARM64 InstructionSet_ArmBase=1, diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 0544cd33ec9be4..a1a98b9c132fda 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -368,6 +368,7 @@ set( JIT_HEADERS host.h hostallocator.h hwintrinsic.h + hwintrinsiclist.h ICorJitInfo_names_generated.h ICorJitInfo_wrapper_generated.hpp inline.h diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 1a4eb8af5af569..29043e95b67844 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -340,16 +340,12 @@ static Range GetRange(Compiler* comp, GenTree* tree, BasicBlock* block, ASSERT_V switch (id) { #if defined(TARGET_XARCH) - case NI_Vector256_ExtractMostSignificantBits: - case NI_Vector512_ExtractMostSignificantBits: case NI_X86Base_MoveMask: case NI_AVX_MoveMask: case NI_AVX2_MoveMask: case NI_AVX512_MoveMask: -#elif defined(TARGET_ARM64) - case NI_Vector64_ExtractMostSignificantBits: #endif - case NI_Vector128_ExtractMostSignificantBits: + case NI_Vector_ExtractMostSignificantBits: { // We have 1 bit per element, remaining upper bits are 0 @@ -1948,23 +1944,10 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree) GenTreeHWIntrinsic* hwi = op1->AsHWIntrinsic(); switch (hwi->GetHWIntrinsicId()) { -#if defined(TARGET_XARCH) - case NI_Vector128_op_Equality: - case NI_Vector256_op_Equality: - case NI_Vector512_op_Equality: -#elif defined(TARGET_ARM64) - case NI_Vector64_op_Equality: - case NI_Vector128_op_Equality: -#endif + case NI_Vector_op_Equality: break; -#if 
defined(TARGET_XARCH) - case NI_Vector128_op_Inequality: - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Inequality: -#elif defined(TARGET_ARM64) - case NI_Vector64_op_Inequality: - case NI_Vector128_op_Inequality: -#endif + + case NI_Vector_op_Inequality: equals = !equals; break; diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index ecb82d3a367eb1..b68d1f572eaa66 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -5431,12 +5431,11 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) GenTreeHWIntrinsic* hwintrinsic = data->AsHWIntrinsic(); NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); var_types baseType = hwintrinsic->GetSimdBaseType(); + unsigned simdSize = hwintrinsic->GetSimdSize(); switch (intrinsicId) { - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_ToScalar: case NI_X86Base_ConvertToInt32: case NI_X86Base_ConvertToUInt32: case NI_X86Base_X64_ConvertToInt64: @@ -5457,9 +5456,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) break; } - case NI_Vector128_GetElement: + case NI_Vector_GetElement: { assert(baseType == TYP_FLOAT); + assert(simdSize == 16); FALLTHROUGH; } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f11aa03e81c123..5977508f16df97 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3524,6 +3524,7 @@ class Compiler GenTree* gtNewSimdGetIndicesNode(var_types type, var_types simdBaseType, unsigned simdSize); +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) GenTree* gtNewSimdGetLowerNode(var_types type, GenTree* op1, var_types simdBaseType, @@ -3533,6 +3534,7 @@ class Compiler GenTree* op1, var_types simdBaseType, unsigned simdSize); +#endif // TARGET_XARCH || TARGET_ARM64 GenTree* gtNewSimdIsEvenIntegerNode(var_types type, GenTree* op1, @@ -3724,6 +3726,7 @@ class Compiler var_types simdBaseType, unsigned simdSize); +#if 
defined(TARGET_XARCH) || defined(TARGET_ARM64) GenTree* gtNewSimdWithLowerNode(var_types type, GenTree* op1, GenTree* op2, @@ -3735,6 +3738,7 @@ class Compiler GenTree* op2, var_types simdBaseType, unsigned simdSize); +#endif // TARGET_XARCH || TARGET_ARM64 GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID); GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID); @@ -5358,6 +5362,16 @@ class Compiler unsigned simdSize, bool mustExpand); + GenTree* impXplatIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig + R2RARG(CORINFO_CONST_LOOKUP* entryPoint), + var_types simdBaseType, + var_types retType, + unsigned simdSize, + bool mustExpand); + GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass); GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types simdBaseType); GenTree* addRangeCheckIfNeeded( diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index 749ab0fdcd5e45..bfeee4d9cfae76 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -1909,16 +1909,12 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use) switch (hwintrinsicTree->GetHWIntrinsicId()) { - case NI_Vector128_GetElement: - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: + case NI_Vector_GetElement: { return DecomposeHWIntrinsicGetElement(use, hwintrinsicTree); } - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_ToScalar: { return DecomposeHWIntrinsicToScalar(use, hwintrinsicTree); } @@ -1939,7 +1935,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use) } //------------------------------------------------------------------------ -// DecomposeHWIntrinsicGetElement: Decompose GT_HWINTRINSIC -- NI_Vector*_GetElement. 
+// DecomposeHWIntrinsicGetElement: Decompose GT_HWINTRINSIC -- NI_Vector_GetElement. // // Decompose a get[i] node on Vector*. For: // @@ -1953,7 +1949,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use) // hi_result = GT_HWINTRINSIC{GetElement}[int](tmp_simd_var, tmp_index_times_two + 1) // return: GT_LONG(lo_result, hi_result) // -// This isn't optimal codegen, since NI_Vector*_GetElement sometimes requires +// This isn't optimal codegen, since NI_Vector_GetElement sometimes requires // temps that could be shared, for example. // // Arguments: @@ -2048,7 +2044,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW } //------------------------------------------------------------------------ -// DecomposeHWIntrinsicToScalar: Decompose GT_HWINTRINSIC -- NI_Vector*_ToScalar. +// DecomposeHWIntrinsicToScalar: Decompose GT_HWINTRINSIC -- NI_Vector_ToScalar. // // create: // diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index f9abf2058dd1d4..71d25be333fcfd 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1191,13 +1191,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed case NI_ArmBase_Arm64_ReverseElementBits: case NI_ArmBase_LeadingZeroCount: case NI_ArmBase_ReverseElementBits: - case NI_Vector64_Create: - case NI_Vector64_CreateScalar: - case NI_Vector64_CreateScalarUnsafe: #endif // TARGET_ARM64 - case NI_Vector128_Create: - case NI_Vector128_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: + case NI_Vector_Create: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: #if defined(TARGET_XARCH) case NI_AVX2_LeadingZeroCount: case NI_AVX2_TrailingZeroCount: @@ -1205,12 +1202,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed case NI_AVX2_X64_TrailingZeroCount: case NI_X86Base_PopCount: case NI_X86Base_X64_PopCount: - case NI_Vector256_Create: - case NI_Vector512_Create: - case 
NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: case NI_X86Base_BitScanForward: case NI_X86Base_X64_BitScanForward: case NI_X86Base_BitScanReverse: @@ -1425,67 +1416,21 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed } #if defined(FEATURE_HW_INTRINSICS) -#if defined(TARGET_ARM64) - case NI_Vector64_As: - case NI_Vector64_AsByte: - case NI_Vector64_AsDouble: - case NI_Vector64_AsInt16: - case NI_Vector64_AsInt32: - case NI_Vector64_AsInt64: - case NI_Vector64_AsNInt: - case NI_Vector64_AsNUInt: - case NI_Vector64_AsSByte: - case NI_Vector64_AsSingle: - case NI_Vector64_AsUInt16: - case NI_Vector64_AsUInt32: - case NI_Vector64_AsUInt64: - case NI_Vector64_op_UnaryPlus: -#endif // TARGET_ARM64 - case NI_Vector128_As: - case NI_Vector128_AsByte: - case NI_Vector128_AsDouble: - case NI_Vector128_AsInt16: - case NI_Vector128_AsInt32: - case NI_Vector128_AsInt64: - case NI_Vector128_AsNInt: - case NI_Vector128_AsNUInt: - case NI_Vector128_AsSByte: - case NI_Vector128_AsSingle: - case NI_Vector128_AsUInt16: - case NI_Vector128_AsUInt32: - case NI_Vector128_AsUInt64: - case NI_Vector128_AsVector4: - case NI_Vector128_op_UnaryPlus: -#if defined(TARGET_XARCH) - case NI_Vector256_As: - case NI_Vector256_AsByte: - case NI_Vector256_AsDouble: - case NI_Vector256_AsInt16: - case NI_Vector256_AsInt32: - case NI_Vector256_AsInt64: - case NI_Vector256_AsNInt: - case NI_Vector256_AsNUInt: - case NI_Vector256_AsSByte: - case NI_Vector256_AsSingle: - case NI_Vector256_AsUInt16: - case NI_Vector256_AsUInt32: - case NI_Vector256_AsUInt64: - case NI_Vector256_op_UnaryPlus: - case NI_Vector512_As: - case NI_Vector512_AsByte: - case NI_Vector512_AsDouble: - case NI_Vector512_AsInt16: - case NI_Vector512_AsInt32: - case NI_Vector512_AsInt64: - case NI_Vector512_AsNInt: - case NI_Vector512_AsNUInt: - case NI_Vector512_AsSByte: - case NI_Vector512_AsSingle: - case 
NI_Vector512_AsUInt16: - case NI_Vector512_AsUInt32: - case NI_Vector512_AsUInt64: - case NI_Vector512_op_UnaryPlus: -#endif // TARGET_XARCH + case NI_Vector_As: + case NI_Vector_AsByte: + case NI_Vector_AsDouble: + case NI_Vector_AsInt16: + case NI_Vector_AsInt32: + case NI_Vector_AsInt64: + case NI_Vector_AsNInt: + case NI_Vector_AsNUInt: + case NI_Vector_AsSByte: + case NI_Vector_AsSingle: + case NI_Vector_AsUInt16: + case NI_Vector_AsUInt32: + case NI_Vector_AsUInt64: + case NI_Vector_AsVector4: + case NI_Vector_op_UnaryPlus: #endif // FEATURE_HW_INTRINSICS case NI_SRCS_UNSAFE_As: case NI_SRCS_UNSAFE_AsRef: @@ -1502,58 +1447,18 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed } #if defined(FEATURE_HW_INTRINSICS) -#if defined(TARGET_ARM64) - case NI_Vector64_get_AllBitsSet: - case NI_Vector64_get_E: - case NI_Vector64_get_Epsilon: - case NI_Vector64_get_NaN: - case NI_Vector64_get_NegativeInfinity: - case NI_Vector64_get_NegativeOne: - case NI_Vector64_get_NegativeZero: - case NI_Vector64_get_One: - case NI_Vector64_get_Pi: - case NI_Vector64_get_PositiveInfinity: - case NI_Vector64_get_Tau: - case NI_Vector64_get_Zero: -#endif // TARGET_ARM64 - case NI_Vector128_get_AllBitsSet: - case NI_Vector128_get_E: - case NI_Vector128_get_Epsilon: - case NI_Vector128_get_NaN: - case NI_Vector128_get_NegativeInfinity: - case NI_Vector128_get_NegativeOne: - case NI_Vector128_get_NegativeZero: - case NI_Vector128_get_One: - case NI_Vector128_get_Pi: - case NI_Vector128_get_PositiveInfinity: - case NI_Vector128_get_Tau: - case NI_Vector128_get_Zero: -#if defined(TARGET_XARCH) - case NI_Vector256_get_AllBitsSet: - case NI_Vector256_get_E: - case NI_Vector256_get_Epsilon: - case NI_Vector256_get_NaN: - case NI_Vector256_get_NegativeInfinity: - case NI_Vector256_get_NegativeOne: - case NI_Vector256_get_NegativeZero: - case NI_Vector256_get_One: - case NI_Vector256_get_Pi: - case NI_Vector256_get_PositiveInfinity: - case NI_Vector256_get_Tau: - 
case NI_Vector256_get_Zero: - case NI_Vector512_get_AllBitsSet: - case NI_Vector512_get_E: - case NI_Vector512_get_Epsilon: - case NI_Vector512_get_NaN: - case NI_Vector512_get_NegativeInfinity: - case NI_Vector512_get_NegativeOne: - case NI_Vector512_get_NegativeZero: - case NI_Vector512_get_One: - case NI_Vector512_get_Pi: - case NI_Vector512_get_PositiveInfinity: - case NI_Vector512_get_Tau: - case NI_Vector512_get_Zero: -#endif // TARGET_XARCH + case NI_Vector_get_AllBitsSet: + case NI_Vector_get_E: + case NI_Vector_get_Epsilon: + case NI_Vector_get_NaN: + case NI_Vector_get_NegativeInfinity: + case NI_Vector_get_NegativeOne: + case NI_Vector_get_NegativeZero: + case NI_Vector_get_One: + case NI_Vector_get_Pi: + case NI_Vector_get_PositiveInfinity: + case NI_Vector_get_Tau: + case NI_Vector_get_Zero: #endif // FEATURE_HW_INTRINSICS { // These always produce a vector constant diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 790ea987d31e8d..deb7a5fb148672 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3478,6 +3478,7 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) { GenTreeHWIntrinsic* hwintrinsic = tree->AsHWIntrinsic(); NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); + unsigned simdSize = hwintrinsic->GetSimdSize(); if (hwintrinsic->OperIsMemoryLoad()) { @@ -3516,9 +3517,9 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) break; } - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: + case NI_Vector_op_Division: { + assert((simdSize == 16) || (simdSize == 32)); break; } #endif // TARGET_XARCH diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 81d50ebbc9cc94..d2c48e3f060f08 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -4106,9 +4106,7 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) { switch (intrinsicId) { - case 
NI_Vector128_ConditionalSelect: - case NI_Vector256_ConditionalSelect: - case NI_Vector512_ConditionalSelect: + case NI_Vector_ConditionalSelect: { // We either become `(o2 & op1) | (op3 & ~op1)` // or we get optimized into some kind of single @@ -4119,9 +4117,7 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector128_Create: - case NI_Vector256_Create: - case NI_Vector512_Create: + case NI_Vector_Create: { // We shouldn't have "all constants" as they get transformed to CNS_VEC @@ -4145,12 +4141,8 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector128_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalar: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalar: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { // We shouldn't have "all constants" as they get transformed to CNS_VEC @@ -4173,9 +4165,7 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector128_Dot: - case NI_Vector256_Dot: - case NI_Vector512_Dot: + case NI_Vector_Dot: { uint32_t elementCount = static_cast(16 / genTypeSize(simdBaseType)); @@ -4206,9 +4196,7 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector128_ExtractMostSignificantBits: - case NI_Vector256_ExtractMostSignificantBits: - case NI_Vector512_ExtractMostSignificantBits: + case NI_Vector_ExtractMostSignificantBits: { costEx = 3; @@ -4228,9 +4216,7 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector128_GetElement: - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: + case NI_Vector_GetElement: { GenTree* op2 = hwTree->Op(2); @@ -4291,33 +4277,34 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector256_GetLower: - case NI_Vector512_GetLower: - case NI_Vector512_GetLower128: + case NI_Vector_GetLower: 
{ + assert((simdSize == 32) || (simdSize == 64)); + assert(genTypeSize(retType) == (simdSize / 2)); costEx = 1; break; } - case NI_Vector256_GetUpper: - case NI_Vector512_GetUpper: + case NI_Vector_GetLower128: { + assert(simdSize == 64); + assert(retType == TYP_SIMD16); + costEx = 1; + break; + } + + case NI_Vector_GetUpper: + { + assert((simdSize == 32) || (simdSize == 64)); + assert(genTypeSize(retType) == (simdSize / 2)); costEx = 3; break; } - case NI_Vector128_Shuffle: - case NI_Vector128_ShuffleNative: - case NI_Vector128_ShuffleNativeFallback: - case NI_Vector256_Shuffle: - case NI_Vector256_ShuffleNative: - case NI_Vector256_ShuffleNativeFallback: - case NI_Vector512_Shuffle: - case NI_Vector512_ShuffleNative: - case NI_Vector512_ShuffleNativeFallback: - case NI_Vector128_CreateGeometricSequence: - case NI_Vector256_CreateGeometricSequence: - case NI_Vector512_CreateGeometricSequence: + case NI_Vector_Shuffle: + case NI_Vector_ShuffleNative: + case NI_Vector_ShuffleNativeFallback: + case NI_Vector_CreateGeometricSequence: { // These are likely becoming calls costEx = 5 + (3 * IND_COST_EX); @@ -4325,27 +4312,23 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_ToScalar: { costEx = varTypeIsIntegral(simdBaseType) ? 
3 : 1; break; } - case NI_Vector128_ToVector512: - case NI_Vector256_ToVector512: - case NI_Vector128_ToVector256: - case NI_Vector128_ToVector256Unsafe: - case NI_Vector256_ToVector512Unsafe: + case NI_Vector_ToVector256: + case NI_Vector_ToVector256Unsafe: + case NI_Vector_ToVector512: + case NI_Vector_ToVector512Unsafe: { + assert((simdSize == 16) || (simdSize == 32)); costEx = 1; break; } - case NI_Vector128_WithElement: - case NI_Vector256_WithElement: - case NI_Vector512_WithElement: + case NI_Vector_WithElement: { GenTree* op2 = hwTree->Op(2); @@ -4407,18 +4390,16 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector256_WithLower: - case NI_Vector256_WithUpper: - case NI_Vector512_WithLower: - case NI_Vector512_WithUpper: + case NI_Vector_WithLower: + case NI_Vector_WithUpper: { + assert((simdSize == 32) || (simdSize == 64)); + assert(genTypeSize(retType) == simdSize); costEx = 3; break; } - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: - case NI_Vector512_op_Division: + case NI_Vector_op_Division: { // We generate a fairly complex sequence involving // comparisons, two branches, conversions, and a fp @@ -4429,12 +4410,8 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) break; } - case NI_Vector128_op_Equality: - case NI_Vector128_op_Inequality: - case NI_Vector256_op_Equality: - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Equality: - case NI_Vector512_op_Inequality: + case NI_Vector_op_Equality: + case NI_Vector_op_Inequality: { // We emit a simd compare, get mask, integer compare, // and a branch or setcc @@ -8621,8 +8598,8 @@ ExceptionSetFlags GenTree::OperExceptions(Compiler* comp) #ifdef TARGET_XARCH NamedIntrinsic intrinsicId = hwIntrinsicNode->GetHWIntrinsicId(); - if ((intrinsicId == NI_Vector128_op_Division) || (intrinsicId == NI_Vector256_op_Division) || - (intrinsicId == NI_Vector512_op_Division)) + + if (intrinsicId == NI_Vector_op_Division) { // We currently don't 
try to avoid setting these flags and GTF_EXCEPT when // we know that the operation in fact cannot overflow/divide by zero. @@ -22118,10 +22095,7 @@ bool GenTree::isContainableHWIntrinsic() const FALLTHROUGH; } - case NI_Vector128_GetElement: - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_ToScalar: case NI_X86Base_ConvertToInt32: case NI_X86Base_ConvertToUInt32: case NI_X86Base_Extract: @@ -22159,12 +22133,14 @@ bool GenTree::isContainableHWIntrinsic() const return true; } - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_GetElement: + { + // These HWIntrinsic operations are contained as part of a store + return node->GetSimdSize() == 16; + } + + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { // These HWIntrinsic operations are contained as part of scalar ops return true; @@ -22815,8 +22791,7 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, var_types simd assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); #elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdAbsNode"); - return nullptr; + return gtNewSimdHWIntrinsicNode(type, op1, NI_PackedSimd_Abs, simdBaseType, simdSize); #else #error Unsupported platform #endif @@ -23071,7 +23046,8 @@ GenTree* Compiler::gtNewSimdBinOpNode( } else { - assert(op2->OperIsHWIntrinsic(NI_Vector128_CreateScalar)); + assert(op2->OperIsHWIntrinsic(NI_Vector_CreateScalar)); + assert(op2->AsHWIntrinsic()->GetSimdSize() == 16); GenTree* shiftCountDup = fgMakeMultiUse(&op2->AsHWIntrinsic()->Op(1)); if (op == GT_RSH) @@ -23178,14 +23154,14 @@ GenTree* Compiler::gtNewSimdBinOpNode( if (compOpportunisticallyDependsOn(InstructionSet_AVX512) && simdSize == 32) { - return gtNewSimdHWIntrinsicNode(type, 
op1, op2, NI_Vector256_op_Division, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Vector_op_Division, simdBaseType, simdSize); } assert(simdSize == 16); if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Vector128_op_Division, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Vector_op_Division, simdBaseType, simdSize); } GenTree* op1Dup = fgMakeMultiUse(&op1); @@ -23197,9 +23173,9 @@ GenTree* Compiler::gtNewSimdBinOpNode( GenTree* op2Hi = gtNewSimdHWIntrinsicNode(type, op2, op2Dup, NI_X86Base_MoveHighToLow, TYP_FLOAT, simdSize); GenTree* divLo = - gtNewSimdHWIntrinsicNode(type, op1Dup2, op2Dup2, NI_Vector128_op_Division, simdBaseType, simdSize); + gtNewSimdHWIntrinsicNode(type, op1Dup2, op2Dup2, NI_Vector_op_Division, simdBaseType, simdSize); GenTree* divHi = - gtNewSimdHWIntrinsicNode(type, op1Hi, op2Hi, NI_Vector128_op_Division, simdBaseType, simdSize); + gtNewSimdHWIntrinsicNode(type, op1Hi, op2Hi, NI_Vector_op_Division, simdBaseType, simdSize); GenTree* div = gtNewSimdHWIntrinsicNode(type, divHi, divLo, NI_X86Base_MoveLowToHigh, TYP_FLOAT, simdSize); return gtNewSimdHWIntrinsicNode(type, div, gtNewIconNode(0x4E), NI_X86Base_Shuffle, simdBaseType, @@ -23211,12 +23187,13 @@ GenTree* Compiler::gtNewSimdBinOpNode( case GT_MUL: { -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_WASM) if (varTypeIsByte(simdBaseType)) { // If we can widen to the next vector size up, we can get by // with a single multiply, then narrow back down. 
+#if defined(TARGET_XARCH) if (((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_AVX2)) || ((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX512))) { @@ -23271,6 +23248,7 @@ GenTree* Compiler::gtNewSimdBinOpNode( return gtNewSimdGetLowerNode(type, shuffledProduct, simdBaseType, widenedSimdSize); } } +#endif // Otherwise, we multiply twice and blend the values back together. // This logic depends on the following facts: @@ -23305,7 +23283,10 @@ GenTree* Compiler::gtNewSimdBinOpNode( // return (evenMasked | oddProduct).AsByte(); return gtNewSimdBinOpNode(GT_OR, type, evenMasked, oddProduct, simdBaseType, simdSize); } - else if (varTypeIsLong(simdBaseType)) +#endif + +#if defined(TARGET_XARCH) + if (varTypeIsLong(simdBaseType)) { // This fallback path will be used only if the vpmullq instruction is not available. // The implementation is a simple decomposition using pmuludq, which multiplies @@ -23395,8 +23376,6 @@ GenTree* Compiler::gtNewSimdBinOpNode( // return Vector128.Create(lower, upper) return gtNewSimdWithElementNode(type, lower, gtNewIconNode(1), upper, simdBaseType, simdSize); } -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdWithElementNode"); #endif // !TARGET_XARCH && !TARGET_ARM64 unreached(); } @@ -23444,7 +23423,7 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, var_types sim intrinsic = NI_AdvSimd_Ceiling; } #elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdCeilNode"); + intrinsic = NI_PackedSimd_Ceiling; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -23606,11 +23585,11 @@ GenTree* Compiler::gtNewSimdCvtNode( #elif defined(TARGET_ARM64) return gtNewSimdCvtNativeNode(type, op1, simdTargetBaseType, simdSourceBaseType, simdSize); #elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdCvtNativeNode"); + NYI_WASM_SIMD("gtNewSimdCvtNode"); return nullptr; #else #error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 +#endif } GenTree* 
Compiler::gtNewSimdCvtNativeNode( @@ -23821,13 +23800,11 @@ GenTree* Compiler::gtNewSimdCvtNativeNode( default: unreached(); } - #elif defined(TARGET_WASM) NYI_WASM_SIMD("gtNewSimdCvtNativeNode"); - return nullptr; #else #error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 +#endif assert(hwIntrinsicID != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdSourceBaseType, simdSize); @@ -24063,68 +24040,14 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( switch (op) { -#if defined(TARGET_XARCH) case GT_EQ: { - if (simdSize == 32) - { - assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); - intrinsic = NI_Vector256_op_Equality; - } - else if (simdSize == 64) - { - intrinsic = NI_Vector512_op_Equality; - } - else - { - intrinsic = NI_Vector128_op_Equality; - } - break; - } - - case GT_GE: - case GT_GT: - case GT_LE: - case GT_LT: - { - // We want to generate a comparison along the lines of - // GT_XX(op1, op2).As() == Vector128.AllBitsSet - - if (simdSize == 32) - { - // TODO-XArch-CQ: It's a non-trivial amount of work to support these - // for floating-point while only utilizing AVX. It would require, among - // other things, inverting the comparison and potentially support for a - // new Avx.TestNotZ intrinsic to ensure the codegen remains efficient. 
- assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - intrinsic = NI_Vector256_op_Equality; - } - else if (simdSize == 64) - { - intrinsic = NI_Vector512_op_Equality; - } - else - { - intrinsic = NI_Vector128_op_Equality; - } - - op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseType, simdSize); - op2 = gtNewAllBitsSetConNode(simdType); +#if defined(TARGET_XARCH) + assert((simdSize != 32) || varTypeIsFloating(simdBaseType) || + compIsaSupportedDebugOnly(InstructionSet_AVX2)); +#endif - if (simdBaseType == TYP_FLOAT) - { - simdBaseType = TYP_INT; - } - else if (simdBaseType == TYP_DOUBLE) - { - simdBaseType = TYP_LONG; - } - break; - } -#elif defined(TARGET_ARM64) - case GT_EQ: - { - intrinsic = (simdSize == 8) ? NI_Vector64_op_Equality : NI_Vector128_op_Equality; + intrinsic = NI_Vector_op_Equality; break; } @@ -24133,17 +24056,18 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( case GT_LE: case GT_LT: { +#if defined(TARGET_XARCH) + // TODO-XArch-CQ: It's a non-trivial amount of work to support these + // for floating-point while only utilizing AVX. It would require, among + // other things, inverting the comparison and potentially support for a + // new Avx.TestNotZ intrinsic to ensure the codegen remains efficient. 
+ assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); +#endif + // We want to generate a comparison along the lines of - // GT_XX(op1, op2).As() == Vector128.AllBitsSet + // GT_XX(op1, op2).As() == Vector.AllBitsSet - if (simdSize == 8) - { - intrinsic = NI_Vector64_op_Equality; - } - else - { - intrinsic = NI_Vector128_op_Equality; - } + intrinsic = NI_Vector_op_Equality; op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseType, simdSize); op2 = gtNewAllBitsSetConNode(simdType); @@ -24158,12 +24082,6 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } break; } -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdCmpOpNode"); - return nullptr; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 default: { @@ -24195,78 +24113,24 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( switch (op) { -#if defined(TARGET_XARCH) case GT_EQ: case GT_GE: case GT_GT: case GT_LE: case GT_LT: { - // We want to generate a comparison along the lines of - // GT_XX(op1, op2).As() != Vector128.Zero - - if (simdSize == 32) - { - // TODO-XArch-CQ: It's a non-trivial amount of work to support these - // for floating-point while only utilizing AVX. It would require, among - // other things, inverting the comparison and potentially support for a - // new Avx.TestNotZ intrinsic to ensure the codegen remains efficient. 
- assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - - intrinsic = NI_Vector256_op_Inequality; - } - else if (simdSize == 64) - { - intrinsic = NI_Vector512_op_Inequality; - } - else - { - intrinsic = NI_Vector128_op_Inequality; - } - - op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseType, simdSize); - op2 = gtNewZeroConNode(simdType); - - if (simdBaseType == TYP_FLOAT) - { - simdBaseType = TYP_INT; - } - else if (simdBaseType == TYP_DOUBLE) - { - simdBaseType = TYP_LONG; - } - break; - } - - case GT_NE: - { - if (simdSize == 64) - { - intrinsic = NI_Vector512_op_Inequality; - } - else if (simdSize == 32) - { - assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); +#if defined(TARGET_XARCH) + // TODO-XArch-CQ: It's a non-trivial amount of work to support these + // for floating-point while only utilizing AVX. It would require, among + // other things, inverting the comparison and potentially support for a + // new Avx.TestNotZ intrinsic to ensure the codegen remains efficient. + assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); +#endif - intrinsic = NI_Vector256_op_Inequality; - } - else - { - intrinsic = NI_Vector128_op_Inequality; - } - break; - } -#elif defined(TARGET_ARM64) - case GT_EQ: - case GT_GE: - case GT_GT: - case GT_LE: - case GT_LT: - { // We want to generate a comparison along the lines of // GT_XX(op1, op2).As() != Vector128.Zero - intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality; + intrinsic = NI_Vector_op_Inequality; op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseType, simdSize); op2 = gtNewZeroConNode(simdType); @@ -24284,28 +24148,14 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( case GT_NE: { - intrinsic = (simdSize == 8) ? 
NI_Vector64_op_Inequality : NI_Vector128_op_Inequality; - break; - } -#elif defined(TARGET_WASM) - case GT_EQ: - case GT_GE: - case GT_GT: - case GT_LE: - case GT_LT: - { - NYI_WASM_SIMD("gtNewSimdCmpOpAnyNode"); - break; - } +#if defined(TARGET_XARCH) + assert((simdSize != 32) || varTypeIsFloating(simdBaseType) || + compIsaSupportedDebugOnly(InstructionSet_AVX2)); +#endif - case GT_NE: - { - NYI_WASM_SIMD("gtNewSimdCmpOpAnyNode"); + intrinsic = NI_Vector_op_Inequality; break; } -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 default: { @@ -24337,25 +24187,11 @@ GenTree* Compiler::gtNewSimdCndSelNode( NamedIntrinsic intrinsic = NI_Illegal; #if defined(TARGET_XARCH) - if (simdSize == 64) - { - assert(canUseEvexEncodingDebugOnly()); - intrinsic = NI_Vector512_ConditionalSelect; - } - else if (simdSize == 32) - { - intrinsic = NI_Vector256_ConditionalSelect; - } - else - { - intrinsic = NI_Vector128_ConditionalSelect; - } - return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, NI_Vector_ConditionalSelect, simdBaseType, simdSize); #elif defined(TARGET_ARM64) return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, NI_AdvSimd_BitwiseSelect, simdBaseType, simdSize); #elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdCndSelNode"); - return nullptr; + return gtNewSimdHWIntrinsicNode(type, op2, op3, op1, NI_PackedSimd_BitwiseSelect, simdBaseType, simdSize); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -24375,7 +24211,7 @@ GenTree* Compiler::gtNewSimdCndSelNode( // GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { - NamedIntrinsic hwIntrinsicID = NI_Vector128_Create; + NamedIntrinsic hwIntrinsicID = NI_Vector_Create; if (op1->IsIntegralConst() || op1->IsCnsFltOrDbl()) { @@ -24462,26 +24298,6 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types 
type, GenTree* op1, va return vecCon; } -#if defined(TARGET_XARCH) - if (simdSize == 64) - { - hwIntrinsicID = NI_Vector512_Create; - } - else if (simdSize == 32) - { - hwIntrinsicID = NI_Vector256_Create; - } -#elif defined(TARGET_ARM64) - if (simdSize == 8) - { - hwIntrinsicID = NI_Vector64_Create; - } -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdCreateBroadcastNode"); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_WASM - return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdBaseType, simdSize); } @@ -24499,7 +24315,7 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type, GenTree* op1, va // GenTree* Compiler::gtNewSimdCreateScalarNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { - NamedIntrinsic hwIntrinsicID = NI_Vector128_CreateScalar; + NamedIntrinsic hwIntrinsicID = NI_Vector_CreateScalar; if (op1->IsIntegralConst() || op1->IsCnsFltOrDbl()) { @@ -24563,26 +24379,6 @@ GenTree* Compiler::gtNewSimdCreateScalarNode(var_types type, GenTree* op1, var_t return vecCon; } -#if defined(TARGET_XARCH) - if (simdSize == 32) - { - hwIntrinsicID = NI_Vector256_CreateScalar; - } - else if (simdSize == 64) - { - hwIntrinsicID = NI_Vector512_CreateScalar; - } -#elif defined(TARGET_ARM64) - if (simdSize == 8) - { - hwIntrinsicID = (genTypeSize(simdBaseType) == 8) ? 
NI_Vector64_Create : NI_Vector64_CreateScalar; - } -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdCreateScalarNode: Unsupported SIMD size"); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_WASM - return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdBaseType, simdSize); } @@ -24606,7 +24402,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type, var_types simdBaseType, unsigned simdSize) { - NamedIntrinsic hwIntrinsicID = NI_Vector128_CreateScalarUnsafe; + NamedIntrinsic hwIntrinsicID = NI_Vector_CreateScalarUnsafe; if (op1->IsIntegralConst() || op1->IsCnsFltOrDbl()) { @@ -24699,26 +24495,6 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type, return vecCon; } -#if defined(TARGET_XARCH) - if (simdSize == 32) - { - hwIntrinsicID = NI_Vector256_CreateScalarUnsafe; - } - else if (simdSize == 64) - { - hwIntrinsicID = NI_Vector512_CreateScalarUnsafe; - } -#elif defined(TARGET_ARM64) - if (simdSize == 8) - { - hwIntrinsicID = (genTypeSize(simdBaseType) == 8) ? NI_Vector64_Create : NI_Vector64_CreateScalarUnsafe; - } -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdCreateScalarUnsafeNode"); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdBaseType, simdSize); } @@ -24931,27 +24707,12 @@ GenTree* Compiler::gtNewSimdDotProdNode( #if defined(TARGET_XARCH) assert(!varTypeIsByte(simdBaseType) && !varTypeIsLong(simdBaseType)); assert(simdSize != 64); - - if (simdSize == 32) - { - assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); - intrinsic = NI_Vector256_Dot; - } - else - { - intrinsic = NI_Vector128_Dot; - } + assert((simdSize != 32) || varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); #elif defined(TARGET_ARM64) assert(!varTypeIsLong(simdBaseType)); - intrinsic = (simdSize == 8) ? 
NI_Vector64_Dot : NI_Vector128_Dot; -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdDotNode"); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 +#endif - assert(intrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Vector_Dot, simdBaseType, simdSize); } GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) @@ -24990,7 +24751,7 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, var_types si intrinsic = NI_AdvSimd_Floor; } #elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdFloorNode"); + intrinsic = NI_PackedSimd_Floor; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -25055,40 +24816,15 @@ GenTree* Compiler::gtNewSimdFmaNode( GenTree* Compiler::gtNewSimdGetElementNode( var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) { - NamedIntrinsic intrinsicId = NI_Vector128_GetElement; + NamedIntrinsic intrinsicId = NI_Vector_GetElement; assert(varTypeIsArithmetic(simdBaseType)); -#if defined(TARGET_XARCH) - if (op2->IsIntegralConst(0)) - { - return gtNewSimdToScalarNode(type, op1, simdBaseType, simdSize); - } - - if (simdSize == 64) - { - intrinsicId = NI_Vector512_GetElement; - } - else if (simdSize == 32) - { - intrinsicId = NI_Vector256_GetElement; - } -#elif defined(TARGET_ARM64) if (op2->IsIntegralConst(0)) { return gtNewSimdToScalarNode(type, op1, simdBaseType, simdSize); } - if (simdSize == 8) - { - intrinsicId = NI_Vector64_GetElement; - } -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdGetElementNode"); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - int immUpperBound = getSIMDVectorLength(simdSize, simdBaseType) - 1; bool rangeCheckNeeded = !op2->OperIsConst(); @@ -25196,65 +24932,51 @@ GenTree* Compiler::gtNewSimdGetIndicesNode(var_types 
type, var_types simdBaseTyp return indices; } +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) GenTree* Compiler::gtNewSimdGetLowerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsArithmetic(simdBaseType)); - NamedIntrinsic intrinsicId = NI_Illegal; - #if defined(TARGET_XARCH) if (simdSize == 32) { assert(type == TYP_SIMD16); - intrinsicId = NI_Vector256_GetLower; } else { assert((type == TYP_SIMD32) && (simdSize == 64)); - intrinsicId = NI_Vector512_GetLower; } #elif defined(TARGET_ARM64) assert((type == TYP_SIMD8) && (simdSize == 16)); - intrinsicId = NI_Vector128_GetLower; -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdGetLowerNode"); #else -#error Unsupported platform + unreached(); #endif // !TARGET_XARCH && !TARGET_ARM64 - assert(intrinsicId != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector_GetLower, simdBaseType, simdSize); } GenTree* Compiler::gtNewSimdGetUpperNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsArithmetic(simdBaseType)); - NamedIntrinsic intrinsicId = NI_Illegal; - #if defined(TARGET_XARCH) if (simdSize == 32) { assert(type == TYP_SIMD16); - intrinsicId = NI_Vector256_GetUpper; } else { assert((type == TYP_SIMD32) && (simdSize == 64)); - intrinsicId = NI_Vector512_GetUpper; } #elif defined(TARGET_ARM64) assert((type == TYP_SIMD8) && (simdSize == 16)); - intrinsicId = NI_Vector128_GetUpper; -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdGetUpperNode"); #else -#error Unsupported platform + unreached(); #endif // !TARGET_XARCH && !TARGET_ARM64 - assert(intrinsicId != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector_GetUpper, simdBaseType, simdSize); } +#endif // !TARGET_XARCH && !TARGET_ARM64 
//---------------------------------------------------------------------------------------------- // Compiler::gtNewSimdIsEvenIntegerNode: Creates a new simd IsEvenInteger node @@ -25825,16 +25547,13 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types type, GenTree* op1, var_ty assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); -#elif defined(TARGET_ARM64) - // ARM64 doesn't have aligned loads, but aligned loads are only validated to be +#elif defined(TARGET_ARM64) || defined(TARGET_WASM) + // ARM64/WASM doesn't have aligned loads, but aligned loads are only validated to be // aligned when optimizations are disable, so only skip the intrinsic handling // if optimizations are enabled assert(opts.OptimizationEnabled()); return gtNewSimdLoadNode(type, op1, simdBaseType, simdSize); -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdLoadAlignedNode"); - return nullptr; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -25908,16 +25627,13 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, GenTree* op1, va assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); -#elif defined(TARGET_ARM64) - // ARM64 doesn't have aligned loads, but aligned loads are only validated to be +#elif defined(TARGET_ARM64) || defined(TARGET_WASM) + // ARM64/WASM doesn't have aligned loads, but aligned loads are only validated to be // aligned when optimizations are disable, so only skip the intrinsic handling // if optimizations are enabled assert(opts.OptimizationEnabled()); return gtNewSimdLoadNode(type, op1, simdBaseType, simdSize); -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdLoadNonTemporalNode"); - return nullptr; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -26877,7 +26593,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, 
NI_X86Base_MoveLowToHigh, TYP_FLOAT, simdSize); } - intrinsicId = (simdSize == 64) ? NI_Vector256_ToVector512Unsafe : NI_Vector128_ToVector256Unsafe; + intrinsicId = (simdSize == 64) ? NI_Vector_ToVector512Unsafe : NI_Vector_ToVector256Unsafe; tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, intrinsicId, simdBaseType, simdSize / 2); return gtNewSimdWithUpperNode(type, tmp1, tmp2, simdBaseType, simdSize); @@ -26997,7 +26713,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_AVX_ConvertToVector128Single, opBaseType, simdSize); tmp2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_AVX_ConvertToVector128Single, opBaseType, simdSize); - tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_ToVector256Unsafe, simdBaseType, 16); + tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector_ToVector256Unsafe, simdBaseType, 16); return gtNewSimdWithUpperNode(type, tmp1, tmp2, simdBaseType, simdSize); } @@ -27153,7 +26869,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( var_types tmp2BaseType = TYP_DOUBLE; - tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector_ToVector128Unsafe, simdBaseType, simdSize); tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, tmp2BaseType, 16); return gtNewSimdHWIntrinsicNode(type, tmp2, NI_AdvSimd_Arm64_ConvertToSingleLower, simdBaseType, simdSize); @@ -27164,7 +26880,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( // var tmp2 = tmp1.WithUpper(op2); // return AdvSimd.ExtractNarrowingLower(tmp2); - tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector_ToVector128Unsafe, simdBaseType, simdSize); tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, simdBaseType, 16); return gtNewSimdHWIntrinsicNode(type, tmp2, NI_AdvSimd_ExtractNarrowingLower, simdBaseType, simdSize); @@ -27422,6 +27138,7 @@ 
GenTree* Compiler::gtNewSimdConcatNode(var_types type, #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) var_types halfType = getSIMDTypeForSize(simdSize / 2); GenTree* upper; @@ -27452,17 +27169,16 @@ GenTree* Compiler::gtNewSimdConcatNode(var_types type, #if defined(TARGET_XARCH) GenTree* result = (simdSize == 32) - ? gtNewSimdHWIntrinsicNode(type, lower, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize / 2) - : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector256_ToVector512Unsafe, simdBaseType, simdSize / 2); + ? gtNewSimdHWIntrinsicNode(type, lower, NI_Vector_ToVector256Unsafe, simdBaseType, simdSize / 2) + : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector_ToVector512Unsafe, simdBaseType, simdSize / 2); #elif defined(TARGET_ARM64) - GenTree* result = gtNewSimdHWIntrinsicNode(type, lower, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize / 2); -#elif defined(TARGET_WASM) - GenTree* result = nullptr; + GenTree* result = gtNewSimdHWIntrinsicNode(type, lower, NI_Vector_ToVector128Unsafe, simdBaseType, simdSize / 2); #else #error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_WASM +#endif // !TARGET_XARCH && !TARGET_ARM64 return gtNewSimdWithUpperNode(type, result, upper, simdBaseType, simdSize); +#endif // !TARGET_XARCH && !TARGET_ARM64 } //---------------------------------------------------------------------------------------------- @@ -27588,8 +27304,8 @@ GenTree* Compiler::gtNewSimdZipNode( GenTree* result = (simdSize == 32) - ? gtNewSimdHWIntrinsicNode(type, lower, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize / 2) - : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector256_ToVector512Unsafe, simdBaseType, simdSize / 2); + ? 
gtNewSimdHWIntrinsicNode(type, lower, NI_Vector_ToVector256Unsafe, simdBaseType, simdSize / 2) + : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector_ToVector512Unsafe, simdBaseType, simdSize / 2); return gtNewSimdWithUpperNode(type, result, higher, simdBaseType, simdSize); #endif // TARGET_XARCH } @@ -27681,10 +27397,9 @@ GenTree* Compiler::gtNewSimdUnzipNode( assert(IsValidForShuffle(shuffle, wideSimdSize, simdBaseType, nullptr, false)); - GenTree* result = - gtNewSimdHWIntrinsicNode(wideType, op1, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize); - result = gtNewSimdWithUpperNode(wideType, result, op2, simdBaseType, wideSimdSize); - result = gtNewSimdShuffleNode(wideType, result, shuffle, simdBaseType, wideSimdSize, false); + GenTree* result = gtNewSimdHWIntrinsicNode(wideType, op1, NI_Vector_ToVector256Unsafe, simdBaseType, simdSize); + result = gtNewSimdWithUpperNode(wideType, result, op2, simdBaseType, wideSimdSize); + result = gtNewSimdShuffleNode(wideType, result, shuffle, simdBaseType, wideSimdSize, false); return gtNewSimdGetLowerNode(type, result, simdBaseType, wideSimdSize); } @@ -27752,8 +27467,8 @@ GenTree* Compiler::gtNewSimdUnzipNode( GenTree* result = (simdSize == 32) - ? gtNewSimdHWIntrinsicNode(type, lower, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize / 2) - : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector256_ToVector512Unsafe, simdBaseType, simdSize / 2); + ? 
gtNewSimdHWIntrinsicNode(type, lower, NI_Vector_ToVector256Unsafe, simdBaseType, simdSize / 2) + : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector_ToVector512Unsafe, simdBaseType, simdSize / 2); return gtNewSimdWithUpperNode(type, result, higher, simdBaseType, simdSize); #elif defined(TARGET_WASM) NYI_WASM_SIMD("gtNewSimdUnzipNode"); @@ -27862,7 +27577,7 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, var_types si intrinsic = NI_AdvSimd_RoundToNearest; } #elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdRoundNode"); + intrinsic = NI_PackedSimd_RoundToNearest; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -29114,7 +28829,7 @@ GenTree* Compiler::gtNewSimdSqrtNode(var_types type, GenTree* op1, var_types sim intrinsic = NI_AdvSimd_Arm64_Sqrt; } #elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdSqrtNode"); + intrinsic = NI_PackedSimd_Sqrt; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -29187,16 +28902,13 @@ GenTree* Compiler::gtNewSimdStoreAlignedNode(GenTree* op1, GenTree* op2, var_typ } return gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, intrinsic, simdBaseType, simdSize); -#elif defined(TARGET_ARM64) - // ARM64 doesn't have aligned stores, but aligned stores are only validated to be +#elif defined(TARGET_ARM64) || defined(TARGET_WASM) + // ARM64/WASM doesn't have aligned stores, but aligned stores are only validated to be // aligned when optimizations are disable, so only skip the intrinsic handling // if optimizations are enabled assert(opts.OptimizationEnabled()); return gtNewSimdStoreNode(op1, op2, simdBaseType, simdSize); -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdStoreAlignedNode"); - return nullptr; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -29241,16 +28953,13 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1, GenTree* op2, var } return gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, intrinsic, simdBaseType, 
simdSize); -#elif defined(TARGET_ARM64) - // ARM64 doesn't have aligned stores, but aligned stores are only validated to be +#elif defined(TARGET_ARM64) || defined(TARGET_WASM) + // ARM64/WASM doesn't have aligned stores, but aligned stores are only validated to be // aligned when optimizations are disable, so only skip the intrinsic handling // if optimizations are enabled assert(opts.OptimizationEnabled()); return gtNewSimdStoreNode(op1, op2, simdBaseType, simdSize); -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdStoreNonTemporalNode"); - return nullptr; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -29365,7 +29074,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, var_types simd GenTree* op1Lane2 = fgMakeMultiUse(&op1); GenTree* op1Lane3 = fgMakeMultiUse(&op1); - GenTree* op1Lane0 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector512_GetLower128, simdBaseType, 64); + GenTree* op1Lane0 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector_GetLower128, simdBaseType, 64); op1Lane1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lane1, gtNewIconNode(1), NI_AVX512_ExtractVector128, simdBaseType, 64); op1Lane2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lane2, gtNewIconNode(2), NI_AVX512_ExtractVector128, @@ -29581,38 +29290,7 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, var_types assert(varTypeIsArithmetic(simdBaseType)); - NamedIntrinsic intrinsic = NI_Illegal; - -#ifdef TARGET_XARCH - if (simdSize == 64) - { - intrinsic = NI_Vector512_ToScalar; - } - else if (simdSize == 32) - { - intrinsic = NI_Vector256_ToScalar; - } - else - { - intrinsic = NI_Vector128_ToScalar; - } -#elif defined(TARGET_ARM64) - if (simdSize == 8) - { - intrinsic = NI_Vector64_ToScalar; - } - else - { - intrinsic = NI_Vector128_ToScalar; - } -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdToScalarNode"); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - assert(intrinsic != 
NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector_ToScalar, simdBaseType, simdSize); } //------------------------------------------------------------------------ @@ -29662,7 +29340,7 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, var_types si intrinsic = NI_AdvSimd_RoundToZero; } #elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdTruncNode"); + intrinsic = NI_PackedSimd_Truncate; #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -30179,22 +29857,13 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, var_typ GenTree* Compiler::gtNewSimdWithElementNode( var_types type, GenTree* op1, GenTree* op2, GenTree* op3, var_types simdBaseType, unsigned simdSize) { - NamedIntrinsic hwIntrinsicID = NI_Vector128_WithElement; + NamedIntrinsic hwIntrinsicID = NI_Vector_WithElement; assert(varTypeIsArithmetic(simdBaseType)); assert(varTypeIsArithmetic(op3)); #if defined(TARGET_XARCH) assert(!varTypeIsLong(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_X86Base_X64)); - - if (simdSize == 64) - { - hwIntrinsicID = NI_Vector512_WithElement; - } - else if (simdSize == 32) - { - hwIntrinsicID = NI_Vector256_WithElement; - } #elif defined(TARGET_ARM64) switch (simdBaseType) { @@ -30203,7 +29872,7 @@ GenTree* Compiler::gtNewSimdWithElementNode( case TYP_DOUBLE: if (simdSize == 8) { - return gtNewSimdHWIntrinsicNode(type, op3, NI_Vector64_Create, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op3, NI_Vector_Create, simdBaseType, simdSize); } break; @@ -30313,34 +29982,28 @@ GenTreeFieldList* Compiler::gtConvertParamOpToFieldList(GenTree* op, unsigned fi } #endif // TARGET_ARM64 +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) GenTree* Compiler::gtNewSimdWithLowerNode( var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) { 
assert(varTypeIsArithmetic(simdBaseType)); - NamedIntrinsic intrinsicId = NI_Illegal; - #if defined(TARGET_XARCH) if (simdSize == 32) { assert(type == TYP_SIMD32); - intrinsicId = NI_Vector256_WithLower; } else { assert((type == TYP_SIMD64) && (simdSize == 64)); - intrinsicId = NI_Vector512_WithLower; } #elif defined(TARGET_ARM64) assert((type == TYP_SIMD16) && (simdSize == 16)); - intrinsicId = NI_Vector128_WithLower; -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdWithLowerNode"); #else -#error Unsupported platform + unreached(); #endif // !TARGET_XARCH && !TARGET_ARM64 - return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsicId, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Vector_WithLower, simdBaseType, simdSize); } GenTree* Compiler::gtNewSimdWithUpperNode( @@ -30348,30 +30011,24 @@ GenTree* Compiler::gtNewSimdWithUpperNode( { assert(varTypeIsArithmetic(simdBaseType)); - NamedIntrinsic intrinsicId = NI_Illegal; - #if defined(TARGET_XARCH) if (simdSize == 32) { assert(type == TYP_SIMD32); - intrinsicId = NI_Vector256_WithUpper; } else { assert((type == TYP_SIMD64) && (simdSize == 64)); - intrinsicId = NI_Vector512_WithUpper; } #elif defined(TARGET_ARM64) assert((type == TYP_SIMD16) && (simdSize == 16)); - intrinsicId = NI_Vector128_WithUpper; -#elif defined(TARGET_WASM) - NYI_WASM_SIMD("gtNewSimdWithUpperNode"); #else -#error Unsupported platform + unreached(); #endif // !TARGET_XARCH && !TARGET_ARM64 - return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsicId, simdBaseType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Vector_WithUpper, simdBaseType, simdSize); } +#endif // !TARGET_XARCH && !TARGET_ARM64 GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID) { @@ -31261,9 +30918,9 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) break; } - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: + case NI_Vector_op_Division: { + 
assert((GetSimdSize() == 16) || (GetSimdSize() == 32)); gtFlags |= GTF_EXCEPT; break; } @@ -31998,10 +31655,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, assert(op1->TypeIs(simdType)); assert(op2 != nullptr); -#ifdef TARGET_WASM - NYI_WASM_SIMD("GetHWIntrinsicIdForBinOp"); -#endif - #if defined(TARGET_XARCH) if ((simdSize == 64) || (simdSize == 32)) { @@ -32061,7 +31714,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = NI_AdvSimd_Add; } -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_Add; +#else +#error Unsupported platform +#endif break; } @@ -32093,7 +31750,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } #elif defined(TARGET_ARM64) id = NI_AdvSimd_And; -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_And; +#else +#error Unsupported platform +#endif break; } @@ -32136,7 +31797,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, assert(simdSize == 16 || simdSize == 8); id = NI_AdvSimd_BitwiseClear; -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_AndNot; +#else +#error Unsupported platform +#endif break; } @@ -32174,7 +31839,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = NI_AdvSimd_Arm64_Divide; } -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_Divide; +#else +#error Unsupported platform +#endif break; } @@ -32226,7 +31895,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = op2->IsCnsIntOrI() ? 
NI_AdvSimd_ShiftLeftLogical : NI_AdvSimd_ShiftLogical; } -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_ShiftLeft; +#else +#error Unsupported platform +#endif break; } @@ -32286,7 +31959,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = op2->TypeIs(simdType) ? NI_AdvSimd_Multiply : NI_AdvSimd_MultiplyByScalar; } -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + if (!varTypeIsByte(simdBaseType)) + { + id = NI_PackedSimd_Multiply; + } +#else +#error Unsupported platform +#endif break; } @@ -32317,9 +31997,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_X86Base_Or; } #elif defined(TARGET_ARM64) - id = NI_AdvSimd_Or; -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + if (!varTypeIsByte(simdBaseType)) + { + id = NI_PackedSimd_Or; + } +#else +#error Unsupported platform +#endif break; } @@ -32404,7 +32090,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = op2->IsCnsIntOrI() ? NI_AdvSimd_ShiftRightArithmetic : NI_AdvSimd_ShiftArithmetic; } -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_ShiftRightArithmetic; +#else +#error Unsupported platform +#endif break; } @@ -32456,7 +32146,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = varTypeIsInt(op2) ? 
NI_AdvSimd_ShiftRightLogical : NI_AdvSimd_ShiftLogical; } -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_ShiftRightLogical; +#else +#error Unsupported platform +#endif break; } @@ -32498,7 +32192,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = NI_AdvSimd_Subtract; } -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_Subtract; +#else +#error Unsupported platform +#endif break; } @@ -32530,7 +32228,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } #elif defined(TARGET_ARM64) id = NI_AdvSimd_Xor; -#endif // !TARGET_XARCH && !TARGET_ARM64 +#elif defined(TARGET_WASM) + id = NI_PackedSimd_Xor; +#else +#error Unsupported platform +#endif break; } @@ -32952,18 +32654,8 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec switch (gtHWIntrinsicId) { -#if defined(TARGET_ARM64) - case NI_Vector64_op_Equality: - case NI_Vector64_op_Inequality: -#endif // TARGET_ARM64 - case NI_Vector128_op_Equality: - case NI_Vector128_op_Inequality: -#if defined(TARGET_XARCH) - case NI_Vector256_op_Equality: - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Equality: - case NI_Vector512_op_Inequality: -#endif // TARGET_XARCH + case NI_Vector_op_Equality: + case NI_Vector_op_Inequality: { // We can optimize when the constant is zero, but only // for non floating-point since +0.0 == -0.0. 
@@ -33011,21 +32703,9 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec } #endif // TARGET_XARCH - case NI_Vector128_Shuffle: - case NI_Vector128_ShuffleNative: - case NI_Vector128_ShuffleNativeFallback: -#if defined(TARGET_XARCH) - case NI_Vector256_Shuffle: - case NI_Vector256_ShuffleNative: - case NI_Vector256_ShuffleNativeFallback: - case NI_Vector512_Shuffle: - case NI_Vector512_ShuffleNative: - case NI_Vector512_ShuffleNativeFallback: -#elif defined(TARGET_ARM64) - case NI_Vector64_Shuffle: - case NI_Vector64_ShuffleNative: - case NI_Vector64_ShuffleNativeFallback: -#endif + case NI_Vector_Shuffle: + case NI_Vector_ShuffleNative: + case NI_Vector_ShuffleNativeFallback: { // The shuffle indices ideally are constant so we can get the best // codegen possible. There are also some case/s where it would have @@ -34593,26 +34273,24 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) { switch (ni) { -#if defined(TARGET_ARM64) - case NI_Vector64_ExtractMostSignificantBits: -#elif defined(TARGET_XARCH) - case NI_Vector256_ExtractMostSignificantBits: + case NI_Vector_ExtractMostSignificantBits: +#if defined(TARGET_XARCH) case NI_X86Base_MoveMask: case NI_AVX_MoveMask: case NI_AVX2_MoveMask: #endif - case NI_Vector128_ExtractMostSignificantBits: { -#ifdef FEATURE_MASKED_HW_INTRINSICS simdmask_t simdMaskVal; switch (simdSize) { +#if defined(TARGET_ARM64) case 8: { EvaluateExtractMSB(simdBaseType, &simdMaskVal, cnsNode->AsVecCon()->gtSimd8Val); break; } +#endif // TARGET_ARM64 case 16: { @@ -34642,12 +34320,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) resultNode = gtNewIconNode(static_cast(mask)); break; - -#else - NYI_WASM_SIMD("gtFoldExprHWIntrinsic: Extract MSB"); - resultNode = nullptr; - break; -#endif } #ifdef TARGET_XARCH @@ -34718,20 +34390,21 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } #endif - case NI_Vector128_AsVector3: - case 
NI_Vector128_AsVector128Unsafe: -#ifdef TARGET_ARM64 - case NI_Vector64_ToVector128Unsafe: - case NI_Vector128_GetLower: -#elif defined(TARGET_XARCH) || defined(TARGET_WASM) - case NI_Vector128_AsVector2: -#endif -#ifdef TARGET_XARCH - case NI_Vector128_ToVector256Unsafe: - case NI_Vector256_GetLower: - case NI_Vector256_ToVector512Unsafe: - case NI_Vector512_GetLower: - case NI_Vector512_GetLower128: + case NI_Vector_AsVector3: + case NI_Vector_AsVector128Unsafe: +#if defined(TARGET_XARCH) + case NI_Vector_AsVector2: + case NI_Vector_GetLower: + case NI_Vector_GetLower128: + case NI_Vector_ToVector256Unsafe: + case NI_Vector_ToVector512Unsafe: +#elif defined(TARGET_ARM64) + case NI_Vector_GetLower: + case NI_Vector_ToVector128Unsafe: +#elif defined(TARGET_WASM) + case NI_Vector_AsVector2: +#else +#error Unsupported platform #endif { // These are all going to a smaller type taking the lowest bits @@ -34744,7 +34417,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } #ifdef TARGET_ARM64 - case NI_Vector64_ToVector128: + case NI_Vector_ToVector128: { assert(retType == TYP_SIMD16); assert(cnsNode->TypeIs(TYP_SIMD8)); @@ -34755,7 +34428,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } #elif defined(TARGET_XARCH) - case NI_Vector128_ToVector256: + case NI_Vector_ToVector256: { assert(retType == TYP_SIMD32); assert(cnsNode->TypeIs(TYP_SIMD16)); @@ -34766,22 +34439,18 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } - case NI_Vector128_ToVector512: + case NI_Vector_ToVector512: { assert(retType == TYP_SIMD64); - assert(cnsNode->TypeIs(TYP_SIMD16)); - cnsNode->AsVecCon()->gtSimd64Val.v128[1] = {}; - cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; - - cnsNode->gtType = retType; - resultNode = cnsNode; - break; - } - case NI_Vector256_ToVector512: - { - assert(retType == TYP_SIMD64); - assert(cnsNode->TypeIs(TYP_SIMD32)); + if (cnsNode->TypeIs(TYP_SIMD16)) + { + 
cnsNode->AsVecCon()->gtSimd64Val.v128[1] = {}; + } + else + { + assert(cnsNode->TypeIs(TYP_SIMD32)); + } cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; cnsNode->gtType = retType; @@ -34791,7 +34460,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) #endif #ifdef TARGET_ARM64 - case NI_Vector128_GetUpper: + case NI_Vector_GetUpper: { assert(retType == TYP_SIMD8); assert(cnsNode->TypeIs(TYP_SIMD16)); @@ -34802,22 +34471,19 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } #elif defined(TARGET_XARCH) - case NI_Vector256_GetUpper: + case NI_Vector_GetUpper: { - assert(retType == TYP_SIMD16); - assert(cnsNode->TypeIs(TYP_SIMD32)); - cnsNode->AsVecCon()->gtSimd16Val = cnsNode->AsVecCon()->gtSimd32Val.v128[1]; - - cnsNode->gtType = retType; - resultNode = cnsNode; - break; - } - - case NI_Vector512_GetUpper: - { - assert(retType == TYP_SIMD32); - assert(cnsNode->TypeIs(TYP_SIMD64)); - cnsNode->AsVecCon()->gtSimd32Val = cnsNode->AsVecCon()->gtSimd64Val.v256[1]; + if (retType == TYP_SIMD16) + { + assert(cnsNode->TypeIs(TYP_SIMD32)); + cnsNode->AsVecCon()->gtSimd16Val = cnsNode->AsVecCon()->gtSimd32Val.v128[1]; + } + else + { + assert(retType == TYP_SIMD32); + assert(cnsNode->TypeIs(TYP_SIMD64)); + cnsNode->AsVecCon()->gtSimd32Val = cnsNode->AsVecCon()->gtSimd64Val.v256[1]; + } cnsNode->gtType = retType; resultNode = cnsNode; @@ -34825,13 +34491,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } #endif - case NI_Vector128_ToScalar: -#ifdef TARGET_ARM64 - case NI_Vector64_ToScalar: -#elif defined(TARGET_XARCH) - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: -#endif + case NI_Vector_ToScalar: { var_types simdType = getSIMDTypeForSize(simdSize); @@ -35097,13 +34757,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) { switch (ni) { - case NI_Vector128_GetElement: -#ifdef TARGET_ARM64 - case NI_Vector64_GetElement: -#elif defined(TARGET_XARCH) - case NI_Vector256_GetElement: 
- case NI_Vector512_GetElement: -#endif + case NI_Vector_GetElement: { uint32_t index = static_cast(otherNode->AsIntConCommon()->IconValue()); @@ -35168,7 +34822,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) #endif #ifdef TARGET_ARM64 - case NI_Vector128_WithLower: + case NI_Vector_WithLower: { assert(retType == TYP_SIMD16); assert(cnsNode->TypeIs(TYP_SIMD16)); @@ -35179,37 +34833,29 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } #elif defined(TARGET_XARCH) - case NI_Vector256_WithLower: + case NI_Vector_WithLower: { - assert(retType == TYP_SIMD32); - assert(cnsNode->TypeIs(TYP_SIMD32)); - assert(otherNode->TypeIs(TYP_SIMD16)); - cnsNode->AsVecCon()->gtSimd32Val.v128[0] = otherNode->AsVecCon()->gtSimd16Val; - - resultNode = cnsNode; - break; - } + assert(cnsNode->TypeIs(retType)); - case NI_Vector512_WithLower: - { - assert(retType == TYP_SIMD64); - assert(cnsNode->TypeIs(TYP_SIMD64)); - assert(otherNode->TypeIs(TYP_SIMD32)); - cnsNode->AsVecCon()->gtSimd64Val.v256[0] = otherNode->AsVecCon()->gtSimd32Val; + if (retType == TYP_SIMD32) + { + assert(otherNode->TypeIs(TYP_SIMD16)); + cnsNode->AsVecCon()->gtSimd32Val.v128[0] = otherNode->AsVecCon()->gtSimd16Val; + } + else + { + assert(retType == TYP_SIMD64); + assert(otherNode->TypeIs(TYP_SIMD32)); + cnsNode->AsVecCon()->gtSimd64Val.v256[0] = otherNode->AsVecCon()->gtSimd32Val; + } resultNode = cnsNode; break; } -#elif defined(TARGET_WASM) - case NI_Vector128_WithLower: - { - NYI_WASM_SIMD("gtFoldExprHWIntrinsic: WithLower"); - break; - } #endif #ifdef TARGET_ARM64 - case NI_Vector128_WithUpper: + case NI_Vector_WithUpper: { assert(retType == TYP_SIMD16); assert(cnsNode->TypeIs(TYP_SIMD16)); @@ -35220,42 +34866,28 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } #elif defined(TARGET_XARCH) - case NI_Vector256_WithUpper: + case NI_Vector_WithUpper: { - assert(retType == TYP_SIMD32); - assert(cnsNode->TypeIs(TYP_SIMD32)); - 
assert(otherNode->TypeIs(TYP_SIMD16)); - cnsNode->AsVecCon()->gtSimd32Val.v128[1] = otherNode->AsVecCon()->gtSimd16Val; - - resultNode = cnsNode; - break; - } + assert(cnsNode->TypeIs(retType)); - case NI_Vector512_WithUpper: - { - assert(retType == TYP_SIMD64); - assert(cnsNode->TypeIs(TYP_SIMD64)); - assert(otherNode->TypeIs(TYP_SIMD32)); - cnsNode->AsVecCon()->gtSimd64Val.v256[1] = otherNode->AsVecCon()->gtSimd32Val; + if (retType == TYP_SIMD32) + { + assert(otherNode->TypeIs(TYP_SIMD16)); + cnsNode->AsVecCon()->gtSimd32Val.v128[1] = otherNode->AsVecCon()->gtSimd16Val; + } + else + { + assert(retType == TYP_SIMD64); + assert(otherNode->TypeIs(TYP_SIMD32)); + cnsNode->AsVecCon()->gtSimd64Val.v256[1] = otherNode->AsVecCon()->gtSimd32Val; + } resultNode = cnsNode; break; } -#elif defined(TARGET_WASM) - case NI_Vector128_WithUpper: - { - NYI_WASM_SIMD("gtFoldExprHWIntrinsic: WithUpper"); - break; - } #endif - case NI_Vector128_op_Equality: -#if defined(TARGET_ARM64) - case NI_Vector64_op_Equality: -#elif defined(TARGET_XARCH) - case NI_Vector256_op_Equality: - case NI_Vector512_op_Equality: -#endif // !TARGET_ARM64 && !TARGET_XARCH + case NI_Vector_op_Equality: { cnsNode->AsVecCon()->EvaluateBinaryInPlace(GT_EQ, isScalar, simdBaseType, otherNode->AsVecCon()); @@ -35263,13 +34895,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } - case NI_Vector128_op_Inequality: -#if defined(TARGET_ARM64) - case NI_Vector64_op_Inequality: -#elif defined(TARGET_XARCH) - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Inequality: -#endif // !TARGET_ARM64 && !TARGET_XARCH + case NI_Vector_op_Inequality: { cnsNode->AsVecCon()->EvaluateBinaryInPlace(GT_NE, isScalar, simdBaseType, otherNode->AsVecCon()); @@ -35877,13 +35503,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } #endif - case NI_Vector128_op_Equality: -#if defined(TARGET_ARM64) - case NI_Vector64_op_Equality: -#elif defined(TARGET_XARCH) - case 
NI_Vector256_op_Equality: - case NI_Vector512_op_Equality: -#endif // !TARGET_ARM64 && !TARGET_XARCH + case NI_Vector_op_Equality: { if (varTypeIsFloating(simdBaseType)) { @@ -35898,13 +35518,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } - case NI_Vector128_op_Inequality: -#if defined(TARGET_ARM64) - case NI_Vector64_op_Inequality: -#elif defined(TARGET_XARCH) - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Inequality: -#endif // !TARGET_ARM64 && !TARGET_XARCH + case NI_Vector_op_Inequality: { if (varTypeIsFloating(simdBaseType)) { @@ -35934,9 +35548,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) switch (ni) { #if defined(TARGET_XARCH) - case NI_Vector128_ConditionalSelect: - case NI_Vector256_ConditionalSelect: - case NI_Vector512_ConditionalSelect: + case NI_Vector_ConditionalSelect: #elif defined(TARGET_ARM64) case NI_AdvSimd_BitwiseSelect: #endif @@ -36068,13 +35680,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } #endif // TARGET_ARM64 - case NI_Vector128_WithElement: -#ifdef TARGET_ARM64 - case NI_Vector64_WithElement: -#elif defined(TARGET_XARCH) - case NI_Vector256_WithElement: - case NI_Vector512_WithElement: -#endif + case NI_Vector_WithElement: { if ((cnsNode != op1) || !op2->IsCnsIntOrI() || !op3->OperIsConst()) { diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index b66874618c106b..769960abab080d 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6956,21 +6956,9 @@ struct GenTreeVecCon : public GenTree switch (intrinsic) { - case NI_Vector128_Create: - case NI_Vector128_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: -#if defined(TARGET_XARCH) - case NI_Vector256_Create: - case NI_Vector512_Create: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: -#elif defined(TARGET_ARM64) - case NI_Vector64_Create: - case 
NI_Vector64_CreateScalar: - case NI_Vector64_CreateScalarUnsafe: -#endif + case NI_Vector_Create: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { // Zero out the simdVal simdVal = {}; @@ -6980,12 +6968,7 @@ struct GenTreeVecCon : public GenTree { // CreateScalar leaves the upper bits as zero -#if defined(TARGET_XARCH) - if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar) && - (intrinsic != NI_Vector512_CreateScalar)) -#elif defined(TARGET_ARM64) - if ((intrinsic != NI_Vector64_CreateScalar) && (intrinsic != NI_Vector128_CreateScalar)) -#endif + if (intrinsic != NI_Vector_CreateScalar) { // Now assign the rest of the arguments. for (unsigned i = 1; i < ElementCount(simdSize, simdBaseType); i++) @@ -9796,20 +9779,7 @@ inline bool GenTree::IsVectorCreate() const #ifdef FEATURE_HW_INTRINSICS if (OperIs(GT_HWINTRINSIC)) { - switch (AsHWIntrinsic()->GetHWIntrinsicId()) - { - case NI_Vector128_Create: -#if defined(TARGET_XARCH) - case NI_Vector256_Create: - case NI_Vector512_Create: -#elif defined(TARGET_ARMARCH) - case NI_Vector64_Create: -#endif - return true; - - default: - return false; - } + return AsHWIntrinsic()->GetHWIntrinsicId() == NI_Vector_Create; } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index bd3211c738aa21..f9b22169aaea27 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -8,13 +8,12 @@ static const HWIntrinsicInfo hwIntrinsicInfoArray[] = { // clang-format off -#if defined(TARGET_XARCH) #define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, intCost, fltCost, category, flag) \ { \ /* name */ #name, \ /* flags */ static_cast(flag), \ /* id */ NI_##isa##_##name, \ - /* ins */ t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, \ + /* ins */ {t1, t2, t3, t4, t5, t6, t7, t8, t9, t10}, \ /* isa */ InstructionSet_##isa, \ /* simdSize */ simdSize, \ /* numArgs */ 
numArgs, \ @@ -22,40 +21,7 @@ static const HWIntrinsicInfo hwIntrinsicInfoArray[] = { /* fltCost */ fltCost, \ /* category */ category \ }, -#include "hwintrinsiclistxarch.h" -#elif defined (TARGET_ARM64) -#define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ - { \ - /* name */ #name, \ - /* flags */ static_cast(flag), \ - /* id */ NI_##isa##_##name, \ - /* ins */ t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, \ - /* isa */ InstructionSet_##isa, \ - /* simdSize */ simdSize, \ - /* numArgs */ numArgs, \ - /* intCost */ -1, \ - /* fltCost */ -1, \ - /* category */ category \ - }, -#include "hwintrinsiclistarm64.h" -#elif defined(TARGET_WASM) -#define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ - { \ - /* name */ #name, \ - /* flags */ static_cast(flag), \ - /* id */ NI_##isa##_##name, \ - /* ins */ t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, \ - /* isa */ InstructionSet_##isa, \ - /* simdSize */ simdSize, \ - /* numArgs */ numArgs, \ - /* intCost */ -1, \ - /* fltCost */ -1, \ - /* category */ category \ - }, -#include "hwintrinsiclistwasm.h" -#else -#error Unsupported platform -#endif +#include "hwintrinsiclist.h" // clang-format on }; @@ -982,9 +948,9 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // SHA { NI_Illegal, NI_Illegal }, // WAITPKG { FIRST_NI_X86Serialize, LAST_NI_X86Serialize }, // X86Serialize - { FIRST_NI_Vector128, LAST_NI_Vector128 }, // Vector128 - { FIRST_NI_Vector256, LAST_NI_Vector256 }, // Vector256 - { FIRST_NI_Vector512, LAST_NI_Vector512 }, // Vector512 + { FIRST_NI_Vector, LAST_NI_Vector }, // Vector128 + { FIRST_NI_Vector, LAST_NI_Vector }, // Vector256 + { FIRST_NI_Vector, LAST_NI_Vector }, // Vector512 { NI_Illegal, NI_Illegal }, // VectorT128 { NI_Illegal, NI_Illegal }, // VectorT256 { NI_Illegal, NI_Illegal }, // VectorT512 @@ -1017,9 +983,9 @@ static const 
HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_Sha1, LAST_NI_Sha1 }, // Sha1 { FIRST_NI_Sha256, LAST_NI_Sha256 }, // Sha256 { NI_Illegal, NI_Illegal }, // Atomics - { FIRST_NI_Vector64, LAST_NI_Vector64 }, // Vector64 - { FIRST_NI_Vector128, LAST_NI_Vector128 }, // Vector128 - { FIRST_NI_VectorT, LAST_NI_VectorT }, // VectorT + { FIRST_NI_Vector, LAST_NI_Vector }, // Vector64 + { FIRST_NI_Vector, LAST_NI_Vector }, // Vector128 + { NI_Illegal, NI_Illegal }, // VectorT { NI_Illegal, NI_Illegal }, // Dczva { NI_Illegal, NI_Illegal }, // Rcpc { NI_Illegal, NI_Illegal }, // VectorT128 @@ -1049,7 +1015,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { #elif defined(TARGET_WASM) { NI_Illegal, NI_Illegal }, // WasmBase { FIRST_NI_PackedSimd, LAST_NI_PackedSimd }, // PackedSimd - { FIRST_NI_Vector128, LAST_NI_Vector128 }, // Vector128 + { FIRST_NI_Vector, LAST_NI_Vector }, // Vector128 #else #error Unsupported platform #endif @@ -1104,6 +1070,25 @@ static void ValidateHWIntrinsicIsaRange(CORINFO_InstructionSet isa, const HWIntr } assert(isaRange.LastId != NI_Illegal); +#if defined(TARGET_XARCH) + if ((isa == InstructionSet_Vector128) || (isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector512)) + { + isa = InstructionSet_Vector; + } +#elif defined(TARGET_ARM64) + if ((isa == InstructionSet_Vector64) || (isa == InstructionSet_Vector128) || (isa == InstructionSet_VectorT)) + { + isa = InstructionSet_Vector; + } +#elif defined(TARGET_WASM) + if (isa == InstructionSet_Vector128) + { + isa = InstructionSet_Vector; + } +#else +#error Unsupported platform +#endif + // Both entries should belong to the expected ISA assert(HWIntrinsicInfo::lookupIsa(isaRange.FirstId) == isa); assert(HWIntrinsicInfo::lookupIsa(isaRange.LastId) == isa); @@ -1157,17 +1142,13 @@ static void ValidateHWIntrinsicIsaRangeArray() // binarySearchId: Does a binary search through a given ISA for the NamedIntrinsic matching a given name // // Arguments: -// isa -- 
The instruction set to search -// sig -- The signature of the intrinsic -// methodName -- The name of the method associated with the HWIntrinsic to lookup -// isLimitedVector256Isa -- true if Vector256 has limited acceleration support +// isa -- The instruction set to search +// sig -- The signature of the intrinsic +// methodName -- The name of the method associated with the HWIntrinsic to lookup // // Return Value: // The NamedIntrinsic associated with methodName and isa -static NamedIntrinsic binarySearchId(CORINFO_InstructionSet isa, - CORINFO_SIG_INFO* sig, - const char* methodName, - bool isLimitedVector256Isa) +static NamedIntrinsic binarySearchId(CORINFO_InstructionSet isa, CORINFO_SIG_INFO* sig, const char* methodName) { size_t isaIndex = static_cast(isa) - 1; assert(isaIndex < ARRAY_SIZE(hwintrinsicIsaRangeArray)); @@ -1204,15 +1185,6 @@ static NamedIntrinsic binarySearchId(CORINFO_InstructionSet isa, { assert(sortOrder == 0); assert((intrinsicInfo.numArgs == -1) || (sig->numArgs == static_cast(intrinsicInfo.numArgs))); - -#if defined(TARGET_XARCH) - // on AVX1-only CPUs we only support a subset of intrinsics in Vector256 - if (isLimitedVector256Isa && !HWIntrinsicInfo::AvxOnlyCompatible(ni)) - { - return NI_Illegal; - } -#endif // TARGET_XARCH - return ni; } } @@ -1390,9 +1362,6 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, // ISA is unsupported. For Vector256 this is when AVX2 is unsupported since integer types // can't get properly accelerated. 
- // We support some Vector256 intrinsics on AVX-only CPUs - bool isLimitedVector256Isa = false; - if (isa == InstructionSet_Vector128) { if (!isHWIntrinsicEnabled) @@ -1403,16 +1372,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, #if defined(TARGET_XARCH) else if (isa == InstructionSet_Vector256) { - if (!comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (!comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) { - if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - isLimitedVector256Isa = true; - } - else - { - return NI_Illegal; - } + return NI_Illegal; } } else if (isa == InstructionSet_Vector512) @@ -1449,29 +1411,29 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, if (isa == InstructionSet_AVX10v1) { - NamedIntrinsic ni = binarySearchId(InstructionSet_AVX512, sig, methodName, isLimitedVector256Isa); + NamedIntrinsic ni = binarySearchId(InstructionSet_AVX512, sig, methodName); if (ni != NI_Illegal) { return ni; } - ni = binarySearchId(InstructionSet_AVX512v2, sig, methodName, isLimitedVector256Isa); + ni = binarySearchId(InstructionSet_AVX512v2, sig, methodName); if (ni != NI_Illegal) { return ni; } - return binarySearchId(InstructionSet_AVX512v3, sig, methodName, isLimitedVector256Isa); + return binarySearchId(InstructionSet_AVX512v3, sig, methodName); } else if (isa == InstructionSet_AVX10v1_X64) { - return binarySearchId(InstructionSet_AVX512_X64, sig, methodName, isLimitedVector256Isa); + return binarySearchId(InstructionSet_AVX512_X64, sig, methodName); } #endif // TARGET_XARCH - return binarySearchId(isa, sig, methodName, isLimitedVector256Isa); + return binarySearchId(isa, sig, methodName); } //------------------------------------------------------------------------ @@ -1896,15 +1858,7 @@ static bool isSupportedBaseType(NamedIntrinsic intrinsic, var_types baseType) return true; } -#ifdef DEBUG - CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic); -#ifdef TARGET_XARCH - assert((isa 
== InstructionSet_Vector512) || (isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128)); -#endif // TARGET_XARCH -#ifdef TARGET_ARM64 - assert((isa == InstructionSet_Vector64) || (isa == InstructionSet_Vector128)); -#endif // TARGET_ARM64 -#endif // DEBUG + assert(HWIntrinsicInfo::lookupIsa(intrinsic) == InstructionSet_Vector); return false; } @@ -2886,4 +2840,3027 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, return retNode; } +//------------------------------------------------------------------------ +// impXplatIntrinsic: dispatch xplat intrinsics to their own implementation +// +// Arguments: +// intrinsic -- id of the intrinsic function. +// clsHnd -- class handle containing the intrinsic function. +// method -- method handle of the intrinsic function. +// sig -- signature of the intrinsic call. +// entryPoint -- The entry point information required for R2R scenarios +// simdBaseType -- generic argument of the intrinsic. +// retType -- return type of the intrinsic. +// mustExpand -- true if the intrinsic must return a GenTree*; otherwise, false +// +// Return Value: +// the expanded intrinsic.
+// +// Assumptions: +// This method is only called for Vector### methods +// +// Xarch - baseline ISA requirements have been met, as follows: +// Vector128: SSE2 +// Vector256: AVX (note that AVX2 cannot be assumed) +// Vector512: AVX-512F+CD+DQ+BW+VL +// +GenTree* Compiler::impXplatIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig R2RARG(CORINFO_CONST_LOOKUP* entryPoint), + var_types simdBaseType, + var_types retType, + unsigned simdSize, + bool mustExpand) +{ + assert(HWIntrinsicInfo::lookupIsa(intrinsic) == InstructionSet_Vector); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && varTypeIsIntegral(simdBaseType)) + { + bool potentiallyNotSupported = true; + + switch (intrinsic) + { + case NI_Vector_Abs: + { + potentiallyNotSupported = varTypeIsSigned(simdBaseType); + break; + } + + case NI_Vector_CreateAlternatingSequence: + case NI_Vector_CreateGeometricSequence: + case NI_Vector_CreateSequence: + { + GenTree* op1 = impStackTop(1).val; + GenTree* op2 = impStackTop(0).val; + + potentiallyNotSupported = !op1->OperIsConst() || !op2->OperIsConst(); + break; + } + + default: + { + potentiallyNotSupported = !HWIntrinsicInfo::AvxOnlyCompatible(intrinsic); + break; + } + } + + if (potentiallyNotSupported && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + return nullptr; + } + } +#elif defined(TARGET_WASM) + // TODO-WASM-SIMD: Finish adding xplat intrinsic lightup + return nullptr; +#endif + + if (simdSize != 0) + { + assert(varTypeIsArithmetic(simdBaseType)); + } + + GenTree* retNode = nullptr; + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + GenTree* op3 = nullptr; + GenTree* op4 = nullptr; + + bool isMinMaxIntrinsic = false; + bool isMax = false; + bool isMagnitude = false; + bool isNative = false; + bool isNumber = false; + + bool isConcatIntrinsic = false; + bool leftUpper = false; + bool rightUpper = false; + + switch (intrinsic) + { + case NI_Vector_Abs: + { +
assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdAbsNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_AddSaturate: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (varTypeIsFloating(simdBaseType)) + { + retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); + } + else + { +#if defined(TARGET_XARCH) + if (varTypeIsSmall(simdBaseType)) + { + if (simdSize == 64) + { + intrinsic = NI_AVX512_AddSaturate; + } + else if (simdSize == 32) + { + intrinsic = NI_AVX2_AddSaturate; + } + else + { + assert(simdSize == 16); + intrinsic = NI_X86Base_AddSaturate; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + } + else if (varTypeIsUnsigned(simdBaseType)) + { + // For unsigned we simply have to detect `(x + y) < x` + // and in that scenario return MaxValue (AllBitsSet) + + GenTree* cns = gtNewAllBitsSetConNode(retType); + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + + GenTree* tmp = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); + GenTree* tmpDup1 = fgMakeMultiUse(&tmp); + GenTree* msk = gtNewSimdCmpOpNode(GT_LT, retType, tmp, op1Dup1, simdBaseType, simdSize); + + retNode = gtNewSimdCndSelNode(retType, msk, cns, tmpDup1, simdBaseType, simdSize); + } + else + { + // For signed the logic is a bit more complex, but is + // explained on the managed side as part of Scalar.AddSaturate + + GenTreeVecCon* minCns = gtNewVconNode(retType); + GenTreeVecCon* maxCns = gtNewVconNode(retType); + + switch (simdBaseType) + { + case TYP_SHORT: + { + minCns->EvaluateBroadcastInPlace(INT16_MIN); + maxCns->EvaluateBroadcastInPlace(INT16_MAX); + break; + } + + case TYP_INT: + { + minCns->EvaluateBroadcastInPlace(INT32_MIN); + maxCns->EvaluateBroadcastInPlace(INT32_MAX); + break; + } + + case TYP_LONG: + { + minCns->EvaluateBroadcastInPlace(INT64_MIN); + maxCns->EvaluateBroadcastInPlace(INT64_MAX); + break; + 
} + + default: + { + unreached(); + } + } + + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op2Dup1 = fgMakeMultiUse(&op2); + + GenTree* tmp = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); + + GenTree* tmpDup1 = fgMakeMultiUse(&tmp); + GenTree* tmpDup2 = gtCloneExpr(tmpDup1); + + GenTree* msk = gtNewSimdIsNegativeNode(retType, tmpDup1, simdBaseType, simdSize); + GenTree* ovf = gtNewSimdCndSelNode(retType, msk, maxCns, minCns, simdBaseType, simdSize); + + // The mask we need is ((a ^ b) & ~(b ^ c)) < 0 + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // tmpDup1 = a: 0xF0 + // op1Dup1 = b: 0xCC + // op2Dup2 = c: 0xAA + // + // 0x18 = A ? norBC : andBC + // a ? ~(b | c) : (b & c) + msk = gtNewSimdTernaryLogicNode(retType, tmp, op1Dup1, op2Dup1, gtNewIconNode(0x18), + simdBaseType, simdSize); + } + else + { + GenTree* op1Dup2 = gtCloneExpr(op1Dup1); + + GenTree* msk2 = gtNewSimdBinOpNode(GT_XOR, retType, tmp, op1Dup1, simdBaseType, simdSize); + GenTree* msk3 = gtNewSimdBinOpNode(GT_XOR, retType, op1Dup2, op2Dup1, simdBaseType, simdSize); + + msk = gtNewSimdBinOpNode(GT_AND_NOT, retType, msk2, msk3, simdBaseType, simdSize); + } + + msk = gtNewSimdIsNegativeNode(retType, msk, simdBaseType, simdSize); + retNode = gtNewSimdCndSelNode(retType, msk, ovf, tmpDup2, simdBaseType, simdSize); + } +#elif defined(TARGET_ARM64) + intrinsic = NI_AdvSimd_AddSaturate; + + if ((simdSize == 8) && varTypeIsLong(simdBaseType)) + { + intrinsic = NI_AdvSimd_AddSaturateScalar; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); +#elif defined(TARGET_WASM) + // TODO-WASM-SIMD: Implement NI_Vector_AddSaturate + return nullptr; +#else + unreached(); +#endif + } + break; + } + + case NI_Vector_AndNot: + { + assert(sig->numArgs == 2); + + // We don't want to support creating AND_NOT nodes prior to LIR + // as it can break important optimizations. 
We'll produce this + // in lowering instead so decompose into the individual operations + // on import + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + op2 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseType, simdSize)); + retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_As: + case NI_Vector_AsByte: + case NI_Vector_AsDouble: + case NI_Vector_AsInt16: + case NI_Vector_AsInt32: + case NI_Vector_AsInt64: + case NI_Vector_AsNInt: + case NI_Vector_AsNUInt: + case NI_Vector_AsSByte: + case NI_Vector_AsSingle: + case NI_Vector_AsUInt16: + case NI_Vector_AsUInt32: + case NI_Vector_AsUInt64: + case NI_Vector_AsVector4: +#if defined(TARGET_ARM64) + case NI_Vector_AsVector: +#endif + { + // We fold away the cast here, as it only exists to satisfy + // the type system. It is safe to do this here since the retNode type + // and the signature return type are both the same TYP_SIMD. + + assert(sig->numArgs == 1); + retNode = impSIMDPopStack(); + + assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + break; + } + +#if defined(TARGET_XARCH) + case NI_Vector_AsVector: + case NI_Vector_AsVector256: + case NI_Vector_AsVector512: + { + assert(sig->numArgs == 1); + uint32_t vectorTByteLength = getVectorTByteLength(); + + if (vectorTByteLength == 0) + { + // VectorT ISA was not present. Fall back to managed. + break; + } + + if (vectorTByteLength == simdSize) + { + // We fold away the cast here, as it only exists to satisfy + // the type system. It is safe to do this here since the retNode type + // and the signature return type are both the same TYP_SIMD. + + retNode = impSIMDPopStack(); + assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + + break; + } + + // Vector is a different size than the source/target SIMD type, so we should + // treat this as a call to the appropriate narrowing or widening intrinsic.
+ + NamedIntrinsic convertIntrinsic = NI_Illegal; + unsigned convertSize = 0; + + switch (vectorTByteLength) + { + case XMM_REGSIZE_BYTES: + { + if (intrinsic == NI_Vector_AsVector) + { + if (simdSize == 64) + { + convertIntrinsic = NI_Vector_GetLower128; + convertSize = 64; + } + else + { + assert(simdSize == 32); + convertIntrinsic = NI_Vector_GetLower; + convertSize = 32; + } + } + else if (intrinsic == NI_Vector_AsVector512) + { + assert(simdSize == 64); + convertIntrinsic = NI_Vector_ToVector512; + convertSize = 16; + } + else + { + assert(intrinsic == NI_Vector_AsVector256); + assert(simdSize == 32); + + convertIntrinsic = NI_Vector_ToVector256; + convertSize = 16; + } + break; + } + + case YMM_REGSIZE_BYTES: + { + if (intrinsic == NI_Vector_AsVector) + { + if (simdSize == 64) + { + convertIntrinsic = NI_Vector_GetLower; + convertSize = 64; + } + else + { + assert(simdSize == 16); + convertIntrinsic = NI_Vector_ToVector256; + convertSize = 16; + } + } + else + { + assert(intrinsic == NI_Vector_AsVector512); + assert(simdSize == 64); + + convertIntrinsic = NI_Vector_ToVector512; + convertSize = 32; + } + break; + } + + case ZMM_REGSIZE_BYTES: + { + if (intrinsic == NI_Vector_AsVector) + { + assert((simdSize == 16) || (simdSize == 32)); + convertIntrinsic = NI_Vector_ToVector512; + convertSize = simdSize; + } + else + { + assert(intrinsic == NI_Vector_AsVector256); + assert(simdSize == 32); + + convertIntrinsic = NI_Vector_GetLower; + convertSize = 64; + } + break; + } + + default: + { + unreached(); + } + } + + assert(convertIntrinsic != NI_Illegal); + assert(convertSize != 0); + + op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, convertIntrinsic, simdBaseType, convertSize); + + break; + } +#endif + + case NI_Vector_AsVector128: + { + assert(sig->numArgs == 1); + assert(retType == TYP_SIMD16); + + switch (simdSize) + { + case 8: + { + assert(simdBaseType == TYP_FLOAT); + + op1 = impSIMDPopStack(); + + if (op1->IsCnsVec()) + { + 
GenTreeVecCon* vecCon = op1->AsVecCon(); + vecCon->gtType = TYP_SIMD16; + + vecCon->gtSimdVal.f32[2] = 0.0f; + vecCon->gtSimdVal.f32[3] = 0.0f; + + return vecCon; + } + +#if defined(TARGET_ARM64) + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector_ToVector128Unsafe, simdBaseType, 8); +#else + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector_AsVector128Unsafe, simdBaseType, 8); +#endif + + GenTree* idx = gtNewIconNode(2, TYP_INT); + GenTree* zero = gtNewZeroConNode(TYP_FLOAT); + op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); + + idx = gtNewIconNode(3, TYP_INT); + zero = gtNewZeroConNode(TYP_FLOAT); + retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); + + break; + } + + case 12: + { + assert(simdBaseType == TYP_FLOAT); + + op1 = impSIMDPopStack(); + + if (op1->IsCnsVec()) + { + GenTreeVecCon* vecCon = op1->AsVecCon(); + vecCon->gtType = TYP_SIMD16; + + vecCon->gtSimdVal.f32[3] = 0.0f; + return vecCon; + } + + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector_AsVector128Unsafe, simdBaseType, 12); + + GenTree* idx = gtNewIconNode(3, TYP_INT); + GenTree* zero = gtNewZeroConNode(TYP_FLOAT); + retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); + break; + } + + case 16: + { + // We fold away the cast here, as it only exists to satisfy + // the type system. It is safe to do this here since the retNode type + // and the signature return type are both the same TYP_SIMD. + + retNode = impSIMDPopStack(); + assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + + break; + } + +#if defined(TARGET_XARCH) + case 32: + case 64: + { + // Vector is larger, so we should treat this as a call to the appropriate narrowing intrinsic + intrinsic = simdSize == YMM_REGSIZE_BYTES ? 
NI_Vector_GetLower : NI_Vector_GetLower128; + + op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); + break; + } +#endif + + default: + { + unreached(); + } + } + + break; + } + + case NI_Vector_AsVector128Unsafe: + { + assert(sig->numArgs == 1); + assert(retType == TYP_SIMD16); + assert(simdBaseType == TYP_FLOAT); + assert((simdSize == 8) || (simdSize == 12)); + + op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector_AsVector128Unsafe, simdBaseType, simdSize); + break; + } + + case NI_Vector_AsVector2: + case NI_Vector_AsVector3: + { + assert((simdSize == 16) && (simdBaseType == TYP_FLOAT)); + assert((retType == TYP_SIMD8) || (retType == TYP_SIMD12)); + + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + +#if defined(TARGET_ARM64) + if (retType == TYP_SIMD8) + { + retNode = gtNewSimdGetLowerNode(TYP_SIMD8, op1, simdBaseType, simdSize); + break; + } +#endif + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); + break; + } + + case NI_Vector_Ceiling: + { + assert(sig->numArgs == 1); + + if (!varTypeIsFloating(simdBaseType)) + { + retNode = impSIMDPopStack(); + break; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdCeilNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConcatLowerLower: + { + isConcatIntrinsic = true; + break; + } + + case NI_Vector_ConcatLowerUpper: + { + isConcatIntrinsic = true; + rightUpper = true; + break; + } + + case NI_Vector_ConcatUpperLower: + { + isConcatIntrinsic = true; + leftUpper = true; + break; + } + + case NI_Vector_ConcatUpperUpper: + { + isConcatIntrinsic = true; + leftUpper = true; + rightUpper = true; + break; + } + + case NI_Vector_ConditionalSelect: + { + assert(sig->numArgs == 3); + + op3 = impSIMDPopStack(); + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseType, simdSize); + break; + } + + 
case NI_Vector_ConvertToDouble: + { + assert(sig->numArgs == 1); + assert(varTypeIsLong(simdBaseType)); + +#if defined(TARGET_XARCH) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + break; + } + + if (simdSize == 64) + { + intrinsic = NI_AVX512_ConvertToVector512Double; + } + else if (simdSize == 32) + { + intrinsic = NI_AVX512_ConvertToVector256Double; + } + else + { + assert(simdSize == 16); + intrinsic = NI_AVX512_ConvertToVector128Double; + } +#elif defined(TARGET_ARM64) + if (simdSize == 16) + { + intrinsic = NI_AdvSimd_Arm64_ConvertToDouble; + } + else + { + assert(simdSize == 8); + intrinsic = NI_AdvSimd_Arm64_ConvertToDoubleScalar; + } +#elif defined(TARGET_WASM) + // TODO-WASM-SIMD: Implement NI_Vector_ConvertToDouble + return nullptr; +#else + unreached(); +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToInt32: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_FLOAT); + + op1 = impSIMDPopStack(); + retNode = gtNewSimdCvtNode(retType, op1, TYP_INT, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToInt32Native: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_FLOAT); + + if (BlockNonDeterministicIntrinsics(mustExpand)) + { + break; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_INT, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToInt64: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_DOUBLE); + +#if defined(TARGET_XARCH) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdCvtNode(retType, op1, TYP_LONG, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToInt64Native: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_DOUBLE); + + if (BlockNonDeterministicIntrinsics(mustExpand)) + { + break; + } + +#if 
defined(TARGET_XARCH) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_LONG, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToSingle: + { + assert(sig->numArgs == 1); + assert(varTypeIsInt(simdBaseType)); + +#if defined(TARGET_XARCH) + if (simdBaseType == TYP_INT) + { + if (simdSize == 64) + { + intrinsic = NI_AVX512_ConvertToVector512Single; + } + else if (simdSize == 32) + { + intrinsic = NI_AVX_ConvertToVector256Single; + } + else + { + assert(simdSize == 16); + intrinsic = NI_X86Base_ConvertToVector128Single; + } + } + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + if (simdSize == 64) + { + intrinsic = NI_AVX512_ConvertToVector512Single; + } + else if (simdSize == 32) + { + intrinsic = NI_AVX512_ConvertToVector256Single; + } + else + { + assert(simdSize == 16); + intrinsic = NI_AVX512_ConvertToVector128Single; + } + } + else + { + break; + } +#elif defined(TARGET_ARM64) + intrinsic = NI_AdvSimd_ConvertToSingle; +#elif defined(TARGET_WASM) + // TODO-WASM-SIMD: Implement NI_Vector_ConvertToSingle + return nullptr; +#else + unreached(); +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToUInt32: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_FLOAT); + +#if defined(TARGET_XARCH) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdCvtNode(retType, op1, TYP_UINT, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToUInt32Native: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_FLOAT); + + if (BlockNonDeterministicIntrinsics(mustExpand)) + { + break; + } + +#if defined(TARGET_XARCH) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + break; + } +#endif + + op1 
= impSIMDPopStack(); + retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_UINT, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToUInt64: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_DOUBLE); + +#if defined(TARGET_XARCH) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdCvtNode(retType, op1, TYP_ULONG, simdBaseType, simdSize); + break; + } + + case NI_Vector_ConvertToUInt64Native: + { + assert(sig->numArgs == 1); + assert(simdBaseType == TYP_DOUBLE); + + if (BlockNonDeterministicIntrinsics(mustExpand)) + { + break; + } + +#if defined(TARGET_XARCH) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_ULONG, simdBaseType, simdSize); + break; + } + + case NI_Vector_Create: + { + retNode = impSimdCreate(intrinsic, sig, simdBaseType, retType, simdSize); + break; + } + + case NI_Vector_CreateAlternatingSequence: + { + assert(sig->numArgs == 2); + + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector CreateAlternatingSequence")); + + op2 = impPopStack().val; + op1 = impPopStack().val; + + retNode = gtNewSimdCreateAlternatingSequenceNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_CreateGeometricSequence: + { + assert(sig->numArgs == 2); + + bool multiplierIsConst = impStackTop(0).val->OperIsConst(); + bool initialIsConst = impStackTop(1).val->OperIsConst(); + bool canGenerate = multiplierIsConst; + +#if !defined(TARGET_XARCH) + if (canGenerate && !initialIsConst) + { +#if defined(TARGET_ARM64) + canGenerate = !varTypeIsLong(simdBaseType) || (simdSize == 8); +#else + canGenerate = false; +#endif + } +#endif + + if (!canGenerate) + { + if (opts.OptimizationEnabled()) + { + op2 = impPopStack().val; + op1 = impPopStack().val; + + retNode = 
gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); + } + break; + } + + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector CreateGeometricSequence")); + + op2 = impPopStack().val; + op1 = impPopStack().val; + + retNode = gtNewSimdCreateGeometricSequenceNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_CreateScalar: + { + assert(sig->numArgs == 1); + + op1 = impPopStack().val; + retNode = gtNewSimdCreateScalarNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_CreateScalarUnsafe: + { + assert(sig->numArgs == 1); + + op1 = impPopStack().val; + retNode = gtNewSimdCreateScalarUnsafeNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_CreateSequence: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_ARM64) + if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->OperIsConst()) + { + // TODO-ARM64-CQ: We should support long/ulong multiplication. 
+ break; + } +#endif + + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector CreateSequence")); + + op2 = impPopStack().val; + op1 = impPopStack().val; + + retNode = gtNewSimdCreateSequenceNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_Dot: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_ARM64) + if (varTypeIsLong(simdBaseType)) + { + break; + } +#endif + + var_types simdType = getSIMDTypeForSize(simdSize); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + +#if defined(TARGET_XARCH) + if ((simdSize == 64) || varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType)) + { + // The lowering for Dot doesn't handle these cases, so import as Sum(left * right) + retNode = gtNewSimdBinOpNode(GT_MUL, simdType, op1, op2, simdBaseType, simdSize); + retNode = gtNewSimdSumNode(retType, retNode, simdBaseType, simdSize); + break; + } +#endif + + retNode = gtNewSimdDotProdNode(simdType, op1, op2, simdBaseType, simdSize); + retNode = gtNewSimdToScalarNode(retType, retNode, simdBaseType, simdSize); + break; + } + + case NI_Vector_Equals: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_EqualsAny: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_ExtractMostSignificantBits: + { + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + +#if defined(TARGET_XARCH) + if ((simdSize == 64) || canUseEvexEncoding()) + { + op1 = gtFoldExpr(gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseType, simdSize)); + retNode = gtNewSimdHWIntrinsicNode(retType, 
op1, NI_AVX512_MoveMask, simdBaseType, simdSize); + break; + } + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + intrinsic = (simdSize == 32) ? NI_AVX2_MoveMask : NI_X86Base_MoveMask; + break; + } + + case TYP_SHORT: + case TYP_USHORT: + { + break; + } + + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + { + simdBaseType = TYP_FLOAT; + intrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_X86Base_MoveMask; + break; + } + + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + { + simdBaseType = TYP_DOUBLE; + intrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_X86Base_MoveMask; + break; + } + + default: + { + unreached(); + } + } +#endif + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); + break; + } + + case NI_Vector_Floor: + { + assert(sig->numArgs == 1); + + if (!varTypeIsFloating(simdBaseType)) + { + retNode = impSIMDPopStack(); + break; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdFloorNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_FusedMultiplyAdd: + { + assert(sig->numArgs == 3); + assert(varTypeIsFloating(simdBaseType)); + +#if defined(TARGET_XARCH) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#elif defined(TARGET_ARM64) + impSpillSideEffect(true, + stackState.esStackDepth - 3 DEBUGARG("Spilling op1 side effects for FusedMultiplyAdd")); + + impSpillSideEffect(true, + stackState.esStackDepth - 2 DEBUGARG("Spilling op2 side effects for FusedMultiplyAdd")); +#endif + + op3 = impSIMDPopStack(); + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseType, simdSize); + break; + } + + case NI_Vector_GetElement: + { + assert(sig->numArgs == 2); + + op2 = impPopStack().val; + op1 = impSIMDPopStack(); + + retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) + case NI_Vector_GetLower: + { 
+ assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if (simdSize == 8) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdGetLowerNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_GetUpper: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if (simdSize == 8) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdGetUpperNode(retType, op1, simdBaseType, simdSize); + break; + } +#endif // !TARGET_XARCH && !TARGET_ARM64 + + case NI_Vector_GreaterThan: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_GreaterThanAll: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_GreaterThanAny: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_GreaterThanOrEqual: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_GreaterThanOrEqualAll: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, 
simdBaseType, simdSize); + break; + } + + case NI_Vector_GreaterThanOrEqualAny: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsEvenInteger: + { + assert(sig->numArgs == 1); + + if (varTypeIsFloating(simdBaseType)) + { + // The code for handling floating-point is decently complex but also expected + // to be rare, so we fallback to the managed implementation, which is accelerated + break; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsEvenIntegerNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsFinite: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsFiniteNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsInfinity: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsInfinityNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsInteger: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsIntegerNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsNaN: + { + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsNaNNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsNegative: + { + assert(sig->numArgs == 1); + +#if 
defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsNegativeNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsNegativeInfinity: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsNegativeInfinityNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsNormal: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsNormalNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsOddInteger: + { + assert(sig->numArgs == 1); + + if (varTypeIsFloating(simdBaseType)) + { + // The code for handling floating-point is decently complex but also expected + // to be rare, so we fallback to the managed implementation, which is accelerated + break; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsOddIntegerNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsPositive: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsPositiveNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsPositiveInfinity: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsPositiveInfinityNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsSubnormal: + { + assert(sig->numArgs == 
1); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsSubnormalNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_IsZero: + { + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdIsZeroNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_LessThan: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_LessThanAll: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_LessThanAny: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAnyNode(GT_LT, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_LessThanOrEqual: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_LessThanOrEqualAll: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_LessThanOrEqualAny: + { + 
assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_LoadAligned: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_ARM64) + if (opts.OptimizationDisabled()) + { + // ARM64 doesn't have aligned loads, but aligned loads are only validated to be + // aligned when optimizations are disable, so only skip the intrinsic handling + // if optimizations are enabled + break; + } +#endif + + op1 = impPopStack().val; + + if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) + { + // If what we have is a BYREF, that's what we really want, so throw away the cast. + op1 = op1->gtGetOp1(); + } + + retNode = gtNewSimdLoadAlignedNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_LoadAlignedNonTemporal: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_ARM64) + if (opts.OptimizationDisabled()) + { + // ARM64 doesn't have aligned loads, but aligned loads are only validated to be + // aligned when optimizations are disable, so only skip the intrinsic handling + // if optimizations are enabled + break; + } +#endif + + op1 = impPopStack().val; + + if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) + { + // If what we have is a BYREF, that's what we really want, so throw away the cast. + op1 = op1->gtGetOp1(); + } + + retNode = gtNewSimdLoadNonTemporalNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_LoadUnsafe: + { + if (sig->numArgs == 2) + { + op2 = impPopStack().val; + } + else + { + assert(sig->numArgs == 1); + } + + op1 = impPopStack().val; + + if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) + { + // If what we have is a BYREF, that's what we really want, so throw away the cast. 
+ op1 = op1->gtGetOp1(); + } + + if (sig->numArgs == 2) + { + op3 = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet()); + op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, op3); + op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2); + } + + retNode = gtNewSimdLoadNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_Max: + { + isMinMaxIntrinsic = true; + isMax = true; + break; + } + + case NI_Vector_MaxMagnitude: + { + isMinMaxIntrinsic = true; + isMax = true; + isMagnitude = true; + break; + } + + case NI_Vector_MaxMagnitudeNumber: + { + isMinMaxIntrinsic = true; + isMax = true; + isMagnitude = true; + isNumber = true; + break; + } + + case NI_Vector_MaxNative: + { + isMinMaxIntrinsic = true; + isMax = true; + isNative = true; + break; + } + + case NI_Vector_MaxNumber: + { + isMinMaxIntrinsic = true; + isMax = true; + isNumber = true; + break; + } + + case NI_Vector_Min: + { + isMinMaxIntrinsic = true; + break; + } + + case NI_Vector_MinMagnitude: + { + isMinMaxIntrinsic = true; + isMagnitude = true; + break; + } + + case NI_Vector_MinMagnitudeNumber: + { + isMinMaxIntrinsic = true; + isMagnitude = true; + isNumber = true; + break; + } + + case NI_Vector_MinNative: + { + isMinMaxIntrinsic = true; + isNative = true; + break; + } + + case NI_Vector_MinNumber: + { + isMinMaxIntrinsic = true; + isNumber = true; + break; + } + + case NI_Vector_MultiplyAddEstimate: + { + assert(sig->numArgs == 3); + + if (BlockNonDeterministicIntrinsics(mustExpand)) + { + break; + } + +#if defined(TARGET_ARM64) + if (varTypeIsFloating(simdBaseType)) + { + impSpillSideEffect(true, stackState.esStackDepth - + 3 DEBUGARG("Spilling op1 side effects for MultiplyAddEstimate")); + + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op2 side effects for MultiplyAddEstimate")); + } +#endif + + op3 = impSIMDPopStack(); + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool isFmaSupported = varTypeIsFloating(simdBaseType); + +#if 
defined(TARGET_XARCH) + if (isFmaSupported) + { + isFmaSupported = compExactlyDependsOn(InstructionSet_AVX2); + } +#endif + + if (isFmaSupported) + { + retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseType, simdSize); + } + else + { + GenTree* mulNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseType, simdSize); + retNode = gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseType, simdSize); + } + break; + } + + case NI_Vector_Narrow: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_NarrowWithSaturation: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (simdBaseType == TYP_DOUBLE) + { + // gtNewSimdNarrowNode uses the base type of the return for the simdBaseType + retNode = gtNewSimdNarrowNode(retType, op1, op2, TYP_FLOAT, simdSize); + } + else if ((simdSize == 16) && ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_INT))) + { + // PackSignedSaturate uses the base type of the return for the simdBaseType + simdBaseType = (simdBaseType == TYP_SHORT) ? 
TYP_BYTE : TYP_SHORT; + + intrinsic = NI_X86Base_PackSignedSaturate; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + } + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + if ((simdSize == 32) || (simdSize == 64)) + { + if (simdSize == 32) + { + intrinsic = NI_Vector_ToVector512Unsafe; + + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD64, op1, intrinsic, simdBaseType, simdSize); + op1 = gtNewSimdWithUpperNode(TYP_SIMD64, op1, op2, simdBaseType, simdSize * 2); + } + + switch (simdBaseType) + { + case TYP_SHORT: + { + intrinsic = NI_AVX512_ConvertToVector256SByteWithSaturation; + break; + } + + case TYP_USHORT: + { + intrinsic = NI_AVX512_ConvertToVector256ByteWithSaturation; + break; + } + + case TYP_INT: + { + intrinsic = NI_AVX512_ConvertToVector256Int16WithSaturation; + break; + } + + case TYP_UINT: + { + intrinsic = NI_AVX512_ConvertToVector256UInt16WithSaturation; + break; + } + + case TYP_LONG: + { + intrinsic = NI_AVX512_ConvertToVector256Int32WithSaturation; + break; + } + + case TYP_ULONG: + { + intrinsic = NI_AVX512_ConvertToVector256UInt32WithSaturation; + break; + } + + default: + { + unreached(); + } + } + } + else + { + assert(simdSize == 16); + intrinsic = NI_Vector_ToVector256Unsafe; + + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, intrinsic, simdBaseType, simdSize); + op1 = gtNewSimdWithUpperNode(TYP_SIMD32, op1, op2, simdBaseType, simdSize * 2); + + switch (simdBaseType) + { + case TYP_USHORT: + { + intrinsic = NI_AVX512_ConvertToVector128ByteWithSaturation; + break; + } + + case TYP_UINT: + { + intrinsic = NI_AVX512_ConvertToVector128UInt16WithSaturation; + break; + } + + case TYP_LONG: + { + intrinsic = NI_AVX512_ConvertToVector128Int32WithSaturation; + break; + } + + case TYP_ULONG: + { + intrinsic = NI_AVX512_ConvertToVector128UInt32WithSaturation; + break; + } + + default: + { + unreached(); + } + } + } + + if (simdSize == 64) + { + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, 
intrinsic, simdBaseType, simdSize); + op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op2, intrinsic, simdBaseType, simdSize); + + retNode = gtNewSimdWithUpperNode(retType, op1, op2, simdBaseType, simdSize); + } + else + { + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize * 2); + } + } + else + { + // gtNewSimdNarrowNode uses the base type of the return for the simdBaseType + var_types narrowSimdBaseType; + + GenTreeVecCon* minCns = varTypeIsSigned(simdBaseType) ? gtNewVconNode(retType) : nullptr; + GenTreeVecCon* maxCns = gtNewVconNode(retType); + + switch (simdBaseType) + { + case TYP_SHORT: + { + minCns->EvaluateBroadcastInPlace(INT8_MIN); + maxCns->EvaluateBroadcastInPlace(INT8_MAX); + + narrowSimdBaseType = TYP_BYTE; + break; + } + + case TYP_USHORT: + { + maxCns->EvaluateBroadcastInPlace(UINT8_MAX); + narrowSimdBaseType = TYP_UBYTE; + break; + } + + case TYP_INT: + { + minCns->EvaluateBroadcastInPlace(INT16_MIN); + maxCns->EvaluateBroadcastInPlace(INT16_MAX); + + narrowSimdBaseType = TYP_SHORT; + break; + } + + case TYP_UINT: + { + maxCns->EvaluateBroadcastInPlace(UINT16_MAX); + narrowSimdBaseType = TYP_USHORT; + break; + } + + case TYP_LONG: + { + minCns->EvaluateBroadcastInPlace(INT32_MIN); + maxCns->EvaluateBroadcastInPlace(INT32_MAX); + + narrowSimdBaseType = TYP_INT; + break; + } + + case TYP_ULONG: + { + maxCns->EvaluateBroadcastInPlace(UINT32_MAX); + narrowSimdBaseType = TYP_UINT; + break; + } + + default: + { + unreached(); + } + } + + // This does a clamp which is defined as: Min(Max(value, min), max) + // which means that we do a max computation if a minimum constant is specified + // There will be none specified for unsigned to unsigned narrowing since + // they share a lower bound (0) and will already be correct. 
+ + if (minCns != nullptr) + { + op1 = gtNewSimdMinMaxNode(retType, op1, minCns, simdBaseType, simdSize, /* isMax */ true, + /* isMagnitude */ false, /* isNumber */ false); + op2 = gtNewSimdMinMaxNode(retType, op2, gtCloneExpr(minCns), simdBaseType, simdSize, + /* isMax */ true, /* isMagnitude */ false, /* isNumber */ false); + } + + op1 = gtNewSimdMinMaxNode(retType, op1, maxCns, simdBaseType, simdSize, /* isMax */ false, + /* isMagnitude */ false, /* isNumber */ false); + op2 = gtNewSimdMinMaxNode(retType, op2, gtCloneExpr(maxCns), simdBaseType, simdSize, + /* isMax */ false, /* isMagnitude */ false, /* isNumber */ false); + + retNode = gtNewSimdNarrowNode(retType, op1, op2, narrowSimdBaseType, simdSize); + } +#elif defined(TARGET_ARM64) + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (varTypeIsFloating(simdBaseType)) + { + retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseType, simdSize); + } + else if (simdSize == 16) + { + intrinsic = NI_AdvSimd_ExtractNarrowingSaturateLower; + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, intrinsic, simdBaseType, 8); + + intrinsic = NI_AdvSimd_ExtractNarrowingSaturateUpper; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + } + else + { + intrinsic = NI_Vector_ToVector128Unsafe; + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, simdBaseType, simdSize); + + op1 = gtNewSimdWithUpperNode(TYP_SIMD16, op1, op2, simdBaseType, 16); + + intrinsic = NI_AdvSimd_ExtractNarrowingSaturateLower; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); + } +#elif defined(TARGET_WASM) + // TODO-WASM-SIMD: Implement NI_Vector_NarrowWithSaturation + return nullptr; +#else + unreached(); +#endif + break; + } + + case NI_Vector_Reverse: + { + assert(sig->numArgs == 1); + +#if defined(TARGET_XARCH) + if ((simdSize == 64) && varTypeIsByte(simdBaseType)) + { + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512v2)) + { + break; + } + } 
+#endif + + op1 = impSIMDPopStack(); + retNode = gtNewSimdReverseNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_Round: + { + if (sig->numArgs != 1) + { + break; + } + + if (!varTypeIsFloating(simdBaseType)) + { + retNode = impSIMDPopStack(); + break; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdRoundNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_ShiftLeft: + { + assert(sig->numArgs == 2); + + if (!varTypeIsSIMD(impStackTop(0).val)) + { + // We just want the inlining profitability boost for the helper intrinsics/ + // that have operator alternatives like `simd << int` + break; + } + +#if defined(TARGET_XARCH) + if ((simdSize == 16) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + +#if defined(TARGET_XARCH) + if (simdSize == 64) + { + intrinsic = NI_AVX512_ShiftLeftLogicalVariable; + } + else + { + assert((simdSize == 16) || (simdSize == 32)); + intrinsic = NI_AVX2_ShiftLeftLogicalVariable; + } +#elif defined(TARGET_ARM64) + if (simdSize == 16) + { + intrinsic = NI_AdvSimd_ShiftLogical; + } + else + { + assert(simdSize == 8); + intrinsic = varTypeIsLong(simdBaseType) ? 
NI_AdvSimd_ShiftLogicalScalar : NI_AdvSimd_ShiftLogical; + } +#elif defined(TARGET_WASM) + // TODO-WASM-SIMD: Implement NI_Vector_ShiftLeft + return nullptr; +#else + unreached(); +#endif + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + break; + } + + case NI_Vector_Shuffle: + case NI_Vector_ShuffleNative: + case NI_Vector_ShuffleNativeFallback: + { + assert((sig->numArgs == 2) || (sig->numArgs == 3)); + + bool isShuffleNative = (intrinsic != NI_Vector_Shuffle); + bool isNonDeterministic = isShuffleNative; + +#if defined(TARGET_ARM64) + if (isNonDeterministic) + { + isNonDeterministic = genTypeSize(simdBaseType) > 1; + } +#endif + + if (isNonDeterministic && BlockNonDeterministicIntrinsics(mustExpand)) + { + break; + } + + GenTree* indices = impStackTop(0).val; + + // Check if the required intrinsics are available to emit now (validForShuffle). If we have variable + // indices that might become possible to emit later (due to them becoming constant), this will be + // indicated in canBecomeValidForShuffle; otherwise, it's just the same as validForShuffle. + bool canBecomeValidForShuffle = false; + bool validForShuffle = + IsValidForShuffle(indices, simdSize, simdBaseType, &canBecomeValidForShuffle, isShuffleNative); + + // If it isn't valid for shuffle (and can't become valid later), then give up now. + if (!canBecomeValidForShuffle) + { + return nullptr; + } + + // If the indices might become constant later, then we don't emit for now, delay until later. 
+ if ((!validForShuffle) || (!indices->IsCnsVec())) + { + assert(sig->numArgs == 2); + + if (opts.OptimizationEnabled()) + { + // Only enable late stage rewriting if optimizations are enabled + // as we won't otherwise encounter a constant at the later point + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + + retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); + break; + } + + // If we're not doing late stage rewriting, just return null now as it won't become valid. + if (!validForShuffle) + { + return nullptr; + } + } + + if (sig->numArgs == 2) + { + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseType, simdSize, isShuffleNative); + } + break; + } + + case NI_Vector_Sqrt: + { + assert(sig->numArgs == 1); + + if (varTypeIsFloating(simdBaseType)) + { + op1 = impSIMDPopStack(); + retNode = gtNewSimdSqrtNode(retType, op1, simdBaseType, simdSize); + } + break; + } + + case NI_Vector_StoreAligned: + { + assert(sig->numArgs == 2); + assert(retType == TYP_VOID); + +#if defined(TARGET_ARM64) + if (opts.OptimizationDisabled()) + { + // ARM64 doesn't have aligned stores, but aligned stores are only validated to be + // aligned when optimizations are disabled, so only skip the intrinsic handling + // if optimizations are disabled + break; + } +#endif + + impSpillSideEffect(true, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); + + op2 = impPopStack().val; + + if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) + { + // If what we have is a BYREF, that's what we really want, so throw away the cast. 
+ op2 = op2->gtGetOp1(); + } + + op1 = impSIMDPopStack(); + + retNode = gtNewSimdStoreAlignedNode(op2, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_StoreAlignedNonTemporal: + { + assert(sig->numArgs == 2); + assert(retType == TYP_VOID); + +#if defined(TARGET_ARM64) + if (opts.OptimizationDisabled()) + { + // ARM64 doesn't have aligned stores, but aligned stores are only validated to be + // aligned when optimizations are disabled, so only skip the intrinsic handling + // if optimizations are disabled + break; + } +#endif + + impSpillSideEffect(true, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); + + op2 = impPopStack().val; + + if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) + { + // If what we have is a BYREF, that's what we really want, so throw away the cast. + op2 = op2->gtGetOp1(); + } + + op1 = impSIMDPopStack(); + + retNode = gtNewSimdStoreNonTemporalNode(op2, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_StoreUnsafe: + { + assert(retType == TYP_VOID); + + if (sig->numArgs == 3) + { + impSpillSideEffect(true, + stackState.esStackDepth - 3 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); + + op3 = impPopStack().val; + } + else + { + assert(sig->numArgs == 2); + + impSpillSideEffect(true, + stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); + } + + op2 = impPopStack().val; + + if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) + { + // If what we have is a BYREF, that's what we really want, so throw away the cast. 
+ op2 = op2->gtGetOp1(); + } + + if (sig->numArgs == 3) + { + op4 = gtNewIconNode(genTypeSize(simdBaseType), op3->TypeGet()); + op3 = gtNewOperNode(GT_MUL, op3->TypeGet(), op3, op4); + op2 = gtNewOperNode(GT_ADD, op2->TypeGet(), op2, op3); + } + + op1 = impSIMDPopStack(); + + retNode = gtNewSimdStoreNode(op2, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_SubtractSaturate: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (varTypeIsFloating(simdBaseType)) + { + retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); + } + else + { +#if defined(TARGET_XARCH) + if (varTypeIsSmall(simdBaseType)) + { + if (simdSize == 64) + { + intrinsic = NI_AVX512_SubtractSaturate; + } + else if (simdSize == 32) + { + intrinsic = NI_AVX2_SubtractSaturate; + } + else + { + assert(simdSize == 16); + intrinsic = NI_X86Base_SubtractSaturate; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + } + else if (varTypeIsUnsigned(simdBaseType)) + { + // For unsigned we simply have to detect `(x - y) > x` + // and in that scenario return MinValue (Zero) + + GenTree* cns = gtNewZeroConNode(retType); + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + + GenTree* tmp = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); + GenTree* tmpDup1 = fgMakeMultiUse(&tmp); + GenTree* msk = gtNewSimdCmpOpNode(GT_GT, retType, tmp, op1Dup1, simdBaseType, simdSize); + + retNode = gtNewSimdCndSelNode(retType, msk, cns, tmpDup1, simdBaseType, simdSize); + } + else + { + // For signed the logic is a bit more complex, but is + // explained on the managed side as part of Scalar.SubtractSaturate + + GenTreeVecCon* minCns = gtNewVconNode(retType); + GenTreeVecCon* maxCns = gtNewVconNode(retType); + + switch (simdBaseType) + { + case TYP_SHORT: + { + minCns->EvaluateBroadcastInPlace(INT16_MIN); + maxCns->EvaluateBroadcastInPlace(INT16_MAX); + break; + } + + case TYP_INT: + { + 
minCns->EvaluateBroadcastInPlace(INT32_MIN); + maxCns->EvaluateBroadcastInPlace(INT32_MAX); + break; + } + + case TYP_LONG: + { + minCns->EvaluateBroadcastInPlace(INT64_MIN); + maxCns->EvaluateBroadcastInPlace(INT64_MAX); + break; + } + + default: + { + unreached(); + } + } + + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op2Dup1 = fgMakeMultiUse(&op2); + + GenTree* tmp = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); + + GenTree* tmpDup1 = fgMakeMultiUse(&tmp); + GenTree* tmpDup2 = gtCloneExpr(tmpDup1); + + GenTree* msk = gtNewSimdIsNegativeNode(retType, tmpDup1, simdBaseType, simdSize); + GenTree* ovf = gtNewSimdCndSelNode(retType, msk, maxCns, minCns, simdBaseType, simdSize); + + // The mask we need is ((a ^ b) & (b ^ c)) < 0 + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // tmp = a: 0xF0 + // op1Dup1 = b: 0xCC + // op2Dup1 = c: 0xAA + // + // 0x24 = B ? norAC : andAC + // b ? ~(a | c) : (a & c) + msk = gtNewSimdTernaryLogicNode(retType, tmp, op1Dup1, op2Dup1, gtNewIconNode(0x24), + simdBaseType, simdSize); + } + else + { + GenTree* op1Dup2 = gtCloneExpr(op1Dup1); + + GenTree* msk2 = gtNewSimdBinOpNode(GT_XOR, retType, tmp, op1Dup1, simdBaseType, simdSize); + GenTree* msk3 = gtNewSimdBinOpNode(GT_XOR, retType, op1Dup2, op2Dup1, simdBaseType, simdSize); + + msk = gtNewSimdBinOpNode(GT_AND, retType, msk2, msk3, simdBaseType, simdSize); + } + + msk = gtNewSimdIsNegativeNode(retType, msk, simdBaseType, simdSize); + retNode = gtNewSimdCndSelNode(retType, msk, ovf, tmpDup2, simdBaseType, simdSize); + } +#elif defined(TARGET_ARM64) + intrinsic = NI_AdvSimd_SubtractSaturate; + + if ((simdSize == 8) && varTypeIsLong(simdBaseType)) + { + intrinsic = NI_AdvSimd_SubtractSaturateScalar; + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); +#elif defined(TARGET_WASM) + // TODO-WASM-SIMD: Implement NI_Vector_SubtractSaturate + return nullptr; +#else + unreached(); +#endif + } 
+ break; + } + + case NI_Vector_Sum: + { + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdSumNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_ToScalar: + { + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdToScalarNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_Truncate: + { + assert(sig->numArgs == 1); + + if (!varTypeIsFloating(simdBaseType)) + { + retNode = impSIMDPopStack(); + break; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdTruncNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_UnzipEven: + case NI_Vector_UnzipOdd: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if (simdSize == 16) + { + bool supportsX86BaseShuffle = (genTypeSize(simdBaseType) == 4); + + if (!supportsX86BaseShuffle && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool odd = (intrinsic == NI_Vector_UnzipOdd); + retNode = gtNewSimdUnzipNode(retType, op1, op2, simdBaseType, simdSize, odd); + break; + } + + case NI_Vector_WidenLower: + { + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdWidenLowerNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_WidenUpper: + { + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdWidenUpperNode(retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_WithElement: + { + assert(sig->numArgs == 3); + +#if defined(TARGET_X86) + if (varTypeIsLong(simdBaseType)) + { + return nullptr; + } +#elif defined(TARGET_ARM64) + bool isIndexConst = true; + + if (!impStackTop(1).val->OperIsConst()) + { + if (!opts.OptimizationEnabled()) + { + // Only enable late stage rewriting if optimizations are enabled + // as we won't otherwise encounter a constant at the later point + return nullptr; + } + isIndexConst = false; + } +#endif + + op3 
= impPopStack().val; + op2 = impPopStack().val; + op1 = impSIMDPopStack(); + +#if defined(TARGET_ARM64) + if (!isIndexConst) + { + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseType, simdSize); + retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); + break; + } +#endif + + retNode = gtNewSimdWithElementNode(retType, op1, op2, op3, simdBaseType, simdSize); + break; + } + +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) + case NI_Vector_WithLower: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if (simdSize == 16) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdWithLowerNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_WithUpper: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if (simdSize == 16) + { + break; + } +#endif + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdWithUpperNode(retType, op1, op2, simdBaseType, simdSize); + break; + } +#endif // TARGET_XARCH || TARGET_ARM64 + + case NI_Vector_ZipLower: + case NI_Vector_ZipUpper: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool upper = (intrinsic == NI_Vector_ZipUpper); + retNode = gtNewSimdZipNode(retType, op1, op2, simdBaseType, simdSize, upper); + break; + } + + case NI_Vector_get_AllBitsSet: + { + assert(sig->numArgs == 0); + retNode = gtNewAllBitsSetConNode(retType); + break; + } + + case NI_Vector_get_E: + { + assert(sig->numArgs == 0); + + if (varTypeIsFloating(simdBaseType)) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(simdBaseType, 2.718281828459045); + retNode = vecCns; + } + break; + } + + case NI_Vector_get_Epsilon: + { + assert(sig->numArgs == 0); + + if (simdBaseType == TYP_FLOAT) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0x00000001)); 
+ retNode = vecCns; + } + else if (simdBaseType == TYP_DOUBLE) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0x0000000000000001)); + retNode = vecCns; + } + break; + } + + case NI_Vector_get_Indices: + { + assert(sig->numArgs == 0); + retNode = gtNewSimdGetIndicesNode(retType, simdBaseType, simdSize); + break; + } + + case NI_Vector_get_NaN: + { + assert(sig->numArgs == 0); + + if (simdBaseType == TYP_FLOAT) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0xFFC00000)); + retNode = vecCns; + } + else if (simdBaseType == TYP_DOUBLE) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0xFFF8000000000000)); + retNode = vecCns; + } + break; + } + + case NI_Vector_get_NegativeInfinity: + { + assert(sig->numArgs == 0); + + if (simdBaseType == TYP_FLOAT) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0xFF800000)); + retNode = vecCns; + } + else if (simdBaseType == TYP_DOUBLE) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0xFFF0000000000000)); + retNode = vecCns; + } + break; + } + + case NI_Vector_get_NegativeOne: + { + assert(sig->numArgs == 0); + + if (varTypeIsFloating(simdBaseType)) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(simdBaseType, -1.0); + retNode = vecCns; + } + else if (varTypeIsSigned(simdBaseType)) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(simdBaseType, static_cast(-1)); + retNode = vecCns; + } + break; + } + + case NI_Vector_get_NegativeZero: + { + assert(sig->numArgs == 0); + + if (varTypeIsFloating(simdBaseType)) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(simdBaseType, -0.0); + retNode = vecCns; + } + break; + 
} + + case NI_Vector_get_One: + { + assert(sig->numArgs == 0); + retNode = gtNewOneConNode(retType, simdBaseType); + break; + } + + case NI_Vector_get_Pi: + { + assert(sig->numArgs == 0); + + if (varTypeIsFloating(simdBaseType)) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(simdBaseType, 3.141592653589793); + retNode = vecCns; + } + break; + } + + case NI_Vector_get_PositiveInfinity: + { + assert(sig->numArgs == 0); + + if (simdBaseType == TYP_FLOAT) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0x7F800000)); + retNode = vecCns; + } + else if (simdBaseType == TYP_DOUBLE) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0x7FF0000000000000)); + retNode = vecCns; + } + break; + } + + case NI_Vector_get_SignSequence: + { + assert(sig->numArgs == 0); + + var_types scalarType = genActualType(simdBaseType); + GenTree* one = gtNewOneConNode(scalarType); + GenTree* negativeOne = varTypeIsFloating(simdBaseType) ? 
gtNewDconNode(-1.0, simdBaseType) + : gtNewAllBitsSetConNode(scalarType); + + retNode = gtNewSimdCreateAlternatingSequenceNode(retType, one, negativeOne, simdBaseType, simdSize); + break; + } + + case NI_Vector_get_Tau: + { + assert(sig->numArgs == 0); + + if (varTypeIsFloating(simdBaseType)) + { + GenTreeVecCon* vecCns = gtNewVconNode(retType); + vecCns->EvaluateBroadcastInPlace(simdBaseType, 6.283185307179586); + retNode = vecCns; + } + break; + } + + case NI_Vector_get_Zero: + { + assert(sig->numArgs == 0); + retNode = gtNewZeroConNode(retType); + break; + } + + case NI_Vector_op_Addition: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_BitwiseAnd: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_BitwiseOr: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_Division: + { + assert(sig->numArgs == 2); + + if (!varTypeIsFloating(simdBaseType)) + { +#if defined(TARGET_XARCH) + // Check to see if it is possible to emulate the integer division + if (varTypeIsLong(simdBaseType)) + { + break; + } + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector integer division")); +#else + // We can't trivially handle division for integral types using SIMD + break; +#endif + } + + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); 
+ op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); + + retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_Equality: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_ExclusiveOr: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_Inequality: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_LeftShift: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impPopStack().val; + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_Multiply: + { + assert(sig->numArgs == 2); + + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); + + retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_OnesComplement: + { + 
assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_RightShift: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH; + + op2 = impPopStack().val; + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_Subtraction: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_UnaryNegation: + { + assert(sig->numArgs == 1); + op1 = impSIMDPopStack(); + retNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseType, simdSize); + break; + } + + case NI_Vector_op_UnaryPlus: + { + assert(sig->numArgs == 1); + retNode = impSIMDPopStack(); + break; + } + + case NI_Vector_op_UnsignedRightShift: + { + assert(sig->numArgs == 2); + +#if defined(TARGET_XARCH) + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } +#endif + + op2 = impPopStack().val; + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseType, simdSize); + break; + } + + default: + { + unreached(); + break; + } + } + + if (isMinMaxIntrinsic) + { + assert(sig->numArgs == 2); + assert(retNode == nullptr); + + if (isNative && BlockNonDeterministicIntrinsics(mustExpand)) + { + return nullptr; + } + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + if (isNative) + { + assert(!isMagnitude && !isNumber); + retNode = gtNewSimdMinMaxNativeNode(retType, op1, op2, simdBaseType, simdSize, isMax); + } + else + { + retNode = gtNewSimdMinMaxNode(retType, op1, op2, simdBaseType, simdSize, isMax, isMagnitude, 
isNumber); + } + } + else if (isConcatIntrinsic) + { + assert(sig->numArgs == 2); + assert(retNode == nullptr); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdConcatNode(retType, op1, op2, simdBaseType, simdSize, leftUpper, rightUpper); + } +#if defined(TARGET_XARCH) + else if (retType == TYP_MASK) + { + retType = getSIMDTypeForSize(simdSize); + assert(retType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + retNode = gtNewSimdCvtMaskToVectorNode(retType, gtFoldExpr(retNode), simdBaseType, simdSize); + } +#endif + + return retNode; +} #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 27c17c2befd5ab..cb89bdc1dd10d6 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -979,92 +979,27 @@ struct HWIntrinsicInfo static bool IsVectorCreate(NamedIntrinsic id) { - switch (id) - { -#if defined(TARGET_ARM64) - case NI_Vector64_Create: -#endif // TARGET_ARM64 - case NI_Vector128_Create: -#if defined(TARGET_XARCH) - case NI_Vector256_Create: - case NI_Vector512_Create: -#endif // TARGET_XARCH - return true; - default: - return false; - } + return id == NI_Vector_Create; } static bool IsVectorCreateScalar(NamedIntrinsic id) { - switch (id) - { -#if defined(TARGET_ARM64) - case NI_Vector64_CreateScalar: -#endif // TARGET_ARM64 - case NI_Vector128_CreateScalar: -#if defined(TARGET_XARCH) - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: -#endif // TARGET_XARCH - return true; - default: - return false; - } + return id == NI_Vector_CreateScalar; } static bool IsVectorCreateScalarUnsafe(NamedIntrinsic id) { - switch (id) - { -#if defined(TARGET_ARM64) - case NI_Vector64_CreateScalarUnsafe: -#endif // TARGET_ARM64 - case NI_Vector128_CreateScalarUnsafe: -#if defined(TARGET_XARCH) - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: -#endif // TARGET_XARCH - return true; - default: - return 
false; - } + return id == NI_Vector_CreateScalarUnsafe; } static bool IsVectorGetElement(NamedIntrinsic id) { - switch (id) - { -#if defined(TARGET_ARM64) - case NI_Vector64_GetElement: -#endif // TARGET_ARM64 - case NI_Vector128_GetElement: -#if defined(TARGET_XARCH) - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: -#endif // TARGET_XARCH - return true; - default: - return false; - } + return id == NI_Vector_GetElement; } static bool IsVectorToScalar(NamedIntrinsic id) { - switch (id) - { -#if defined(TARGET_ARM64) - case NI_Vector64_ToScalar: -#endif // TARGET_ARM64 - case NI_Vector128_ToScalar: -#if defined(TARGET_XARCH) - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: -#endif // TARGET_XARCH - return true; - default: - return false; - } + return id == NI_Vector_ToScalar; } static bool HasImmediateOperand(NamedIntrinsic id) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index b215633ac49b71..afdf296aa1a3a1 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -691,6 +691,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, unsigned simdSize, bool mustExpand) { + CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic); + + if (isa == InstructionSet_Vector) + { + return impXplatIntrinsic(intrinsic, clsHnd, method, sig R2RARG(entryPoint), simdBaseType, retType, simdSize, + mustExpand); + } + const HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsic); const int numArgs = sig->numArgs; @@ -720,1614 +728,56 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, bool isValidScalarIntrinsic = false; #endif - bool isMinMaxIntrinsic = false; - bool isMax = false; - bool isMagnitude = false; - bool isNative = false; - bool isNumber = false; - switch (intrinsic) { - case NI_Vector64_Abs: - case NI_Vector128_Abs: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = 
gtNewSimdAbsNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_Addition: - case NI_Vector128_op_Addition: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_AddSaturate: - case NI_Vector128_AddSaturate: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (varTypeIsFloating(simdBaseType)) - { - retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); - } - else - { - intrinsic = NI_AdvSimd_AddSaturate; - - if ((simdSize == 8) && varTypeIsLong(simdBaseType)) - { - intrinsic = NI_AdvSimd_AddSaturateScalar; - } - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - } - break; - } - case NI_AdvSimd_BitwiseClear: case NI_Sve_BitwiseClear: - case NI_Vector64_AndNot: - case NI_Vector128_AndNot: - { - assert(sig->numArgs == 2); - - // We don't want to support creating AND_NOT nodes prior to LIR - // as it can break important optimizations. We'll produces this - // in lowering instead so decompose into the individual operations - // on import - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - op2 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseType, simdSize)); - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_AdvSimd_OrNot: - { - assert(sig->numArgs == 2); - - // We don't want to support creating OR_NOT nodes prior to LIR - // as it can break important optimizations. 
We'll produces this - // in lowering instead so decompose into the individual operations - // on import - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - op2 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseType, simdSize)); - retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_As: - case NI_Vector64_AsByte: - case NI_Vector64_AsDouble: - case NI_Vector64_AsInt16: - case NI_Vector64_AsInt32: - case NI_Vector64_AsInt64: - case NI_Vector64_AsNInt: - case NI_Vector64_AsNUInt: - case NI_Vector64_AsSByte: - case NI_Vector64_AsSingle: - case NI_Vector64_AsUInt16: - case NI_Vector64_AsUInt32: - case NI_Vector64_AsUInt64: - case NI_Vector128_As: - case NI_Vector128_AsByte: - case NI_Vector128_AsDouble: - case NI_Vector128_AsInt16: - case NI_Vector128_AsInt32: - case NI_Vector128_AsInt64: - case NI_Vector128_AsNInt: - case NI_Vector128_AsNUInt: - case NI_Vector128_AsSByte: - case NI_Vector128_AsSingle: - case NI_Vector128_AsUInt16: - case NI_Vector128_AsUInt32: - case NI_Vector128_AsUInt64: - case NI_Vector128_AsVector: - case NI_Vector128_AsVector4: - { - assert(!sig->hasThis()); - assert(numArgs == 1); - - // We fold away the cast here, as it only exists to satisfy - // the type system. It is safe to do this here since the retNode type - // and the signature return type are both the same TYP_SIMD. 
- - retNode = impSIMDPopStack(); - assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); - break; - } - - case NI_Vector128_AsVector2: - { - assert(sig->numArgs == 1); - assert((simdSize == 16) && (simdBaseType == TYP_FLOAT)); - assert(retType == TYP_SIMD8); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdGetLowerNode(TYP_SIMD8, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_AsVector3: - { - assert(sig->numArgs == 1); - assert((simdSize == 16) && (simdBaseType == TYP_FLOAT)); - assert(retType == TYP_SIMD12); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - break; - } - - case NI_Vector128_AsVector128: - { - assert(!sig->hasThis()); - assert(numArgs == 1); - assert(retType == TYP_SIMD16); - - switch (getSIMDTypeForSize(simdSize)) - { - case TYP_SIMD8: - { - assert((simdSize == 8) && (simdBaseType == TYP_FLOAT)); - - op1 = impSIMDPopStack(); - - if (op1->IsCnsVec()) - { - GenTreeVecCon* vecCon = op1->AsVecCon(); - vecCon->gtType = TYP_SIMD16; - - vecCon->gtSimdVal.f32[2] = 0.0f; - vecCon->gtSimdVal.f32[3] = 0.0f; - - return vecCon; - } - - op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector64_ToVector128Unsafe, simdBaseType, 8); - - GenTree* idx = gtNewIconNode(2, TYP_INT); - GenTree* zero = gtNewZeroConNode(TYP_FLOAT); - op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); - - idx = gtNewIconNode(3, TYP_INT); - zero = gtNewZeroConNode(TYP_FLOAT); - retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); - - break; - } - - case TYP_SIMD12: - { - assert((simdSize == 12) && (simdBaseType == TYP_FLOAT)); - - op1 = impSIMDPopStack(); - - if (op1->IsCnsVec()) - { - GenTreeVecCon* vecCon = op1->AsVecCon(); - vecCon->gtType = TYP_SIMD16; - - vecCon->gtSimdVal.f32[3] = 0.0f; - return vecCon; - } - - op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseType, 12); - - 
GenTree* idx = gtNewIconNode(3, TYP_INT); - GenTree* zero = gtNewZeroConNode(TYP_FLOAT); - retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); - break; - } - - case TYP_SIMD16: - { - // We fold away the cast here, as it only exists to satisfy - // the type system. It is safe to do this here since the retNode type - // and the signature return type are both the same TYP_SIMD. - - retNode = impSIMDPopStack(); - assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); - break; - } - - default: - { - unreached(); - } - } - - break; - } - - case NI_Vector128_AsVector128Unsafe: - { - assert(sig->numArgs == 1); - assert(retType == TYP_SIMD16); - assert(simdBaseType == TYP_FLOAT); - assert((simdSize == 8) || (simdSize == 12)); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_BitwiseAnd: - case NI_Vector128_op_BitwiseAnd: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_BitwiseOr: - case NI_Vector128_op_BitwiseOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_Ceiling: - case NI_Vector128_Ceiling: - { - assert(sig->numArgs == 1); - - if (!varTypeIsFloating(simdBaseType)) - { - retNode = impSIMDPopStack(); - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdCeilNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ConditionalSelect: - case NI_Vector128_ConditionalSelect: - { - assert(sig->numArgs == 3); - - op3 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCndSelNode(retType, op1, op2, 
op3, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ConvertToDouble: - case NI_Vector128_ConvertToDouble: - { - assert(sig->numArgs == 1); - assert((simdBaseType == TYP_LONG) || (simdBaseType == TYP_ULONG)); - - intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_ConvertToDoubleScalar : NI_AdvSimd_Arm64_ConvertToDouble; - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ConvertToInt32Native: - case NI_Vector128_ConvertToInt32Native: - { - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - FALLTHROUGH; - } - - case NI_Vector64_ConvertToInt32: - case NI_Vector128_ConvertToInt32: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_INT, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ConvertToInt64Native: - case NI_Vector128_ConvertToInt64Native: - { - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - FALLTHROUGH; - } - - case NI_Vector64_ConvertToInt64: - case NI_Vector128_ConvertToInt64: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_LONG, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ConvertToSingle: - case NI_Vector128_ConvertToSingle: - { - assert(sig->numArgs == 1); - assert((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToSingle, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ConvertToUInt32Native: - case NI_Vector128_ConvertToUInt32Native: - { - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - FALLTHROUGH; - } - - case NI_Vector64_ConvertToUInt32: - case NI_Vector128_ConvertToUInt32: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - - op1 
= impSIMDPopStack(); - retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_UINT, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ConvertToUInt64Native: - case NI_Vector128_ConvertToUInt64Native: - { - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - FALLTHROUGH; - } - - case NI_Vector64_ConvertToUInt64: - case NI_Vector128_ConvertToUInt64: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_ULONG, simdBaseType, simdSize); - break; - } - - case NI_Vector64_Create: - case NI_Vector128_Create: - { - retNode = impSimdCreate(intrinsic, sig, simdBaseType, retType, simdSize); - break; - } - - case NI_Vector64_CreateScalar: - case NI_Vector128_CreateScalar: - { - assert(sig->numArgs == 1); - - op1 = impPopStack().val; - retNode = gtNewSimdCreateScalarNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_CreateSequence: - case NI_Vector128_CreateSequence: { assert(sig->numArgs == 2); - if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->OperIsConst()) - { - // TODO-ARM64-CQ: We should support long/ulong multiplication. 
- break; - } - - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op1 side effects for vector CreateSequence")); - - op2 = impPopStack().val; - op1 = impPopStack().val; - - retNode = gtNewSimdCreateSequenceNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_CreateGeometricSequence: - case NI_Vector128_CreateGeometricSequence: - { - assert(sig->numArgs == 2); - - bool multiplierIsConst = impStackTop(0).val->OperIsConst(); - bool initialIsConst = impStackTop(1).val->OperIsConst(); - bool canGenerate = multiplierIsConst && (!varTypeIsLong(simdBaseType) || initialIsConst || (simdSize == 8)); - - if (!canGenerate) - { - if (opts.OptimizationEnabled()) - { - op2 = impPopStack().val; - op1 = impPopStack().val; - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - } - break; - } - - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op1 side effects for vector CreateGeometricSequence")); - - op2 = impPopStack().val; - op1 = impPopStack().val; - - retNode = gtNewSimdCreateGeometricSequenceNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_CreateAlternatingSequence: - case NI_Vector128_CreateAlternatingSequence: - { - assert(sig->numArgs == 2); - - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op1 side effects for vector CreateAlternatingSequence")); - - op2 = impPopStack().val; - op1 = impPopStack().val; - - retNode = gtNewSimdCreateAlternatingSequenceNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_CreateScalarUnsafe: - case NI_Vector128_CreateScalarUnsafe: - { - assert(sig->numArgs == 1); - - op1 = impPopStack().val; - retNode = gtNewSimdCreateScalarUnsafeNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_Division: - case NI_Vector128_op_Division: - { - 
assert(sig->numArgs == 2); - - if (!varTypeIsFloating(simdBaseType)) - { - // We can't trivially handle division for integral types using SIMD - break; - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_Dot: - case NI_Vector128_Dot: - { - assert(sig->numArgs == 2); - - if (!varTypeIsLong(simdBaseType)) - { - var_types simdType = getSIMDTypeForSize(simdSize); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdDotProdNode(simdType, op1, op2, simdBaseType, simdSize); - retNode = gtNewSimdToScalarNode(retType, retNode, simdBaseType, simdSize); - } - break; - } - - case NI_Vector64_Equals: - case NI_Vector128_Equals: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_Equality: - case NI_Vector128_op_Equality: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_EqualsAny: - case NI_Vector128_EqualsAny: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ExtractMostSignificantBits: - case NI_Vector128_ExtractMostSignificantBits: - { - 
assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - break; - } - - case NI_Vector64_Floor: - case NI_Vector128_Floor: - { - assert(sig->numArgs == 1); - - if (!varTypeIsFloating(simdBaseType)) - { - retNode = impSIMDPopStack(); - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdFloorNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_FusedMultiplyAdd: - case NI_Vector128_FusedMultiplyAdd: - { - assert(sig->numArgs == 3); - assert(varTypeIsFloating(simdBaseType)); - - impSpillSideEffect(true, - stackState.esStackDepth - 3 DEBUGARG("Spilling op1 side effects for FusedMultiplyAdd")); - - impSpillSideEffect(true, - stackState.esStackDepth - 2 DEBUGARG("Spilling op2 side effects for FusedMultiplyAdd")); - - op3 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseType, simdSize); - break; - } - - case NI_Vector64_get_AllBitsSet: - case NI_Vector128_get_AllBitsSet: - { - assert(sig->numArgs == 0); - retNode = gtNewAllBitsSetConNode(retType); - break; - } - - case NI_Vector64_get_E: - case NI_Vector128_get_E: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, 2.718281828459045); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_Epsilon: - case NI_Vector128_get_Epsilon: - { - assert(sig->numArgs == 0); - - if (simdBaseType == TYP_FLOAT) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0x00000001)); - retNode = vecCns; - } - else if (simdBaseType == TYP_DOUBLE) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0x0000000000000001)); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_Indices: - case 
NI_Vector128_get_Indices: - { - assert(sig->numArgs == 0); - retNode = gtNewSimdGetIndicesNode(retType, simdBaseType, simdSize); - break; - } - - case NI_Vector64_get_SignSequence: - case NI_Vector128_get_SignSequence: - { - assert(sig->numArgs == 0); - - var_types scalarType = genActualType(simdBaseType); - GenTree* one = gtNewOneConNode(scalarType); - GenTree* negativeOne = varTypeIsFloating(simdBaseType) ? gtNewDconNode(-1.0, simdBaseType) - : gtNewAllBitsSetConNode(scalarType); - - retNode = gtNewSimdCreateAlternatingSequenceNode(retType, one, negativeOne, simdBaseType, simdSize); - break; - } - - case NI_Vector64_get_NaN: - case NI_Vector128_get_NaN: - { - assert(sig->numArgs == 0); - - if (simdBaseType == TYP_FLOAT) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0x7FC00000)); - retNode = vecCns; - } - else if (simdBaseType == TYP_DOUBLE) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0x7FF8000000000000)); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_NegativeInfinity: - case NI_Vector128_get_NegativeInfinity: - { - assert(sig->numArgs == 0); - - if (simdBaseType == TYP_FLOAT) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0xFF800000)); - retNode = vecCns; - } - else if (simdBaseType == TYP_DOUBLE) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0xFFF0000000000000)); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_NegativeOne: - case NI_Vector128_get_NegativeOne: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, -1.0); - retNode = vecCns; - } - else if (varTypeIsSigned(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - 
vecCns->EvaluateBroadcastInPlace(simdBaseType, static_cast(-1)); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_NegativeZero: - case NI_Vector128_get_NegativeZero: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, -0.0); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_One: - case NI_Vector128_get_One: - { - assert(sig->numArgs == 0); - retNode = gtNewOneConNode(retType, simdBaseType); - break; - } - - case NI_Vector64_get_Pi: - case NI_Vector128_get_Pi: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, 3.141592653589793); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_PositiveInfinity: - case NI_Vector128_get_PositiveInfinity: - { - assert(sig->numArgs == 0); - - if (simdBaseType == TYP_FLOAT) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0x7F800000)); - retNode = vecCns; - } - else if (simdBaseType == TYP_DOUBLE) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0x7FF0000000000000)); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_Tau: - case NI_Vector128_get_Tau: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, 6.283185307179586); - retNode = vecCns; - } - break; - } - - case NI_Vector64_get_Zero: - case NI_Vector128_get_Zero: - { - assert(sig->numArgs == 0); - retNode = gtNewZeroConNode(retType); - break; - } - - case NI_Vector64_GetElement: - case NI_Vector128_GetElement: - { - assert(!sig->hasThis()); - assert(numArgs == 2); - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = 
gtNewSimdGetElementNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_GetLower: - { - assert(sig->numArgs == 1); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdGetLowerNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_GetUpper: - { - assert(sig->numArgs == 1); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdGetUpperNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_GreaterThan: - case NI_Vector128_GreaterThan: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_GreaterThanAll: - case NI_Vector128_GreaterThanAll: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_GreaterThanAny: - case NI_Vector128_GreaterThanAny: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_GreaterThanOrEqual: - case NI_Vector128_GreaterThanOrEqual: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_GreaterThanOrEqualAll: - case NI_Vector128_GreaterThanOrEqualAll: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_GreaterThanOrEqualAny: - case NI_Vector128_GreaterThanOrEqualAny: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, 
simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsEvenInteger: - case NI_Vector128_IsEvenInteger: - { - assert(sig->numArgs == 1); - - if (varTypeIsFloating(simdBaseType)) - { - // The code for handling floating-point is decently complex but also expected - // to be rare, so we fallback to the managed implementation, which is accelerated - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsEvenIntegerNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsFinite: - case NI_Vector128_IsFinite: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsFiniteNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsInfinity: - case NI_Vector128_IsInfinity: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsInfinityNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsInteger: - case NI_Vector128_IsInteger: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsIntegerNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsNaN: - case NI_Vector128_IsNaN: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNaNNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsNegative: - case NI_Vector128_IsNegative: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNegativeNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsNegativeInfinity: - case NI_Vector128_IsNegativeInfinity: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNegativeInfinityNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsNormal: - case NI_Vector128_IsNormal: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNormalNode(retType, op1, simdBaseType, simdSize); - break; - } - - case 
NI_Vector64_IsOddInteger: - case NI_Vector128_IsOddInteger: - { - assert(sig->numArgs == 1); - - if (varTypeIsFloating(simdBaseType)) - { - // The code for handling floating-point is decently complex but also expected - // to be rare, so we fallback to the managed implementation, which is accelerated - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsOddIntegerNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsPositive: - case NI_Vector128_IsPositive: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsPositiveNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsPositiveInfinity: - case NI_Vector128_IsPositiveInfinity: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsPositiveInfinityNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsSubnormal: - case NI_Vector128_IsSubnormal: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsSubnormalNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_IsZero: - case NI_Vector128_IsZero: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsZeroNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_LessThan: - case NI_Vector128_LessThan: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_LessThanAll: - case NI_Vector128_LessThanAll: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_LessThanAny: - case NI_Vector128_LessThanAny: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = 
gtNewSimdCmpOpAnyNode(GT_LT, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_LessThanOrEqual: - case NI_Vector128_LessThanOrEqual: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_LessThanOrEqualAll: - case NI_Vector128_LessThanOrEqualAll: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_LessThanOrEqualAny: - case NI_Vector128_LessThanOrEqualAny: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_AdvSimd_LoadVector64: - case NI_AdvSimd_LoadVector128: - case NI_Vector64_LoadUnsafe: - case NI_Vector128_LoadUnsafe: - { - if (sig->numArgs == 2) - { - op2 = impPopStack().val; - } - else - { - assert(sig->numArgs == 1); - } - - op1 = impPopStack().val; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op1 = op1->gtGetOp1(); - } - - if (sig->numArgs == 2) - { - op3 = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet()); - op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, op3); - op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2); - } - - retNode = gtNewSimdLoadNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_LoadAligned: - case NI_Vector128_LoadAligned: - { - assert(sig->numArgs == 1); - - if (opts.OptimizationDisabled()) - { - // ARM64 doesn't have aligned loads, but aligned loads are only validated to be - // aligned when optimizations are disable, so only skip the intrinsic handling - // if optimizations are enabled - break; - } - - op1 = impPopStack().val; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op1 = op1->gtGetOp1(); - } - - retNode = gtNewSimdLoadAlignedNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_LoadAlignedNonTemporal: - case NI_Vector128_LoadAlignedNonTemporal: - { - assert(sig->numArgs == 1); - - if (opts.OptimizationDisabled()) - { - // ARM64 doesn't have aligned loads, but aligned loads are only validated to be - // aligned when optimizations are disable, so only skip the intrinsic handling - // if optimizations are enabled - break; - } - - op1 = impPopStack().val; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op1 = op1->gtGetOp1(); - } - - retNode = gtNewSimdLoadNonTemporalNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_Max: - case NI_Vector128_Max: - { - isMinMaxIntrinsic = true; - isMax = true; - break; - } - - case NI_Vector64_MaxMagnitude: - case NI_Vector128_MaxMagnitude: - { - isMinMaxIntrinsic = true; - isMax = true; - isMagnitude = true; - break; - } - - case NI_Vector64_MaxMagnitudeNumber: - case NI_Vector128_MaxMagnitudeNumber: - { - isMinMaxIntrinsic = true; - isMax = true; - isMagnitude = true; - isNumber = true; - break; - } - - case NI_Vector64_MaxNative: - case NI_Vector128_MaxNative: - { - isMinMaxIntrinsic = true; - isMax = true; - isNative = true; - break; - } - - case NI_Vector64_MaxNumber: - case NI_Vector128_MaxNumber: - { - isMinMaxIntrinsic = true; - isMax = true; - isNumber = true; - break; - } - - case NI_Vector64_Min: - case NI_Vector128_Min: - { - isMinMaxIntrinsic = true; - break; - } - - case NI_Vector64_MinMagnitude: - case NI_Vector128_MinMagnitude: - { - isMinMaxIntrinsic = true; - isMagnitude = true; - break; - } - - case NI_Vector64_MinMagnitudeNumber: - case NI_Vector128_MinMagnitudeNumber: - { - isMinMaxIntrinsic = true; - isMagnitude = true; - isNumber = true; - break; - } - - case NI_Vector64_MinNative: - case NI_Vector128_MinNative: - { - isMinMaxIntrinsic = true; - isNative = true; - break; - } - - case NI_Vector64_MinNumber: - case NI_Vector128_MinNumber: - { - isMinMaxIntrinsic = true; - isNumber = true; - break; - } - - case NI_Vector64_op_Multiply: - case NI_Vector128_op_Multiply: - { - assert(sig->numArgs == 2); - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = 
JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_MultiplyAddEstimate: - case NI_Vector128_MultiplyAddEstimate: - { - assert(sig->numArgs == 3); - - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - - if (varTypeIsFloating(simdBaseType)) - { - impSpillSideEffect(true, stackState.esStackDepth - - 3 DEBUGARG("Spilling op1 side effects for MultiplyAddEstimate")); - - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op2 side effects for MultiplyAddEstimate")); - } - - op3 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (varTypeIsFloating(simdBaseType)) - { - retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseType, simdSize); - } - else - { - GenTree* mulNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseType, simdSize); - retNode = gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseType, simdSize); - } - break; - } - - case NI_Vector64_Narrow: - case NI_Vector128_Narrow: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_NarrowWithSaturation: - case NI_Vector128_NarrowWithSaturation: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (varTypeIsFloating(simdBaseType)) - { - retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseType, simdSize); - } - else if (simdSize == 16) - { - intrinsic = NI_AdvSimd_ExtractNarrowingSaturateLower; - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, intrinsic, simdBaseType, 8); - - intrinsic = NI_AdvSimd_ExtractNarrowingSaturateUpper; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - } - else - { - intrinsic = 
NI_Vector64_ToVector128Unsafe; - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, simdBaseType, simdSize); - - op1 = gtNewSimdWithUpperNode(TYP_SIMD16, op1, op2, simdBaseType, 16); - - intrinsic = NI_AdvSimd_ExtractNarrowingSaturateLower; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - } - break; - } - - case NI_Vector64_op_UnaryNegation: - case NI_Vector128_op_UnaryNegation: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_OnesComplement: - case NI_Vector128_op_OnesComplement: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_Inequality: - case NI_Vector128_op_Inequality: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_UnaryPlus: - case NI_Vector128_op_UnaryPlus: - { - assert(sig->numArgs == 1); - retNode = impSIMDPopStack(); - break; - } - - case NI_Vector64_op_Subtraction: - case NI_Vector128_op_Subtraction: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_LeftShift: - case NI_Vector128_op_LeftShift: - { - assert(sig->numArgs == 2); - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_RightShift: - case NI_Vector128_op_RightShift: - { - assert(sig->numArgs == 2); - genTreeOps op = varTypeIsUnsigned(simdBaseType) ? 
GT_RSZ : GT_RSH; - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_UnsignedRightShift: - case NI_Vector128_op_UnsignedRightShift: - { - assert(sig->numArgs == 2); - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_Round: - case NI_Vector128_Round: - { - if (sig->numArgs != 1) - { - break; - } - - if (!varTypeIsFloating(simdBaseType)) - { - retNode = impSIMDPopStack(); - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdRoundNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ShiftLeft: - case NI_Vector128_ShiftLeft: - { - assert(sig->numArgs == 2); - - if (!varTypeIsSIMD(impStackTop(0).val)) - { - // We just want the inlining profitability boost for the helper intrinsics/ - // that have operator alternatives like `simd << int` - break; - } + // We don't want to support creating AND_NOT nodes prior to LIR + // as it can break important optimizations. We'll produces this + // in lowering instead so decompose into the individual operations + // on import op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - if (simdSize == 8) - { - intrinsic = varTypeIsLong(simdBaseType) ? 
NI_AdvSimd_ShiftLogicalScalar : NI_AdvSimd_ShiftLogical; - } - else - { - assert(simdSize == 16); - intrinsic = NI_AdvSimd_ShiftLogical; - } - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + op2 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseType, simdSize)); + retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); break; } - case NI_Vector64_Shuffle: - case NI_Vector128_Shuffle: - case NI_Vector64_ShuffleNative: - case NI_Vector128_ShuffleNative: - case NI_Vector64_ShuffleNativeFallback: - case NI_Vector128_ShuffleNativeFallback: + case NI_AdvSimd_OrNot: { - assert((sig->numArgs == 2) || (sig->numArgs == 3)); - assert((simdSize == 8) || (simdSize == 16)); - - // The Native variants are non-deterministic on arm64 (for element size > 1) - bool isShuffleNative = (intrinsic != NI_Vector64_Shuffle) && (intrinsic != NI_Vector128_Shuffle); - if (isShuffleNative && (genTypeSize(simdBaseType) > 1) && BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - - GenTree* indices = impStackTop(0).val; - - // Check if the required intrinsics to emit are available. - bool canBecomeValidForShuffle = false; - if (!IsValidForShuffle(indices, simdSize, simdBaseType, &canBecomeValidForShuffle, isShuffleNative)) - { - // All cases on arm64 are either valid or invalid, they cannot become valid later - assert(!canBecomeValidForShuffle); - break; - } - - // If the indices might become constant later, then we don't emit for now, delay until later. 
- if (!indices->IsCnsVec()) - { - assert(sig->numArgs == 2); - - if (opts.OptimizationEnabled()) - { - // Only enable late stage rewriting if optimizations are enabled - // as we won't otherwise encounter a constant at the later point - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); + assert(sig->numArgs == 2); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + // We don't want to support creating OR_NOT nodes prior to LIR + // as it can break important optimizations. We'll produces this + // in lowering instead so decompose into the individual operations + // on import - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; - } - } + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); - if (sig->numArgs == 2) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseType, simdSize, isShuffleNative); - } + op2 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseType, simdSize)); + retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseType, simdSize); break; } - case NI_Vector64_Sqrt: - case NI_Vector128_Sqrt: + case NI_AdvSimd_LoadVector64: + case NI_AdvSimd_LoadVector128: { assert(sig->numArgs == 1); + op1 = impPopStack().val; - if (varTypeIsFloating(simdBaseType)) + if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) { - op1 = impSIMDPopStack(); - retNode = gtNewSimdSqrtNode(retType, op1, simdBaseType, simdSize); + // If what we have is a BYREF, that's what we really want, so throw away the cast. 
+ op1 = op1->gtGetOp1(); } + + retNode = gtNewSimdLoadNode(retType, op1, simdBaseType, simdSize); break; } @@ -2391,107 +841,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_StoreUnsafe: - case NI_Vector128_StoreUnsafe: - { - assert(retType == TYP_VOID); - - if (sig->numArgs == 3) - { - impSpillSideEffect(true, - stackState.esStackDepth - 3 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - - op3 = impPopStack().val; - } - else - { - assert(sig->numArgs == 2); - - impSpillSideEffect(true, - stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - } - - op2 = impPopStack().val; - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op2 = op2->gtGetOp1(); - } - - if (sig->numArgs == 3) - { - op4 = gtNewIconNode(genTypeSize(simdBaseType), op3->TypeGet()); - op3 = gtNewOperNode(GT_MUL, op3->TypeGet(), op3, op4); - op2 = gtNewOperNode(GT_ADD, op2->TypeGet(), op2, op3); - } - - op1 = impSIMDPopStack(); - - retNode = gtNewSimdStoreNode(op2, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_StoreAligned: - case NI_Vector128_StoreAligned: - { - assert(sig->numArgs == 2); - assert(retType == TYP_VOID); - - if (opts.OptimizationDisabled()) - { - // ARM64 doesn't have aligned stores, but aligned stores are only validated to be - // aligned when optimizations are disable, so only skip the intrinsic handling - // if optimizations are enabled - break; - } - - impSpillSideEffect(true, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - - op2 = impPopStack().val; - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op2 = op2->gtGetOp1(); - } - - op1 = impSIMDPopStack(); - - retNode = gtNewSimdStoreAlignedNode(op2, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_StoreAlignedNonTemporal: - case NI_Vector128_StoreAlignedNonTemporal: - { - assert(sig->numArgs == 2); - assert(retType == TYP_VOID); - - if (opts.OptimizationDisabled()) - { - // ARM64 doesn't have aligned stores, but aligned stores are only validated to be - // aligned when optimizations are disable, so only skip the intrinsic handling - // if optimizations are enabled - break; - } - - impSpillSideEffect(true, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - - op2 = impPopStack().val; - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op2 = op2->gtGetOp1(); - } - - op1 = impSIMDPopStack(); - - retNode = gtNewSimdStoreNonTemporalNode(op2, op1, simdBaseType, simdSize); - break; - } - case NI_AdvSimd_StoreVectorAndZip: case NI_AdvSimd_Arm64_StoreVectorAndZip: { @@ -2603,218 +952,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_SubtractSaturate: - case NI_Vector128_SubtractSaturate: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (varTypeIsFloating(simdBaseType)) - { - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); - } - else - { - intrinsic = NI_AdvSimd_SubtractSaturate; - - if ((simdSize == 8) && varTypeIsLong(simdBaseType)) - { - intrinsic = NI_AdvSimd_SubtractSaturateScalar; - } - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - } - break; - } - - case NI_Vector64_Sum: - case NI_Vector128_Sum: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdSumNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_Truncate: - case 
NI_Vector128_Truncate: - { - assert(sig->numArgs == 1); - - if (!varTypeIsFloating(simdBaseType)) - { - retNode = impSIMDPopStack(); - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdTruncNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_WidenLower: - case NI_Vector128_WidenLower: - { - assert(sig->numArgs == 1); - - op1 = impSIMDPopStack(); - - retNode = gtNewSimdWidenLowerNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_WidenUpper: - case NI_Vector128_WidenUpper: - { - assert(sig->numArgs == 1); - - op1 = impSIMDPopStack(); - - retNode = gtNewSimdWidenUpperNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_WithElement: - case NI_Vector128_WithElement: - { - assert(numArgs == 3); - GenTree* indexOp = impStackTop(1).val; - - if (!indexOp->OperIsConst()) - { - if (!opts.OptimizationEnabled()) - { - // Only enable late stage rewriting if optimizations are enabled - // as we won't otherwise encounter a constant at the later point - return nullptr; - } - - op3 = impPopStack().val; - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseType, simdSize); - - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; - } - - ssize_t imm8 = indexOp->AsIntCon()->IconValue(); - ssize_t count = simdSize / genTypeSize(simdBaseType); - - if ((imm8 >= count) || (imm8 < 0)) - { - // Using software fallback if index is out of range (throw exception) - return nullptr; - } - - GenTree* valueOp = impPopStack().val; - impPopStack(); // pop the indexOp that we already have. 
- GenTree* vectorOp = impSIMDPopStack(); - - retNode = gtNewSimdWithElementNode(retType, vectorOp, indexOp, valueOp, simdBaseType, simdSize); - break; - } - - case NI_Vector128_WithLower: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - retNode = gtNewSimdWithLowerNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_WithUpper: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - retNode = gtNewSimdWithUpperNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector64_ConcatLowerLower: - case NI_Vector128_ConcatLowerLower: - case NI_Vector64_ConcatLowerUpper: - case NI_Vector128_ConcatLowerUpper: - case NI_Vector64_ConcatUpperLower: - case NI_Vector128_ConcatUpperLower: - case NI_Vector64_ConcatUpperUpper: - case NI_Vector128_ConcatUpperUpper: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - bool leftUpper = - (intrinsic == NI_Vector64_ConcatUpperLower) || (intrinsic == NI_Vector128_ConcatUpperLower) || - (intrinsic == NI_Vector64_ConcatUpperUpper) || (intrinsic == NI_Vector128_ConcatUpperUpper); - bool rightUpper = - (intrinsic == NI_Vector64_ConcatLowerUpper) || (intrinsic == NI_Vector128_ConcatLowerUpper) || - (intrinsic == NI_Vector64_ConcatUpperUpper) || (intrinsic == NI_Vector128_ConcatUpperUpper); - - retNode = gtNewSimdConcatNode(retType, op1, op2, simdBaseType, simdSize, leftUpper, rightUpper); - break; - } - - case NI_Vector64_ZipLower: - case NI_Vector128_ZipLower: - case NI_Vector64_ZipUpper: - case NI_Vector128_ZipUpper: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - bool upper = (intrinsic == NI_Vector64_ZipUpper) || (intrinsic == NI_Vector128_ZipUpper); - retNode = gtNewSimdZipNode(retType, op1, op2, simdBaseType, simdSize, upper); - break; - } - - case NI_Vector64_UnzipEven: - case NI_Vector128_UnzipEven: - case 
NI_Vector64_UnzipOdd: - case NI_Vector128_UnzipOdd: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - bool odd = (intrinsic == NI_Vector64_UnzipOdd) || (intrinsic == NI_Vector128_UnzipOdd); - retNode = gtNewSimdUnzipNode(retType, op1, op2, simdBaseType, simdSize, odd); - break; - } - - case NI_Vector64_Reverse: - case NI_Vector128_Reverse: - { - assert(sig->numArgs == 1); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdReverseNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector64_op_ExclusiveOr: - case NI_Vector128_op_ExclusiveOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseType, simdSize); - break; - } - case NI_AdvSimd_Load2xVector64AndUnzip: case NI_AdvSimd_Load3xVector64AndUnzip: case NI_AdvSimd_Load4xVector64AndUnzip: @@ -3491,32 +1628,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } } - if (isMinMaxIntrinsic) - { - assert(sig->numArgs == 2); - assert(retNode == nullptr); - - if (isNative && BlockNonDeterministicIntrinsics(mustExpand)) - { - return nullptr; - } - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (isNative) - { - assert(!isMagnitude && !isNumber); - retNode = gtNewSimdMinMaxNativeNode(retType, op1, op2, simdBaseType, simdSize, isMax); - } - else - { - retNode = gtNewSimdMinMaxNode(retType, op1, op2, simdBaseType, simdSize, isMax, isMagnitude, isNumber); - } - } - assert(!isScalar || isValidScalarIntrinsic); - return retNode; } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5595530a079572..afbec43b14bafa 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -894,10 +894,27 @@ void CodeGen::checkRMWRegisters(const HWIntrinsic intrin, regNumber targetReg) // void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { - 
const HWIntrinsic intrin(node); + const HWIntrinsic intrin(node); + CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrin.id); // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics - assert(m_compiler->compIsaSupportedDebugOnly(HWIntrinsicInfo::lookupIsa(intrin.id))); + + if (isa == InstructionSet_Vector) + { + if (node->GetSimdSize() == 16) + { + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_Vector128)); + } + else + { + assert(node->GetSimdSize() <= 8); + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_Vector64)); + } + } + else + { + assert(m_compiler->compIsaSupportedDebugOnly(isa)); + } regNumber targetReg = node->GetRegNum(); @@ -1643,8 +1660,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector64_CreateScalarUnsafe: - case NI_Vector128_CreateScalarUnsafe: + case NI_Vector_CreateScalarUnsafe: if (intrin.op1->isContainedFltOrDblImmed()) { // fmov reg, #imm8 @@ -1858,18 +1874,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector64_ToVector128: + case NI_Vector_ToVector128: GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false); break; - case NI_Vector64_ToVector128Unsafe: - case NI_Vector128_AsVector128Unsafe: - case NI_Vector128_GetLower: + case NI_Vector_ToVector128Unsafe: + case NI_Vector_AsVector128Unsafe: + case NI_Vector_GetLower: GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ true); break; - case NI_Vector64_GetElement: - case NI_Vector128_GetElement: + case NI_Vector_GetElement: { assert(intrin.numOperands == 2); assert(!intrin.op1->isContained()); @@ -1904,14 +1919,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector128_GetUpper: + case NI_Vector_GetUpper: { const int byteIndex = 8; GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op1Reg, byteIndex, INS_OPTS_16B); break; } - case NI_Vector128_AsVector3: + case 
NI_Vector_AsVector3: { // AsVector3 can be a no-op when it's already in the right register, otherwise // we just need to move the value over. Vector3 operations will themselves mask @@ -1922,8 +1937,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector64_ToScalar: - case NI_Vector128_ToScalar: + case NI_Vector_ToScalar: { if ((varTypeIsFloating(intrin.baseType) && (targetReg == op1Reg))) { diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 25dbdff2b9150b..a32b7dc56a0585 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -384,10 +384,33 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* embMaskNode = nullptr; GenTree* embMaskOp = nullptr; +#if DEBUG // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics - assert(m_compiler->compIsaSupportedDebugOnly(isa)); + + if (isa == InstructionSet_Vector) + { + if (node->GetSimdSize() == 64) + { + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_Vector512)); + } + else if (node->GetSimdSize() == 32) + { + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_Vector256)); + } + else + { + assert(node->GetSimdSize() <= 16); + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_Vector128)); + } + } + else + { + assert(m_compiler->compIsaSupportedDebugOnly(isa)); + } + assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId)); assert(!HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) || !varTypeIsSmall(node->GetSimdBaseType())); +#endif bool isTableDriven = HWIntrinsicInfo::genIsTableDrivenHWIntrinsic(intrinsicId, category); insOpts instOptions = INS_OPTS_NONE; @@ -994,9 +1017,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (isa) { - case InstructionSet_Vector128: - case InstructionSet_Vector256: - case InstructionSet_Vector512: + case InstructionSet_Vector: { 
genBaseIntrinsic(node, instOptions); break; @@ -1936,12 +1957,8 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) switch (intrinsicId) { - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { if (varTypeIsIntegral(baseType)) { @@ -2042,9 +2059,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - case NI_Vector128_WithElement: - case NI_Vector256_WithElement: - case NI_Vector512_WithElement: + case NI_Vector_WithElement: { // Optimize the case where op2 is not a constant. @@ -2096,9 +2111,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - case NI_Vector128_GetElement: - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: + case NI_Vector_GetElement: { assert(instOptions == INS_OPTS_NONE); @@ -2184,7 +2197,8 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) } else if (op2->OperIsConst()) { - assert(intrinsicId == NI_Vector128_GetElement); + assert(intrinsicId == NI_Vector_GetElement); + assert(simdType == TYP_SIMD16); assert(varTypeIsFloating(baseType)); assert(op1Reg != REG_NA); @@ -2255,12 +2269,10 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - case NI_Vector128_AsVector128Unsafe: - case NI_Vector128_AsVector2: - case NI_Vector128_AsVector3: - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_AsVector128Unsafe: + case NI_Vector_AsVector2: + case NI_Vector_AsVector3: + case NI_Vector_ToScalar: { // genOperandDesc looks through a contained CreateScalar/CreateScalarUnsafe to the operand it // wraps, which may itself live in a register (e.g. Vector128.CreateScalarUnsafe(x).ToScalar()). 
@@ -2303,16 +2315,16 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - case NI_Vector128_ToVector256: - case NI_Vector128_ToVector512: - case NI_Vector256_ToVector512: + case NI_Vector_ToVector256: + case NI_Vector_ToVector512: { // ToVector256 has zero-extend semantics in order to ensure it is deterministic // We always emit a move to the target register, even when op1Reg == targetReg, // in order to ensure that Bits MAXVL-1:128 are zeroed. - if (intrinsicId == NI_Vector256_ToVector512) + if (simdType == TYP_SIMD32) { + assert(intrinsicId == NI_Vector_ToVector512); attr = emitTypeSize(TYP_SIMD32); } else @@ -2334,11 +2346,10 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - case NI_Vector128_ToVector256Unsafe: - case NI_Vector256_ToVector512Unsafe: - case NI_Vector256_GetLower: - case NI_Vector512_GetLower: - case NI_Vector512_GetLower128: + case NI_Vector_ToVector256Unsafe: + case NI_Vector_ToVector512Unsafe: + case NI_Vector_GetLower: + case NI_Vector_GetLower128: { if (op1->isContained() || op1->isUsedFromSpillTemp()) { @@ -2346,7 +2357,11 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) // // For ToVector256Unsafe the upper bits don't matter and for GetLower we // only actually need the lower 16-bytes, so we can just be "more efficient" - if ((intrinsicId == NI_Vector512_GetLower) || (intrinsicId == NI_Vector256_ToVector512Unsafe)) + if (intrinsicId == NI_Vector_GetLower) + { + attr = emitTypeSize(node->TypeGet()); + } + else if (intrinsicId == NI_Vector_ToVector512Unsafe) { attr = emitTypeSize(TYP_SIMD32); } @@ -2367,7 +2382,11 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) // so the upper bits aren't impactful either allowing the same. 
// Just use movaps for reg->reg moves as it has zero-latency on modern CPUs - if ((intrinsicId == NI_Vector128_ToVector256Unsafe) || (intrinsicId == NI_Vector256_GetLower)) + if (intrinsicId == NI_Vector_GetLower) + { + attr = emitTypeSize(simdType); + } + else if (intrinsicId == NI_Vector_ToVector256Unsafe) { attr = emitTypeSize(TYP_SIMD32); } @@ -2380,8 +2399,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: + case NI_Vector_op_Division: { // We can emulate SIMD integer division by converting the 32-bit integer -> 64-bit double, // perform a 64-bit double divide, then convert back to a 32-bit integer. This is generating diff --git a/src/coreclr/jit/hwintrinsiclist.h b/src/coreclr/jit/hwintrinsiclist.h new file mode 100644 index 00000000000000..a0350afde1d542 --- /dev/null +++ b/src/coreclr/jit/hwintrinsiclist.h @@ -0,0 +1,276 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/*****************************************************************************/ +#ifndef HARDWARE_INTRINSIC +#error Define HARDWARE_INTRINSIC before including this file +#endif +/*****************************************************************************/ + +// clang-format off + +#ifdef FEATURE_HW_INTRINSICS +/* Note + 1) Each hardware intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` + 2) All the overloads of an intrinsic in an ISA class share one Intrinsic ID + 3) The intrinsic that generates instructions with a fixed imm8 operand has a `ival` field with "not -1" value, e.g., Sse.CompareEqual(v1,v2) -> cmpps xmm0, xmm1, 0 + 4) SIMD intrinsics have a non-zero `SIMD size` field based-on that operate over `Vector128`(16) or `Vector256`(32) + 5) Scalar intrinsics that operate over general purpose registers (e.g., Sse41.Crc32) have `SIMD size` with 0 + 6) Each intrinsic has a `NumArg` for number of parameters, and some intrinsics that are overloaded on multiple parameter numbers have this field with -1 + 7) Each intrinsic has 11 `instructions` fields that list the instructions should be generated based-on the base type + 8) Each intrinsic has one category with type of `enum HWIntrinsicCategory`, please see the definition of HWIntrinsicCategory for details + 9) Each intrinsic has one or more flags with type of `enum HWIntrinsicFlag` +*/ + +#if !defined(TARGET_XARCH) +#define HW_Flag_AvxOnlyCompatible (0) +#define HW_Flag_NoContainment (0) +#endif + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions IntCost FltCost 
Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// Intrinsics for Vector64/128/256/512 and Vector +#define FIRST_NI_Vector NI_Vector_Abs +HARDWARE_INTRINSIC(Vector, Abs, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, AddSaturate, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, AndNot, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, As, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsByte, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsDouble, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsInt16, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsInt32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsInt64, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsNInt, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsNUInt, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsSByte, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsSingle, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsUInt16, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsUInt32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsUInt64, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsVector, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsVector128, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + +#if defined(TARGET_XARCH) +HARDWARE_INTRINSIC(Vector, AsVector128Unsafe, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(Vector, AsVector2, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd_simd, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector, AsVector256, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, AsVector3, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +#elif defined(TARGET_ARM64) +HARDWARE_INTRINSIC(Vector, AsVector128Unsafe, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid, -1, 1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector, AsVector2, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, 1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVector3, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid, -1, 1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +#else +HARDWARE_INTRINSIC(Vector, AsVector128Unsafe, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, 1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVector2, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, 1, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVector3, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, 1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +#endif + +HARDWARE_INTRINSIC(Vector, AsVector4, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + +#if defined(TARGET_XARCH) +HARDWARE_INTRINSIC(Vector, AsVector512, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +#endif + +HARDWARE_INTRINSIC(Vector, Ceiling, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, ConcatLowerLower, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, ConcatLowerUpper, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, ConcatUpperLower, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, ConcatUpperUpper, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) + +#if defined(TARGET_XARCH) +HARDWARE_INTRINSIC(Vector, ConditionalSelect, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +#else +HARDWARE_INTRINSIC(Vector, ConditionalSelect, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +#endif + +HARDWARE_INTRINSIC(Vector, ConvertToDouble, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToInt32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, ConvertToInt32Native, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, ConvertToInt64, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToInt64Native, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToSingle, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, ConvertToUInt32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToUInt32Native, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToUInt64, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToUInt64Native, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, Create, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, CreateAlternatingSequence, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, 
CreateGeometricSequence, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) + +#if defined(TARGET_XARCH) +HARDWARE_INTRINSIC(Vector, CreateScalar, -1, 1, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, CreateScalarUnsafe, -1, 1, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_AvxOnlyCompatible) +#elif defined(TARGET_ARM64) +HARDWARE_INTRINSIC(Vector, CreateScalar, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector, CreateScalarUnsafe, -1, 1, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +#else +HARDWARE_INTRINSIC(Vector, CreateScalar, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector, CreateScalarUnsafe, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +#endif + +HARDWARE_INTRINSIC(Vector, CreateSequence, -1, 2, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, Dot, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, Equals, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, EqualsAny, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ExtractMostSignificantBits, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector, Floor, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, FusedMultiplyAdd, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) + +#if defined(TARGET_XARCH) +HARDWARE_INTRINSIC(Vector, GetElement, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_extractps, INS_invalid, -1, -1, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT|HW_Flag_AvxOnlyCompatible) +#elif defined(TARGET_ARM64) +HARDWARE_INTRINSIC(Vector, GetElement, -1, 2, INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT|HW_Flag_SupportsContainment) +#else +HARDWARE_INTRINSIC(Vector, GetElement, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT) +#endif + +#if defined(TARGET_XARCH) +HARDWARE_INTRINSIC(Vector, GetLower, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, GetLower128, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GetUpper, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +#elif defined(TARGET_ARM64) +HARDWARE_INTRINSIC(Vector, GetLower, -1, 1, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) 
+HARDWARE_INTRINSIC(Vector, GetUpper, -1, 1, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +#endif + +HARDWARE_INTRINSIC(Vector, GreaterThan, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GreaterThanAll, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GreaterThanAny, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GreaterThanOrEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GreaterThanOrEqualAll, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GreaterThanOrEqualAny, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, IsEvenInteger, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsFinite, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsInteger, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsNaN, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsNegative, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsNegativeInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsNormal, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsOddInteger, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsPositive, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsPositiveInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsSubnormal, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, IsZero, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, LessThan, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LessThanAll, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LessThanAny, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LessThanOrEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LessThanOrEqualAll, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LessThanOrEqualAny, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LoadAligned, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, LoadAlignedNonTemporal, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, LoadUnsafe, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, Max, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MaxMagnitude, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MaxMagnitudeNumber, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MaxNative, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MaxNumber, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Min, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MinMagnitude, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MinMagnitudeNumber, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MinNative, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MinNumber, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MultiplyAddEstimate, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Narrow, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) + +#if defined(TARGET_ARM64) +HARDWARE_INTRINSIC(Vector, NarrowWithSaturation, -1, 2, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +#else +HARDWARE_INTRINSIC(Vector, NarrowWithSaturation, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +#endif + +HARDWARE_INTRINSIC(Vector, Reverse, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Round, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, ShiftLeft, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Shuffle, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector, ShuffleNative, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector, ShuffleNativeFallback, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) 
+HARDWARE_INTRINSIC(Vector, Sqrt, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, StoreAligned, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, SubtractSaturate, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Sum, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + +#if defined(TARGET_XARCH) +HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT|HW_Flag_SpecialImport|HW_Flag_NoRMWSemantics|HW_Flag_AvxOnlyCompatible) 
+HARDWARE_INTRINSIC(Vector, ToVector256, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector, ToVector256Unsafe, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector, ToVector512, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector, ToVector512Unsafe, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) +#elif defined(TARGET_ARM64) +HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(Vector, ToVector128, -1, 1, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ToVector128Unsafe, -1, 1, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, -1, -1, HW_Category_SIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) +#else +HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT) +#endif + +HARDWARE_INTRINSIC(Vector, Truncate, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, UnzipEven, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, UnzipOdd, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, WidenUpper, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, WithElement, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) + +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +HARDWARE_INTRINSIC(Vector, WithLower, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, WithUpper, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +#endif + +HARDWARE_INTRINSIC(Vector, ZipLower, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, ZipUpper, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_E, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_Epsilon, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_Indices, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_NaN, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_NegativeInfinity, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_NegativeOne, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_NegativeZero, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_One, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_Pi, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_PositiveInfinity, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_SignSequence, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_Tau, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_BitwiseAnd, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, op_BitwiseOr, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) + +#if defined(TARGET_XARCH) +HARDWARE_INTRINSIC(Vector, op_Division, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport) +#else +HARDWARE_INTRINSIC(Vector, op_Division, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +#endif + +HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(Vector, op_ExclusiveOr, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(Vector, op_LeftShift, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_Multiply, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_OnesComplement, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, op_RightShift, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_Subtraction, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_UnaryNegation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_UnaryPlus, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector, op_UnsignedRightShift, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +#define LAST_NI_Vector NI_Vector_op_UnsignedRightShift + +#if defined(TARGET_XARCH) +#include "hwintrinsiclistxarch.h" +#elif defined(TARGET_ARM64) +#include "hwintrinsiclistarm64.h" +#elif defined(TARGET_WASM) +#include "hwintrinsiclistwasm.h" +#else +#error Unsupported platform +#endif +#endif // FEATURE_HW_INTRINSIC + +#undef HARDWARE_INTRINSIC + +// clang-format on diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index bfda592a20c866..c04f9bf9a8be30 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -11,905 +11,605 @@ #ifdef FEATURE_HW_INTRINSICS // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Vector64 Intrinsics 
-#define FIRST_NI_Vector64 NI_Vector64_Abs -HARDWARE_INTRINSIC(Vector64, Abs, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, AddSaturate, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, AndNot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, As, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsByte, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsDouble, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsInt16, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsInt64, 8, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsNInt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsNUInt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsSByte, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsUInt16, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, AsUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, Ceiling, 8, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, ConcatLowerLower, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, ConcatLowerUpper, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, ConcatUpperLower, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, ConcatUpperUpper, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, ConditionalSelect, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, ConvertToDouble, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToInt32Native, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToInt64Native, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToUInt32Native, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToUInt64Native, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, CreateAlternatingSequence, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, CreateGeometricSequence, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, CreateScalar, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(Vector64, CreateSequence, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Dot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, Equals, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, EqualsAny, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ExtractMostSignificantBits, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Floor, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, FusedMultiplyAdd, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, GetElement, 8, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SupportsContainment|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector64, GreaterThan, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, GreaterThanAll, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, GreaterThanAny, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqual, 8, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqualAll, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqualAny, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, IsEvenInteger, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsFinite, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsInfinity, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsInteger, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsNaN, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsNegative, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsNegativeInfinity, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsNormal, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsOddInteger, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsPositive, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsPositiveInfinity, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsSubnormal, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, IsZero, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, LessThan, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, LessThanAll, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, LessThanAny, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, LessThanOrEqual, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, LessThanOrEqualAll, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, LessThanOrEqualAny, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, LoadAligned, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, LoadAlignedNonTemporal, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, LoadUnsafe, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Max, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MaxMagnitude, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MaxMagnitudeNumber, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MaxNative, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MaxNumber, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Min, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MinMagnitude, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MinMagnitudeNumber, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MinNative, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MinNumber, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, MultiplyAddEstimate, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, NarrowWithSaturation, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Reverse, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Round, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector64, ShuffleNative, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector64, ShuffleNativeFallback, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, StoreUnsafe, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, SubtractSaturate, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Sum, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ToScalar, 8, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, 
HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector64, ToVector128, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector64, ToVector128Unsafe, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector64, Truncate, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, UnzipEven, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, UnzipOdd, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, WidenLower, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, WidenUpper, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, WithElement, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, ZipLower, 8, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, ZipUpper, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_E, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_Epsilon, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_Indices, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_NaN, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_NegativeInfinity, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_NegativeOne, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_NegativeZero, 8, 0, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_One, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_Pi, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_PositiveInfinity, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_SignSequence, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_Tau, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Addition, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_BitwiseAnd, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) 
-HARDWARE_INTRINSIC(Vector64, op_BitwiseOr, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector64, op_Division, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Equality, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector64, op_ExclusiveOr, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Inequality, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector64, op_LeftShift, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Multiply, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_OnesComplement, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_RightShift, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Subtraction, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_UnaryNegation, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_UnaryPlus, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -#define LAST_NI_Vector64 NI_Vector64_op_UnsignedRightShift - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Vector128 Intrinsics -#define FIRST_NI_Vector128 NI_Vector128_Abs -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, As, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConcatLowerLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConcatLowerUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConcatUpperLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConcatUpperUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, CreateAlternatingSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, CreateGeometricSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, FusedMultiplyAdd, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SupportsContainment|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector128, GetLower, 16, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, GetUpper, 16, 1, {INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, IsEvenInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsFinite, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNaN, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNegative, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNormal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsOddInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsPositive, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsSubnormal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LoadAligned, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LoadAlignedNonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LoadUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxMagnitude, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxMagnitudeNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Min, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinMagnitude, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinMagnitudeNumber, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Reverse, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, SubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, UnzipEven, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, UnzipOdd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, WithLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, WithUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ZipLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ZipUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_E, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Epsilon, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Indices, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NaN, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeOne, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeZero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Pi, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_PositiveInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_SignSequence, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Tau, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) 
-HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -#define LAST_NI_Vector128 NI_Vector128_op_UnsignedRightShift - -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AdvSimd Intrinsics -#define FIRST_NI_AdvSimd NI_AdvSimd_Abs -HARDWARE_INTRINSIC(AdvSimd, Abs, -1, 1, {INS_abs, INS_invalid, INS_abs, INS_invalid, INS_abs, INS_invalid, INS_invalid, INS_invalid, INS_fabs, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, AbsSaturate, -1, 1, {INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, AbsScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fabs, INS_fabs}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteCompareGreaterThan, -1, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteCompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteCompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteCompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifference, -1, 2, {INS_sabd, INS_uabd, INS_sabd, INS_uabd, INS_sabd, INS_uabd, INS_invalid, INS_invalid, INS_fabd, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceAdd, -1, 3, {INS_saba, INS_uaba, INS_saba, INS_uaba, INS_saba, INS_uaba, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceWideningLower, 8, 2, {INS_sabdl, INS_uabdl, INS_sabdl, INS_uabdl, INS_sabdl, INS_uabdl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceWideningLowerAndAdd, 8, 3, {INS_sabal, INS_uabal, INS_sabal, INS_uabal, INS_sabal, INS_uabal, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceWideningUpper, 16, 2, {INS_sabdl2, INS_uabdl2, INS_sabdl2, INS_uabdl2, INS_sabdl2, INS_uabdl2, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceWideningUpperAndAdd, 16, 3, {INS_sabal2, INS_uabal2, INS_sabal2, INS_uabal2, INS_sabal2, INS_uabal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, Add, -1, 2, {INS_add, INS_add, INS_add, INS_add, INS_add, INS_add, INS_add, INS_add, INS_fadd, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, AddHighNarrowingLower, 8, 2, {INS_addhn, INS_addhn, INS_addhn, INS_addhn, INS_addhn, INS_addhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, AddHighNarrowingUpper, 16, 3, {INS_addhn2, INS_addhn2, INS_addhn2, INS_addhn2, INS_addhn2, INS_addhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, AddPairwise, 8, 2, {INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_invalid, INS_invalid, INS_faddp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, AddPairwiseWidening, -1, 1, {INS_saddlp, INS_uaddlp, INS_saddlp, INS_uaddlp, INS_saddlp, INS_uaddlp, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, AddPairwiseWideningAndAdd, -1, 2, {INS_sadalp, INS_uadalp, INS_sadalp, INS_uadalp, INS_sadalp, INS_uadalp, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, AddPairwiseWideningAndAddScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sadalp, INS_uadalp, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) 
-HARDWARE_INTRINSIC(AdvSimd, AddPairwiseWideningScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_saddlp, INS_uaddlp, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, AddRoundedHighNarrowingLower, 8, 2, {INS_raddhn, INS_raddhn, INS_raddhn, INS_raddhn, INS_raddhn, INS_raddhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, AddRoundedHighNarrowingUpper, 16, 3, {INS_raddhn2, INS_raddhn2, INS_raddhn2, INS_raddhn2, INS_raddhn2, INS_raddhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, AddSaturate, -1, 2, {INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, AddSaturateScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqadd, INS_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, AddScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_add, INS_add, INS_fadd, INS_fadd}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, AddWideningLower, 8, 2, {INS_saddl, INS_uaddl, INS_saddl, INS_uaddl, INS_saddl, INS_uaddl, INS_saddw, INS_uaddw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, AddWideningUpper, 16, 2, {INS_saddl2, INS_uaddl2, INS_saddl2, INS_uaddl2, INS_saddl2, INS_uaddl2, INS_saddw2, INS_uaddw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, And, -1, 2, {INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, 
INS_and}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, BitwiseClear, -1, 2, {INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic}, HW_Category_SIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, CeilingScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd, CompareLessThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd, CompareLessThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, 
INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd, CompareTest, -1, 2, {INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_invalid, INS_invalid, INS_cmtst, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundAwayFromZero, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtas, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundAwayFromZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtas, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToEven, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtns, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToEvenScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtns, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToNegativeInfinity, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtms, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToNegativeInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtms, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToPositiveInfinity, 
-1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtps, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToPositiveInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtps, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToZero, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzs, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzs, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_scvtf, INS_ucvtf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToSingleScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_scvtf, INS_ucvtf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundAwayFromZero, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtau, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundAwayFromZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtau, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) 
-HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToEven, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtnu, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToEvenScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtnu, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToNegativeInfinity, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtmu, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToNegativeInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtmu, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToPositiveInfinity, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtpu, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToPositiveInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtpu, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToZero, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzu, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_fcvtzu, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, DivideScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fdiv, INS_fdiv}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, DuplicateSelectedScalarToVector128, -1, 2, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_invalid, INS_invalid, INS_dup, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, DuplicateSelectedScalarToVector64, -1, 2, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_invalid, INS_invalid, INS_dup, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, DuplicateToVector128, 16, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_invalid, INS_invalid, INS_dup, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd, DuplicateToVector64, 8, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_invalid, INS_invalid, INS_dup, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd, Extract, -1, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingLower, 8, 1, {INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingSaturateLower, 8, 1, {INS_sqxtn, INS_uqxtn, INS_sqxtn, INS_uqxtn, INS_sqxtn, INS_uqxtn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingSaturateUnsignedLower, 8, 1, {INS_invalid, INS_sqxtun, INS_invalid, INS_sqxtun, INS_invalid, INS_sqxtun, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingSaturateUnsignedUpper, 16, 2, {INS_invalid, INS_sqxtun2, INS_invalid, INS_sqxtun2, INS_invalid, INS_sqxtun2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingSaturateUpper, 16, 2, {INS_sqxtn2, INS_uqxtn2, INS_sqxtn2, INS_uqxtn2, INS_sqxtn2, INS_uqxtn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingUpper, 16, 2, {INS_xtn2, INS_xtn2, INS_xtn2, INS_xtn2, INS_xtn2, INS_xtn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ExtractVector128, 16, 3, {INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext}, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, ExtractVector64, 8, 3, {INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_invalid, INS_invalid, INS_ext, INS_invalid}, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, Floor, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, FloorScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_frintm}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, FusedAddHalving, -1, 2, {INS_shadd, INS_uhadd, INS_shadd, INS_uhadd, INS_shadd, INS_uhadd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, FusedAddRoundedHalving, -1, 2, {INS_srhadd, INS_urhadd, INS_srhadd, INS_urhadd, INS_srhadd, INS_urhadd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, FusedMultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, FusedMultiplyAddNegatedScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fnmadd, INS_fnmadd}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, FusedMultiplyAddScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmadd, INS_fmadd}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, FusedMultiplySubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, FusedMultiplySubtractNegatedScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fnmsub, INS_fnmsub}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, FusedMultiplySubtractScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmsub, INS_fmsub}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, FusedSubtractHalving, -1, 2, {INS_shsub, INS_uhsub, INS_shsub, INS_uhsub, INS_shsub, INS_uhsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, Insert, -1, 3, 
{INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins}, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd, InsertScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ins, INS_ins, INS_invalid, INS_ins}, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, LeadingSignCount, -1, 1, {INS_cls, INS_invalid, INS_cls, INS_invalid, INS_cls, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, LeadingZeroCount, -1, 1, {INS_clz, INS_clz, INS_clz, INS_clz, INS_clz, INS_clz, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, Load2xVector64, 8, 1, {INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_invalid, INS_invalid, INS_ld1_2regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, Load2xVector64AndUnzip, 8, 1, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_invalid, INS_invalid, INS_ld2, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, Load3xVector64, 8, 1, {INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_invalid, INS_invalid, INS_ld1_3regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, Load3xVector64AndUnzip, 8, 1, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_invalid, INS_invalid, 
INS_ld3, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, Load4xVector64, 8, 1, {INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_invalid, INS_invalid, INS_ld1_4regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, Load4xVector64AndUnzip, 8, 1, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_invalid, INS_invalid, INS_ld4, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, LoadAndInsertScalar, -1, 3, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, LoadAndInsertScalarVector64x2, 8, 3, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_invalid, INS_invalid, INS_ld2, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, LoadAndInsertScalarVector64x3, 8, 3, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_invalid, INS_invalid, INS_ld3, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, LoadAndInsertScalarVector64x4, 8, 3, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_invalid, INS_invalid, INS_ld4, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector128, 16, 1, {INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_invalid, INS_invalid, INS_ld1r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64, 8, 1, {INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_invalid, INS_invalid, INS_ld1r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x2, 8, 1, {INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_invalid, INS_invalid, INS_ld2r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x3, 8, 1, {INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_invalid, INS_invalid, INS_ld3r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x4, 8, 1, {INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_invalid, INS_invalid, INS_ld4r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AdvSimd, LoadVector64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AdvSimd, Max, -1, 2, 
{INS_smax, INS_umax, INS_smax, INS_umax, INS_smax, INS_umax, INS_invalid, INS_invalid, INS_fmax, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MaxNumber, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MaxNumberScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm, INS_fmaxnm}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, MaxPairwise, 8, 2, {INS_smaxp, INS_umaxp, INS_smaxp, INS_umaxp, INS_smaxp, INS_umaxp, INS_invalid, INS_invalid, INS_fmaxp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, Min, -1, 2, {INS_smin, INS_umin, INS_smin, INS_umin, INS_smin, INS_umin, INS_invalid, INS_invalid, INS_fmin, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MinNumber, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnm, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MinNumberScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnm, INS_fminnm}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, MinPairwise, 8, 2, {INS_sminp, INS_uminp, INS_sminp, INS_uminp, INS_sminp, INS_uminp, INS_invalid, INS_invalid, INS_fminp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, Multiply, -1, 2, {INS_mul, INS_mul, INS_mul, INS_mul, INS_mul, INS_mul, INS_invalid, INS_invalid, INS_fmul, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MultiplyAdd, -1, 3, {INS_mla, INS_mla, INS_mla, INS_mla, INS_mla, INS_mla, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyAddByScalar, -1, 3, {INS_invalid, INS_invalid, INS_mla, INS_mla, INS_mla, INS_mla, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyAddBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_mla, INS_mla, INS_mla, INS_mla, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyByScalar, -1, 2, {INS_invalid, INS_invalid, INS_mul, INS_mul, INS_mul, INS_mul, INS_invalid, INS_invalid, INS_fmul, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalar, -1, 3, {INS_invalid, INS_invalid, INS_mul, INS_mul, INS_mul, INS_mul, INS_invalid, INS_invalid, INS_fmul, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningLower, 8, 3, {INS_invalid, INS_invalid, INS_smull, INS_umull, INS_smull, INS_umull, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningLowerAndAdd, 8, 4, {INS_invalid, INS_invalid, INS_smlal, INS_umlal, INS_smlal, INS_umlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningLowerAndSubtract, 8, 4, {INS_invalid, INS_invalid, INS_smlsl, INS_umlsl, INS_smlsl, INS_umlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, 
MultiplyBySelectedScalarWideningUpper, 16, 3, {INS_invalid, INS_invalid, INS_smull2, INS_umull2, INS_smull2, INS_umull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningUpperAndAdd, 16, 4, {INS_invalid, INS_invalid, INS_smlal2, INS_umlal2, INS_smlal2, INS_umlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningUpperAndSubtract, 16, 4, {INS_invalid, INS_invalid, INS_smlsl2, INS_umlsl2, INS_smlsl2, INS_umlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingByScalarSaturateHigh, -1, 2, {INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingBySelectedScalarSaturateHigh, -1, 3, {INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingSaturateHigh, -1, 2, {INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerAndAddSaturate, 8, 3, {INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) 
-HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerAndSubtractSaturate, 8, 3, {INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerByScalarAndAddSaturate, 8, 3, {INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerByScalarAndSubtractSaturate, 8, 3, {INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerBySelectedScalarAndAddSaturate, 8, 4, {INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerBySelectedScalarAndSubtractSaturate, 8, 4, {INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateLower, 8, 2, {INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateLowerByScalar, 8, 2, {INS_invalid, INS_invalid, 
INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateLowerBySelectedScalar, 8, 3, {INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateUpper, 16, 2, {INS_invalid, INS_invalid, INS_sqdmull2, INS_invalid, INS_sqdmull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateUpperByScalar, 16, 2, {INS_invalid, INS_invalid, INS_sqdmull2, INS_invalid, INS_sqdmull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateUpperBySelectedScalar, 16, 3, {INS_invalid, INS_invalid, INS_sqdmull2, INS_invalid, INS_sqdmull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperAndAddSaturate, 16, 3, {INS_invalid, INS_invalid, INS_sqdmlal2, INS_invalid, INS_sqdmlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperAndSubtractSaturate, 16, 3, {INS_invalid, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, 
MultiplyDoublingWideningUpperByScalarAndAddSaturate, 16, 3, {INS_invalid, INS_invalid, INS_sqdmlal2, INS_invalid, INS_sqdmlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperByScalarAndSubtractSaturate, 16, 3, {INS_invalid, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperBySelectedScalarAndAddSaturate, 16, 4, {INS_invalid, INS_invalid, INS_sqdmlal2, INS_invalid, INS_sqdmlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperBySelectedScalarAndSubtractSaturate, 16, 4, {INS_invalid, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyRoundedDoublingByScalarSaturateHigh, -1, 2, {INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, MultiplyRoundedDoublingBySelectedScalarSaturateHigh, -1, 3, {INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, MultiplyRoundedDoublingSaturateHigh, -1, 2, {INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MultiplyScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul, INS_fmul}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, MultiplyScalarBySelectedScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, MultiplySubtract, -1, 3, {INS_mls, INS_mls, INS_mls, INS_mls, INS_mls, INS_mls, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplySubtractByScalar, -1, 3, {INS_invalid, INS_invalid, INS_mls, INS_mls, INS_mls, INS_mls, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplySubtractBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_mls, INS_mls, INS_mls, INS_mls, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningLower, 8, 2, {INS_smull, INS_umull, INS_smull, INS_umull, INS_smull, INS_umull, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningLowerAndAdd, 8, 3, {INS_smlal, INS_umlal, INS_smlal, INS_umlal, INS_smlal, INS_umlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningLowerAndSubtract, 8, 3, {INS_smlsl, INS_umlsl, INS_smlsl, INS_umlsl, INS_smlsl, INS_umlsl, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningUpper, 16, 2, {INS_smull2, INS_umull2, INS_smull2, INS_umull2, INS_smull2, INS_umull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningUpperAndAdd, 16, 3, {INS_smlal2, INS_umlal2, INS_smlal2, INS_umlal2, INS_smlal2, INS_umlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningUpperAndSubtract, 16, 3, {INS_smlsl2, INS_umlsl2, INS_smlsl2, INS_umlsl2, INS_smlsl2, INS_umlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, Negate, -1, 1, {INS_neg, INS_invalid, INS_neg, INS_invalid, INS_neg, INS_invalid, INS_invalid, INS_invalid, INS_fneg, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, NegateSaturate, -1, 1, {INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, NegateScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fneg, INS_fneg}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, Not, -1, 1, {INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, Or, -1, 2, {INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, OrNot, -1, 2, {INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn}, HW_Category_SIMD, 
HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd, PolynomialMultiply, -1, 2, {INS_pmul, INS_pmul, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, PolynomialMultiplyWideningLower, 8, 2, {INS_pmull, INS_pmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, PolynomialMultiplyWideningUpper, 16, 2, {INS_pmull2, INS_pmull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, PopCount, -1, 1, {INS_cnt, INS_cnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ReciprocalEstimate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_urecpe, INS_invalid, INS_invalid, INS_frecpe, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ReciprocalSquareRootEstimate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ursqrte, INS_invalid, INS_invalid, INS_frsqrte, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ReciprocalSquareRootStep, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrts, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, ReciprocalStep, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecps, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, ReverseElement16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_rev32, INS_rev32, INS_rev64, INS_rev64, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, ReverseElement32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rev64, INS_rev64, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, ReverseElement8, -1, 1, {INS_invalid, INS_invalid, INS_rev16, INS_rev16, INS_rev32, INS_rev32, INS_rev64, INS_rev64, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, RoundAwayFromZero, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frinta, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, RoundAwayFromZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frinta, INS_frinta}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, RoundToNearest, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintn, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, RoundToNearestScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintn, INS_frintn}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, RoundToNegativeInfinity, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, RoundToNegativeInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_frintm}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, RoundToPositiveInfinity, -1, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, RoundToPositiveInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, RoundToZero, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintz, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, RoundToZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintz, INS_frintz}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftArithmetic, -1, 2, {INS_sshl, INS_invalid, INS_sshl, INS_invalid, INS_sshl, INS_invalid, INS_sshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticRounded, -1, 2, {INS_srshl, INS_invalid, INS_srshl, INS_invalid, INS_srshl, INS_invalid, INS_srshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticRoundedSaturate, -1, 2, {INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticRoundedSaturateScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticRoundedScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_srshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticSaturate, -1, 2, 
{INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticSaturateScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftAndInsert, -1, 3, {INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftAndInsertScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sli, INS_sli, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogical, -1, 2, {INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturate, -1, 2, {INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqshl, INS_uqshl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateUnsigned, -1, 2, {INS_sqshlu, INS_invalid, 
INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateUnsignedScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqshlu, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shl, INS_shl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalWideningLower, 8, 2, {INS_sshll, INS_ushll, INS_sshll, INS_ushll, INS_sshll, INS_ushll, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalWideningUpper, 16, 2, {INS_sshll2, INS_ushll2, INS_sshll2, INS_ushll2, INS_sshll2, INS_ushll2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftLogical, -1, 2, {INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalRounded, -1, 2, {INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalRoundedSaturate, -1, 2, {INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) 
-HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalRoundedSaturateScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_uqrshl, INS_uqrshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalRoundedScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_urshl, INS_urshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalSaturate, -1, 2, {INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalSaturateScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_uqshl, INS_uqshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ushl, INS_ushl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightAndInsert, -1, 3, {INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightAndInsertScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sri, INS_sri, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmetic, -1, 2, {INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticAdd, -1, 3, {INS_ssra, 
INS_invalid, INS_ssra, INS_invalid, INS_ssra, INS_invalid, INS_ssra, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticAddScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ssra, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateLower, 8, 2, {INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateUnsignedLower, 8, 2, {INS_invalid, INS_sqshrun, INS_invalid, INS_sqshrun, INS_invalid, INS_sqshrun, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateUnsignedUpper, 16, 3, {INS_invalid, INS_sqshrun2, INS_invalid, INS_sqshrun2, INS_invalid, INS_sqshrun2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateUpper, 16, 3, {INS_sqshrn2, INS_invalid, INS_sqshrn2, INS_invalid, INS_sqshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRounded, -1, 2, {INS_srshr, INS_invalid, INS_srshr, INS_invalid, INS_srshr, INS_invalid, INS_srshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedAdd, -1, 3, {INS_srsra, 
INS_invalid, INS_srsra, INS_invalid, INS_srsra, INS_invalid, INS_srsra, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedAddScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_srsra, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateLower, 8, 2, {INS_sqrshrn, INS_invalid, INS_sqrshrn, INS_invalid, INS_sqrshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedLower, 8, 2, {INS_invalid, INS_sqrshrun, INS_invalid, INS_sqrshrun, INS_invalid, INS_sqrshrun, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedUpper, 16, 3, {INS_invalid, INS_sqrshrun2, INS_invalid, INS_sqrshrun2, INS_invalid, INS_sqrshrun2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUpper, 16, 3, {INS_sqrshrn2, INS_invalid, INS_sqrshrn2, INS_invalid, INS_sqrshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_srshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) 
-HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogical, -1, 2, {INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalAdd, -1, 3, {INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalAddScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_usra, INS_usra, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingLower, 8, 2, {INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingSaturateLower, 8, 2, {INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingSaturateUpper, 16, 3, {INS_uqshrn2, INS_uqshrn2, INS_uqshrn2, INS_uqshrn2, INS_uqshrn2, INS_uqshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingUpper, 16, 3, {INS_shrn2, INS_shrn2, 
INS_shrn2, INS_shrn2, INS_shrn2, INS_shrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRounded, -1, 2, {INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedAdd, -1, 3, {INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedAddScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ursra, INS_ursra, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingLower, 8, 2, {INS_rshrn, INS_rshrn, INS_rshrn, INS_rshrn, INS_rshrn, INS_rshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingSaturateLower, 8, 2, {INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingSaturateUpper, 16, 3, {INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingUpper, 16, 3, {INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_urshr, INS_urshr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ushr, INS_ushr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) -HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, 8, 1, {INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, -1, 3, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, StoreVectorAndZip, 8, 2, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, Subtract, -1, 2, {INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_fsub, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingLower, 8, 2, {INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingUpper, 16, 3, {INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, SubtractRoundedHighNarrowingLower, 8, 2, {INS_rsubhn, INS_rsubhn, INS_rsubhn, INS_rsubhn, INS_rsubhn, INS_rsubhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, SubtractRoundedHighNarrowingUpper, 16, 3, {INS_rsubhn2, INS_rsubhn2, INS_rsubhn2, INS_rsubhn2, INS_rsubhn2, INS_rsubhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd, SubtractSaturate, -1, 2, {INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, SubtractSaturateScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid}, 
HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sub, INS_sub, INS_fsub, INS_fsub}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningUpper, 16, 1, {INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -#define LAST_NI_AdvSimd 
NI_AdvSimd_ZeroExtendWideningUpper +#define FIRST_NI_AdvSimd NI_AdvSimd_Abs +HARDWARE_INTRINSIC(AdvSimd, Abs, -1, 1, INS_abs, INS_invalid, INS_abs, INS_invalid, INS_abs, INS_invalid, INS_invalid, INS_invalid, INS_fabs, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, AbsSaturate, -1, 1, INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, AbsScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fabs, INS_fabs, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteCompareGreaterThan, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteCompareGreaterThanOrEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteCompareLessThan, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteCompareLessThanOrEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifference, -1, 2, INS_sabd, INS_uabd, INS_sabd, INS_uabd, INS_sabd, INS_uabd, INS_invalid, INS_invalid, INS_fabd, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceAdd, -1, 3, 
INS_saba, INS_uaba, INS_saba, INS_uaba, INS_saba, INS_uaba, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceWideningLower, 8, 2, INS_sabdl, INS_uabdl, INS_sabdl, INS_uabdl, INS_sabdl, INS_uabdl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceWideningLowerAndAdd, 8, 3, INS_sabal, INS_uabal, INS_sabal, INS_uabal, INS_sabal, INS_uabal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceWideningUpper, 16, 2, INS_sabdl2, INS_uabdl2, INS_sabdl2, INS_uabdl2, INS_sabdl2, INS_uabdl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, AbsoluteDifferenceWideningUpperAndAdd, 16, 3, INS_sabal2, INS_uabal2, INS_sabal2, INS_uabal2, INS_sabal2, INS_uabal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, Add, -1, 2, INS_add, INS_add, INS_add, INS_add, INS_add, INS_add, INS_add, INS_add, INS_fadd, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, AddHighNarrowingLower, 8, 2, INS_addhn, INS_addhn, INS_addhn, INS_addhn, INS_addhn, INS_addhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, AddHighNarrowingUpper, 16, 3, INS_addhn2, INS_addhn2, INS_addhn2, INS_addhn2, INS_addhn2, INS_addhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, AddPairwise, 8, 2, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, 
INS_invalid, INS_invalid, INS_faddp, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, AddPairwiseWidening, -1, 1, INS_saddlp, INS_uaddlp, INS_saddlp, INS_uaddlp, INS_saddlp, INS_uaddlp, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, AddPairwiseWideningAndAdd, -1, 2, INS_sadalp, INS_uadalp, INS_sadalp, INS_uadalp, INS_sadalp, INS_uadalp, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, AddPairwiseWideningAndAddScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sadalp, INS_uadalp, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, AddPairwiseWideningScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_saddlp, INS_uaddlp, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, AddRoundedHighNarrowingLower, 8, 2, INS_raddhn, INS_raddhn, INS_raddhn, INS_raddhn, INS_raddhn, INS_raddhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, AddRoundedHighNarrowingUpper, 16, 3, INS_raddhn2, INS_raddhn2, INS_raddhn2, INS_raddhn2, INS_raddhn2, INS_raddhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, AddSaturate, -1, 2, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, AddSaturateScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqadd, INS_uqadd, INS_invalid, INS_invalid, -1, 
-1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, AddScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_add, INS_add, INS_fadd, INS_fadd, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, AddWideningLower, 8, 2, INS_saddl, INS_uaddl, INS_saddl, INS_uaddl, INS_saddl, INS_uaddl, INS_saddw, INS_uaddw, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, AddWideningUpper, 16, 2, INS_saddl2, INS_uaddl2, INS_saddl2, INS_uaddl2, INS_saddl2, INS_uaddl2, INS_saddw2, INS_uaddw2, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, And, -1, 2, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, BitwiseClear, -1, 2, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, CeilingScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Commutative|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd, CompareLessThan, -1, 2, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd, CompareLessThanOrEqual, -1, 2, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd, CompareTest, -1, 2, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_invalid, INS_invalid, INS_cmtst, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundAwayFromZero, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtas, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundAwayFromZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtas, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToEven, -1, 1, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtns, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToEvenScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtns, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToNegativeInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtms, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToNegativeInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtms, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToPositiveInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtps, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToPositiveInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtps, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToZero, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzs, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToInt32RoundToZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzs, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToSingle, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_scvtf, INS_ucvtf, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToSingleScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_scvtf, INS_ucvtf, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundAwayFromZero, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtau, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundAwayFromZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtau, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToEven, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtnu, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToEvenScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtnu, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToNegativeInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtmu, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToNegativeInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtmu, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToPositiveInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtpu, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToPositiveInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtpu, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToZero, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzu, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ConvertToUInt32RoundToZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzu, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, DivideScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fdiv, INS_fdiv, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, DuplicateSelectedScalarToVector128, -1, 2, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_invalid, INS_invalid, INS_dup, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, DuplicateSelectedScalarToVector64, -1, 2, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_invalid, INS_invalid, INS_dup, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) 
+HARDWARE_INTRINSIC(AdvSimd, DuplicateToVector128, 16, 1, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_invalid, INS_invalid, INS_dup, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, DuplicateToVector64, 8, 1, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_invalid, INS_invalid, INS_dup, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, Extract, -1, 2, INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsScalarT) +HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingLower, 8, 1, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingSaturateLower, 8, 1, INS_sqxtn, INS_uqxtn, INS_sqxtn, INS_uqxtn, INS_sqxtn, INS_uqxtn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingSaturateUnsignedLower, 8, 1, INS_invalid, INS_sqxtun, INS_invalid, INS_sqxtun, INS_invalid, INS_sqxtun, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingSaturateUnsignedUpper, 16, 2, INS_invalid, INS_sqxtun2, INS_invalid, INS_sqxtun2, INS_invalid, INS_sqxtun2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingSaturateUpper, 16, 2, INS_sqxtn2, INS_uqxtn2, INS_sqxtn2, INS_uqxtn2, INS_sqxtn2, INS_uqxtn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ExtractNarrowingUpper, 16, 2, INS_xtn2, INS_xtn2, 
INS_xtn2, INS_xtn2, INS_xtn2, INS_xtn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ExtractVector128, 16, 3, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, -1, -1, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, ExtractVector64, 8, 3, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_invalid, INS_invalid, INS_ext, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, Floor, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, FloorScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_frintm, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, FusedAddHalving, -1, 2, INS_shadd, INS_uhadd, INS_shadd, INS_uhadd, INS_shadd, INS_uhadd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, FusedAddRoundedHalving, -1, 2, INS_srhadd, INS_urhadd, INS_srhadd, INS_urhadd, INS_srhadd, INS_urhadd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, FusedMultiplyAdd, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, FusedMultiplyAddNegatedScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fnmadd, INS_fnmadd, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) 
+HARDWARE_INTRINSIC(AdvSimd, FusedMultiplyAddScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmadd, INS_fmadd, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, FusedMultiplySubtract, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, FusedMultiplySubtractNegatedScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fnmsub, INS_fnmsub, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, FusedMultiplySubtractScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmsub, INS_fmsub, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, FusedSubtractHalving, -1, 2, INS_shsub, INS_uhsub, INS_shsub, INS_uhsub, INS_shsub, INS_uhsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, Insert, -1, 3, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, -1, -1, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, InsertScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ins, INS_ins, INS_invalid, INS_ins, -1, -1, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, LeadingSignCount, -1, 1, INS_cls, INS_invalid, INS_cls, INS_invalid, INS_cls, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, LeadingZeroCount, -1, 1, INS_clz, INS_clz, INS_clz, INS_clz, INS_clz, INS_clz, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, Load2xVector64, 8, 1, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_invalid, INS_invalid, INS_ld1_2regs, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, Load2xVector64AndUnzip, 8, 1, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_invalid, INS_invalid, INS_ld2, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, Load3xVector64, 8, 1, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_invalid, INS_invalid, INS_ld1_3regs, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, Load3xVector64AndUnzip, 8, 1, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_invalid, INS_invalid, INS_ld3, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, Load4xVector64, 8, 1, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_invalid, INS_invalid, INS_ld1_4regs, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, Load4xVector64AndUnzip, 8, 1, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_invalid, INS_invalid, INS_ld4, INS_invalid, -1, -1, 
HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, LoadAndInsertScalar, -1, 3, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, -1, -1, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, LoadAndInsertScalarVector64x2, 8, 3, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_invalid, INS_invalid, INS_ld2, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, LoadAndInsertScalarVector64x3, 8, 3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_invalid, INS_invalid, INS_ld3, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, LoadAndInsertScalarVector64x4, 8, 3, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_invalid, INS_invalid, INS_ld4, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector128, 16, 1, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_invalid, INS_invalid, INS_ld1r, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64, 8, 1, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_invalid, INS_invalid, INS_ld1r, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, 
LoadAndReplicateToVector64x2, 8, 1, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_invalid, INS_invalid, INS_ld2r, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x3, 8, 1, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_invalid, INS_invalid, INS_ld3r, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x4, 8, 1, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_invalid, INS_invalid, INS_ld4r, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, LoadVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AdvSimd, LoadVector64, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AdvSimd, Max, -1, 2, INS_smax, INS_umax, INS_smax, INS_umax, INS_smax, INS_umax, INS_invalid, INS_invalid, INS_fmax, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MaxNumber, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MaxNumberScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm, INS_fmaxnm, -1, -1, HW_Category_SIMD, 
HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, MaxPairwise, 8, 2, INS_smaxp, INS_umaxp, INS_smaxp, INS_umaxp, INS_smaxp, INS_umaxp, INS_invalid, INS_invalid, INS_fmaxp, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, Min, -1, 2, INS_smin, INS_umin, INS_smin, INS_umin, INS_smin, INS_umin, INS_invalid, INS_invalid, INS_fmin, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MinNumber, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnm, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MinNumberScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnm, INS_fminnm, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, MinPairwise, 8, 2, INS_sminp, INS_uminp, INS_sminp, INS_uminp, INS_sminp, INS_uminp, INS_invalid, INS_invalid, INS_fminp, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, Multiply, -1, 2, INS_mul, INS_mul, INS_mul, INS_mul, INS_mul, INS_mul, INS_invalid, INS_invalid, INS_fmul, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MultiplyAdd, -1, 3, INS_mla, INS_mla, INS_mla, INS_mla, INS_mla, INS_mla, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyAddByScalar, -1, 3, INS_invalid, INS_invalid, INS_mla, INS_mla, INS_mla, INS_mla, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyAddBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_mla, INS_mla, INS_mla, INS_mla, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, 
HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyByScalar, -1, 2, INS_invalid, INS_invalid, INS_mul, INS_mul, INS_mul, INS_mul, INS_invalid, INS_invalid, INS_fmul, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalar, -1, 3, INS_invalid, INS_invalid, INS_mul, INS_mul, INS_mul, INS_mul, INS_invalid, INS_invalid, INS_fmul, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningLower, 8, 3, INS_invalid, INS_invalid, INS_smull, INS_umull, INS_smull, INS_umull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningLowerAndAdd, 8, 4, INS_invalid, INS_invalid, INS_smlal, INS_umlal, INS_smlal, INS_umlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningLowerAndSubtract, 8, 4, INS_invalid, INS_invalid, INS_smlsl, INS_umlsl, INS_smlsl, INS_umlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningUpper, 16, 3, INS_invalid, INS_invalid, INS_smull2, INS_umull2, INS_smull2, INS_umull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningUpperAndAdd, 16, 4, INS_invalid, INS_invalid, INS_smlal2, INS_umlal2, INS_smlal2, INS_umlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyBySelectedScalarWideningUpperAndSubtract, 16, 4, INS_invalid, INS_invalid, INS_smlsl2, INS_umlsl2, INS_smlsl2, INS_umlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingByScalarSaturateHigh, -1, 2, INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingBySelectedScalarSaturateHigh, -1, 3, INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingSaturateHigh, -1, 2, INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerAndAddSaturate, 8, 3, INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerAndSubtractSaturate, 8, 3, INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerByScalarAndAddSaturate, 8, 3, INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerByScalarAndSubtractSaturate, 8, 3, INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerBySelectedScalarAndAddSaturate, 8, 4, INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningLowerBySelectedScalarAndSubtractSaturate, 8, 4, INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateLower, 8, 2, INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateLowerByScalar, 8, 2, INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateLowerBySelectedScalar, 8, 3, INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateUpper, 16, 2, INS_invalid, INS_invalid, INS_sqdmull2, INS_invalid, INS_sqdmull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateUpperByScalar, 16, 2, INS_invalid, INS_invalid, INS_sqdmull2, INS_invalid, INS_sqdmull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningSaturateUpperBySelectedScalar, 16, 3, INS_invalid, INS_invalid, INS_sqdmull2, INS_invalid, INS_sqdmull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperAndAddSaturate, 16, 3, INS_invalid, INS_invalid, INS_sqdmlal2, INS_invalid, INS_sqdmlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperAndSubtractSaturate, 16, 3, INS_invalid, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperByScalarAndAddSaturate, 16, 3, INS_invalid, INS_invalid, INS_sqdmlal2, INS_invalid, INS_sqdmlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperByScalarAndSubtractSaturate, 16, 3, 
INS_invalid, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperBySelectedScalarAndAddSaturate, 16, 4, INS_invalid, INS_invalid, INS_sqdmlal2, INS_invalid, INS_sqdmlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyDoublingWideningUpperBySelectedScalarAndSubtractSaturate, 16, 4, INS_invalid, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_sqdmlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyRoundedDoublingByScalarSaturateHigh, -1, 2, INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, MultiplyRoundedDoublingBySelectedScalarSaturateHigh, -1, 3, INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, MultiplyRoundedDoublingSaturateHigh, -1, 2, INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MultiplyScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul, INS_fmul, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) 
+HARDWARE_INTRINSIC(AdvSimd, MultiplyScalarBySelectedScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, MultiplySubtract, -1, 3, INS_mls, INS_mls, INS_mls, INS_mls, INS_mls, INS_mls, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplySubtractByScalar, -1, 3, INS_invalid, INS_invalid, INS_mls, INS_mls, INS_mls, INS_mls, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplySubtractBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_mls, INS_mls, INS_mls, INS_mls, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningLower, 8, 2, INS_smull, INS_umull, INS_smull, INS_umull, INS_smull, INS_umull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningLowerAndAdd, 8, 3, INS_smlal, INS_umlal, INS_smlal, INS_umlal, INS_smlal, INS_umlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningLowerAndSubtract, 8, 3, INS_smlsl, INS_umlsl, INS_smlsl, INS_umlsl, INS_smlsl, INS_umlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningUpper, 16, 2, INS_smull2, INS_umull2, INS_smull2, INS_umull2, INS_smull2, INS_umull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningUpperAndAdd, 16, 3, INS_smlal2, INS_umlal2, INS_smlal2, INS_umlal2, INS_smlal2, INS_umlal2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, MultiplyWideningUpperAndSubtract, 16, 3, INS_smlsl2, INS_umlsl2, INS_smlsl2, INS_umlsl2, INS_smlsl2, INS_umlsl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, Negate, -1, 1, INS_neg, INS_invalid, INS_neg, INS_invalid, INS_neg, INS_invalid, INS_invalid, INS_invalid, INS_fneg, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, NegateSaturate, -1, 1, INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, NegateScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fneg, INS_fneg, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, Not, -1, 1, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, INS_mvn, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, Or, -1, 2, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, INS_orr, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, OrNot, -1, 2, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, INS_orn, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd, PolynomialMultiply, -1, 2, INS_pmul, INS_pmul, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, PolynomialMultiplyWideningLower, 8, 2, INS_pmull, INS_pmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, PolynomialMultiplyWideningUpper, 16, 2, INS_pmull2, INS_pmull2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, PopCount, -1, 1, INS_cnt, INS_cnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ReciprocalEstimate, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_urecpe, INS_invalid, INS_invalid, INS_frecpe, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ReciprocalSquareRootEstimate, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ursqrte, INS_invalid, INS_invalid, INS_frsqrte, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ReciprocalSquareRootStep, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrts, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, ReciprocalStep, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecps, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, ReverseElement16, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rev32, INS_rev32, INS_rev64, INS_rev64, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, ReverseElement32, -1, 1, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rev64, INS_rev64, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, ReverseElement8, -1, 1, INS_invalid, INS_invalid, INS_rev16, INS_rev16, INS_rev32, INS_rev32, INS_rev64, INS_rev64, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, RoundAwayFromZero, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frinta, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, RoundAwayFromZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frinta, INS_frinta, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, RoundToNearest, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintn, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, RoundToNearestScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintn, INS_frintn, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, RoundToNegativeInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, RoundToNegativeInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_frintm, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, RoundToPositiveInfinity, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, RoundToPositiveInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, RoundToZero, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintz, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, RoundToZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintz, INS_frintz, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftArithmetic, -1, 2, INS_sshl, INS_invalid, INS_sshl, INS_invalid, INS_sshl, INS_invalid, INS_sshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticRounded, -1, 2, INS_srshl, INS_invalid, INS_srshl, INS_invalid, INS_srshl, INS_invalid, INS_srshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticRoundedSaturate, -1, 2, INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticRoundedSaturateScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticRoundedScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_srshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticSaturate, -1, 2, INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, 
INS_sqshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticSaturateScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftArithmeticScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftAndInsert, -1, 3, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, INS_sli, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftAndInsertScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sli, INS_sli, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogical, -1, 2, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_shl, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturate, -1, 2, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqshl, INS_uqshl, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateUnsigned, -1, 2, INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, 
INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalSaturateUnsignedScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqshlu, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shl, INS_shl, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalWideningLower, 8, 2, INS_sshll, INS_ushll, INS_sshll, INS_ushll, INS_sshll, INS_ushll, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftLeftLogicalWideningUpper, 16, 2, INS_sshll2, INS_ushll2, INS_sshll2, INS_ushll2, INS_sshll2, INS_ushll2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftLogical, -1, 2, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_ushl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalRounded, -1, 2, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_urshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalRoundedSaturate, -1, 2, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalRoundedSaturateScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_uqrshl, INS_uqrshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalRoundedScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_urshl, INS_urshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalSaturate, -1, 2, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalSaturateScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_uqshl, INS_uqshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftLogicalScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ushl, INS_ushl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightAndInsert, -1, 3, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, INS_sri, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightAndInsertScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sri, INS_sri, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmetic, -1, 2, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_sshr, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) 
+HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticAdd, -1, 3, INS_ssra, INS_invalid, INS_ssra, INS_invalid, INS_ssra, INS_invalid, INS_ssra, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticAddScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ssra, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateLower, 8, 2, INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateUnsignedLower, 8, 2, INS_invalid, INS_sqshrun, INS_invalid, INS_sqshrun, INS_invalid, INS_sqshrun, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateUnsignedUpper, 16, 3, INS_invalid, INS_sqshrun2, INS_invalid, INS_sqshrun2, INS_invalid, INS_sqshrun2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticNarrowingSaturateUpper, 16, 3, INS_sqshrn2, INS_invalid, INS_sqshrn2, INS_invalid, INS_sqshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRounded, -1, 2, INS_srshr, INS_invalid, INS_srshr, INS_invalid, INS_srshr, INS_invalid, INS_srshr, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedAdd, -1, 3, INS_srsra, INS_invalid, INS_srsra, INS_invalid, INS_srsra, INS_invalid, INS_srsra, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedAddScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_srsra, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateLower, 8, 2, INS_sqrshrn, INS_invalid, INS_sqrshrn, INS_invalid, INS_sqrshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedLower, 8, 2, INS_invalid, INS_sqrshrun, INS_invalid, INS_sqrshrun, INS_invalid, INS_sqrshrun, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedUpper, 16, 3, INS_invalid, INS_sqrshrun2, INS_invalid, INS_sqrshrun2, INS_invalid, INS_sqrshrun2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedNarrowingSaturateUpper, 16, 3, INS_sqrshrn2, INS_invalid, INS_sqrshrn2, INS_invalid, INS_sqrshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticRoundedScalar, 8, 2, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_srshr, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightArithmeticScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sshr, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogical, -1, 2, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_ushr, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_NoJmpTableIMM) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalAdd, -1, 3, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_usra, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalAddScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_usra, INS_usra, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingLower, 8, 2, INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_shrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingSaturateLower, 8, 2, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingSaturateUpper, 16, 3, INS_uqshrn2, INS_uqshrn2, INS_uqshrn2, INS_uqshrn2, INS_uqshrn2, INS_uqshrn2, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalNarrowingUpper, 16, 3, INS_shrn2, INS_shrn2, INS_shrn2, INS_shrn2, INS_shrn2, INS_shrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRounded, -1, 2, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_urshr, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedAdd, -1, 3, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_ursra, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedAddScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ursra, INS_ursra, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingLower, 8, 2, INS_rshrn, INS_rshrn, INS_rshrn, INS_rshrn, INS_rshrn, INS_rshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingSaturateLower, 8, 2, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingSaturateUpper, 16, 3, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_uqrshrn2, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedNarrowingUpper, 16, 3, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_rshrn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalRoundedScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_urshr, INS_urshr, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ushr, INS_ushr, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_NoJmpTableIMM) +HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, 8, 1, INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid, -1, -1, HW_Category_MemoryStore, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, -1, 3, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, StoreVectorAndZip, 8, 2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, Subtract, -1, 2, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_fsub, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingLower, 8, 2, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingUpper, 16, 3, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, SubtractRoundedHighNarrowingLower, 8, 2, INS_rsubhn, INS_rsubhn, INS_rsubhn, INS_rsubhn, INS_rsubhn, INS_rsubhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, SubtractRoundedHighNarrowingUpper, 16, 3, INS_rsubhn2, INS_rsubhn2, INS_rsubhn2, INS_rsubhn2, INS_rsubhn2, INS_rsubhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, 
SubtractSaturate, -1, 2, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, SubtractSaturateScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sub, INS_sub, INS_fsub, INS_fsub, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, INS_uxtl, INS_uxtl, 
INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningUpper, 16, 1, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +#define LAST_NI_AdvSimd NI_AdvSimd_ZeroExtendWideningUpper // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AdvSimd 64-bit only Intrinsics -#define FIRST_NI_AdvSimd_Arm64 NI_AdvSimd_Arm64_Abs -HARDWARE_INTRINSIC(AdvSimd_Arm64, Abs, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_abs, INS_invalid, INS_invalid, INS_fabs}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) 
-HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsSaturate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqabs, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsSaturateScalar, 8, 1, {INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_abs, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareGreaterThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, INS_facge}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareGreaterThanScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, INS_facgt}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, 
AbsoluteCompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareLessThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, INS_facge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareLessThanScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, INS_facgt}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteDifference, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fabd}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteDifferenceScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fabd, INS_fabd}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Add, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fadd}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AddAcross, -1, 1, {INS_addv, INS_addv, INS_addv, INS_addv, INS_addv, INS_addv, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AddAcrossWidening, -1, 1, {INS_saddlv, INS_uaddlv, INS_saddlv, INS_uaddlv, INS_saddlv, INS_uaddlv, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwise, 16, 2, {INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, 
INS_faddp, INS_faddp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwiseScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addp, INS_addp, INS_faddp, INS_faddp}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturate, -1, 2, {INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturateScalar, 8, 2, {INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) 
-HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareTest, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmtst, INS_cmtst, INS_invalid, INS_cmtst}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareTestScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmtst, INS_cmtst, INS_invalid, INS_cmtst}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_scvtf, INS_ucvtf, INS_fcvtl, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToDoubleScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_scvtf, INS_ucvtf, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToDoubleUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtl2, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundAwayFromZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtas}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundAwayFromZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtas}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToEven, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtns}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) 
-HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToEvenScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtns}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtms}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToNegativeInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtms}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtps}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToPositiveInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtps}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzs}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzs}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToSingleLower, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtn, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToSingleRoundToOddLower, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtxn, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToSingleRoundToOddUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtxn2, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToSingleUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtn2, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundAwayFromZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtau}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundAwayFromZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtau}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToEven, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtnu}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToEvenScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtnu}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToNegativeInfinity, 16, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtmu}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToNegativeInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtmu}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtpu}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToPositiveInfinityScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtpu}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzu}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToZeroScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzu}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Divide, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fdiv, INS_fdiv}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateSelectedScalarToVector128, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dup, INS_dup, INS_invalid, INS_dup}, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dup, INS_dup, INS_invalid, INS_dup}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateToVector64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_fmov}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ExtractNarrowingSaturateScalar, 8, 1, {INS_sqxtn, INS_uqxtn, INS_sqxtn, INS_uqxtn, INS_sqxtn, INS_uqxtn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ExtractNarrowingSaturateUnsignedScalar, 8, 1, {INS_invalid, INS_sqxtun, INS_invalid, INS_sqxtun, INS_invalid, INS_sqxtun, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAdd, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAddByScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAddBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_fmla}, 
HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAddScalarBySelectedScalar, 8, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtract, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtractByScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, INS_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtractBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, INS_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtractScalarBySelectedScalar, 8, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, INS_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, InsertSelectedScalar, -1, 4, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins}, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_NoJmpTableIMM|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Load2xVector128, 16, 1, {INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, 
INS_ld1_2regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Load2xVector128AndUnzip, 16, 1, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Load3xVector128, 16, 1, {INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Load3xVector128AndUnzip, 16, 1, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Load4xVector128, 16, 1, {INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Load4xVector128AndUnzip, 16, 1, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x2, 16, 3, {INS_ld2, 
INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x3, 16, 3, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x4, 16, 3, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndReplicateToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ld1r, INS_ld1r, INS_invalid, INS_ld1r}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndReplicateToVector128x2, 16, 1, {INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndReplicateToVector128x3, 16, 1, {INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndReplicateToVector128x4, 16, 1, {INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, 
INS_ld4r}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairScalarVector64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ldp, INS_ldp, INS_invalid, INS_invalid, INS_ldp, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairScalarVector64NonTemporal, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ldnp, INS_ldnp, INS_invalid, INS_invalid, INS_ldnp, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector128, 16, 1, {INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector128NonTemporal, 16, 1, {INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector64, 8, 1, {INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector64NonTemporal, 8, 1, {INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_fmax}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxAcross, -1, 1, {INS_smaxv, INS_umaxv, INS_smaxv, INS_umaxv, INS_smaxv, INS_umaxv, INS_invalid, INS_invalid, INS_fmaxv, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumberAcross, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnmv, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumberPairwise, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnmp, INS_fmaxnmp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumberPairwiseScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnmp, INS_fmaxnmp}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxPairwise, 16, 2, {INS_smaxp, INS_umaxp, INS_smaxp, INS_umaxp, INS_smaxp, INS_umaxp, INS_invalid, INS_invalid, INS_fmaxp, INS_fmaxp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxPairwiseScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxp, INS_fmaxp}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmax, INS_fmax}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Min, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmin}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MinAcross, -1, 1, {INS_sminv, INS_uminv, INS_sminv, INS_uminv, INS_sminv, INS_uminv, INS_invalid, INS_invalid, INS_fminv, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MinNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnm}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MinNumberAcross, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnmv, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MinNumberPairwise, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnmp, INS_fminnmp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MinNumberPairwiseScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnmp, INS_fminnmp}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MinPairwise, 16, 2, {INS_sminp, INS_uminp, INS_sminp, INS_uminp, INS_sminp, INS_uminp, INS_invalid, INS_invalid, INS_fminp, INS_fminp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MinPairwiseScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminp, INS_fminp}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MinScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmin, INS_fmin}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) 
-HARDWARE_INTRINSIC(AdvSimd_Arm64, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyByScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyBySelectedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingSaturateHighScalar, 8, 2, {INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingScalarBySelectedScalarSaturateHigh, 8, 3, {INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningAndAddSaturateScalar, 8, 3, {INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningAndSubtractSaturateScalar, 8, 3, {INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningSaturateScalar, 8, 
2, {INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningSaturateScalarBySelectedScalar, 8, 3, {INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningScalarBySelectedScalarAndAddSaturate, 8, 4, {INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningScalarBySelectedScalarAndSubtractSaturate, 8, 4, {INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtended, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, INS_fmulx}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtendedByScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx}, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtendedBySelectedScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, INS_fmulx}, 
HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtendedScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, INS_fmulx}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtendedScalarBySelectedScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, INS_fmulx}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyRoundedDoublingSaturateHighScalar, 8, 2, {INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyRoundedDoublingScalarBySelectedScalarSaturateHigh, 8, 3, {INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyScalarBySelectedScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Negate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_neg, INS_invalid, INS_invalid, INS_fneg}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, NegateSaturate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqneg, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, NegateSaturateScalar, 8, 1, {INS_sqneg, INS_invalid, 
INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, NegateScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_neg, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalEstimate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecpe}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalEstimateScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecpe, INS_frecpe}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalExponentScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecpx, INS_frecpx}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalSquareRootEstimate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrte}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalSquareRootEstimateScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrte, INS_frsqrte}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalSquareRootStep, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrts}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalSquareRootStepScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrts, INS_frsqrts}, 
HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalStep, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecps}, HW_Category_SIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalStepScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecps, INS_frecps}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ReverseElementBits, -1, 1, {INS_rbit, INS_rbit, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundAwayFromZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frinta}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundToNearest, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintn}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintz}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftArithmeticRoundedSaturateScalar, 8, 2, {INS_sqrshl, INS_invalid, 
INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftArithmeticSaturateScalar, 8, 2, {INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftLeftLogicalSaturateScalar, 8, 2, {INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftLeftLogicalSaturateUnsignedScalar, 8, 2, {INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftLogicalRoundedSaturateScalar, 8, 2, {INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftLogicalSaturateScalar, 8, 2, {INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightArithmeticNarrowingSaturateScalar, 8, 2, {INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightArithmeticNarrowingSaturateUnsignedScalar, 8, 2, {INS_invalid, INS_sqshrun, INS_invalid, INS_sqshrun, INS_invalid, INS_sqshrun, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, 
HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightArithmeticRoundedNarrowingSaturateScalar, 8, 2, {INS_sqrshrn, INS_invalid, INS_sqrshrn, INS_invalid, INS_sqrshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedScalar, 8, 2, {INS_invalid, INS_sqrshrun, INS_invalid, INS_sqrshrun, INS_invalid, INS_sqrshrun, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightLogicalNarrowingSaturateScalar, 8, 2, {INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightLogicalRoundedNarrowingSaturateScalar, 8, 2, {INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Sqrt, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Store, 16, 2, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair, -1, 3, {INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp}, 
HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVectorAndZip, 16, 2, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, Subtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsub}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, SubtractSaturateScalar, 8, 2, {INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) 
-HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, -1, 2, {INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, -1, 2, {INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) -#define LAST_NI_AdvSimd_Arm64 NI_AdvSimd_Arm64_ZipLow +#define FIRST_NI_AdvSimd_Arm64 NI_AdvSimd_Arm64_Abs +HARDWARE_INTRINSIC(AdvSimd_Arm64, Abs, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_abs, INS_invalid, 
INS_invalid, INS_fabs, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsSaturate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqabs, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsSaturateScalar, 8, 1, INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_sqabs, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_abs, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareGreaterThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareGreaterThanOrEqualScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, INS_facge, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareGreaterThanScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, INS_facgt, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareLessThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_facgt, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareLessThanOrEqualScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facge, INS_facge, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteCompareLessThanScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_facgt, INS_facgt, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteDifference, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fabd, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AbsoluteDifferenceScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fabd, INS_fabd, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Add, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fadd, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AddAcross, -1, 1, INS_addv, INS_addv, INS_addv, INS_addv, INS_addv, INS_addv, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AddAcrossWidening, -1, 1, INS_saddlv, INS_uaddlv, INS_saddlv, INS_uaddlv, INS_saddlv, INS_uaddlv, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwise, 16, 2, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_addp, INS_faddp, INS_faddp, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwiseScalar, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addp, INS_addp, INS_faddp, INS_faddp, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturate, -1, 2, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturateScalar, 8, 2, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Ceiling, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, 
INS_cmhi, INS_invalid, INS_fcmgt, -1, -1, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge, -1, -1, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqualScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanScalar, 8, 2, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareTest, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmtst, INS_cmtst, INS_invalid, INS_cmtst, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareTestScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmtst, INS_cmtst, INS_invalid, INS_cmtst, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToDouble, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_scvtf, INS_ucvtf, INS_fcvtl, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToDoubleScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_scvtf, INS_ucvtf, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToDoubleUpper, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtl2, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundAwayFromZero, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtas, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundAwayFromZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtas, -1, -1, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToEven, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtns, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToEvenScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtns, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToNegativeInfinity, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtms, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToNegativeInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtms, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToPositiveInfinity, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtps, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToPositiveInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtps, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToInt64RoundToZero, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzs, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, 
ConvertToInt64RoundToZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzs, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToSingleLower, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtn, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToSingleRoundToOddLower, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtxn, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToSingleRoundToOddUpper, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtxn2, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToSingleUpper, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtn2, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundAwayFromZero, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtau, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundAwayFromZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtau, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToEven, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtnu, -1, -1, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToEvenScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtnu, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToNegativeInfinity, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtmu, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToNegativeInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtmu, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToPositiveInfinity, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtpu, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToPositiveInfinityScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtpu, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToZero, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzu, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ConvertToUInt64RoundToZeroScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fcvtzu, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, 
Divide, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fdiv, INS_fdiv, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateSelectedScalarToVector128, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dup, INS_dup, INS_invalid, INS_dup, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateToVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dup, INS_dup, INS_invalid, INS_dup, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateToVector64, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_fmov, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ExtractNarrowingSaturateScalar, 8, 1, INS_sqxtn, INS_uqxtn, INS_sqxtn, INS_uqxtn, INS_sqxtn, INS_uqxtn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ExtractNarrowingSaturateUnsignedScalar, 8, 1, INS_invalid, INS_sqxtun, INS_invalid, INS_sqxtun, INS_invalid, INS_sqxtun, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Floor, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAdd, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) 
+HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAddByScalar, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_fmla, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAddBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_fmla, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAddScalarBySelectedScalar, 8, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_fmla, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtract, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtractByScalar, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, INS_fmls, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtractBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, INS_fmls, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtractScalarBySelectedScalar, 8, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls, INS_fmls, -1, -1, HW_Category_SIMDByIndexedElement, 
HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, InsertSelectedScalar, -1, 4, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, -1, -1, HW_Category_SIMD, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_NoJmpTableIMM|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Load2xVector128, 16, 1, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Load2xVector128AndUnzip, 16, 1, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Load3xVector128, 16, 1, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Load3xVector128AndUnzip, 16, 1, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Load4xVector128, 16, 1, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) 
+HARDWARE_INTRINSIC(AdvSimd_Arm64, Load4xVector128AndUnzip, 16, 1, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalar, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x2, 16, 3, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, -1, -1, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x3, 16, 3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, -1, -1, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x4, 16, 3, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, -1, -1, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndReplicateToVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ld1r, INS_ld1r, INS_invalid, INS_ld1r, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndReplicateToVector128x2, 16, 1, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, 
INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndReplicateToVector128x3, 16, 1, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndReplicateToVector128x4, 16, 1, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairScalarVector64, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ldp, INS_ldp, INS_invalid, INS_invalid, INS_ldp, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairScalarVector64NonTemporal, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ldnp, INS_ldnp, INS_invalid, INS_invalid, INS_ldnp, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector128, 16, 1, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector128NonTemporal, 16, 1, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) 
+HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector64, 8, 1, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector64NonTemporal, 8, 1, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, -1, -1, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Max, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmax, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxAcross, -1, 1, INS_smaxv, INS_umaxv, INS_smaxv, INS_umaxv, INS_smaxv, INS_umaxv, INS_invalid, INS_invalid, INS_fmaxv, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumber, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumberAcross, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnmv, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumberPairwise, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnmp, INS_fmaxnmp, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumberPairwiseScalar, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnmp, INS_fmaxnmp, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) 
+HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxPairwise, 16, 2, INS_smaxp, INS_umaxp, INS_smaxp, INS_umaxp, INS_smaxp, INS_umaxp, INS_invalid, INS_invalid, INS_fmaxp, INS_fmaxp, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxPairwiseScalar, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxp, INS_fmaxp, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmax, INS_fmax, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Min, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmin, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MinAcross, -1, 1, INS_sminv, INS_uminv, INS_sminv, INS_uminv, INS_sminv, INS_uminv, INS_invalid, INS_invalid, INS_fminv, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MinNumber, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnm, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MinNumberAcross, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnmv, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MinNumberPairwise, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminnmp, INS_fminnmp, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MinNumberPairwiseScalar, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_fminnmp, INS_fminnmp, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MinPairwise, 16, 2, INS_sminp, INS_uminp, INS_sminp, INS_uminp, INS_sminp, INS_uminp, INS_invalid, INS_invalid, INS_fminp, INS_fminp, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MinPairwiseScalar, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fminp, INS_fminp, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MinScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmin, INS_fmin, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Multiply, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyByScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyBySelectedScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingSaturateHighScalar, 8, 2, INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingScalarBySelectedScalarSaturateHigh, 8, 3, INS_invalid, INS_invalid, INS_sqdmulh, INS_invalid, INS_sqdmulh, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningAndAddSaturateScalar, 8, 3, INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningAndSubtractSaturateScalar, 8, 3, INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningSaturateScalar, 8, 2, INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningSaturateScalarBySelectedScalar, 8, 3, INS_invalid, INS_invalid, INS_sqdmull, INS_invalid, INS_sqdmull, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningScalarBySelectedScalarAndAddSaturate, 8, 4, INS_invalid, INS_invalid, INS_sqdmlal, INS_invalid, INS_sqdmlal, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyDoublingWideningScalarBySelectedScalarAndSubtractSaturate, 8, 4, INS_invalid, INS_invalid, INS_sqdmlsl, INS_invalid, INS_sqdmlsl, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtended, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, INS_fmulx, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtendedByScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtendedBySelectedScalar, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, INS_fmulx, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtendedScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, INS_fmulx, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyExtendedScalarBySelectedScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmulx, INS_fmulx, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyRoundedDoublingSaturateHighScalar, 8, 2, INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyRoundedDoublingScalarBySelectedScalarSaturateHigh, 8, 3, INS_invalid, INS_invalid, INS_sqrdmulh, INS_invalid, INS_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, MultiplyScalarBySelectedScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmul, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Negate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_neg, INS_invalid, INS_invalid, INS_fneg, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, NegateSaturate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqneg, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, NegateSaturateScalar, 8, 1, INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_sqneg, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, NegateScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_neg, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalEstimate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecpe, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalEstimateScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecpe, INS_frecpe, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalExponentScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecpx, INS_frecpx, -1, -1, HW_Category_SIMD, 
HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalSquareRootEstimate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrte, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalSquareRootEstimateScalar, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrte, INS_frsqrte, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalSquareRootStep, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrts, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalSquareRootStepScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frsqrts, INS_frsqrts, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalStep, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecps, -1, -1, HW_Category_SIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReciprocalStepScalar, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frecps, INS_frecps, -1, -1, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ReverseElementBits, -1, 1, INS_rbit, INS_rbit, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundAwayFromZero, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frinta, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundToNearest, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintn, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundToNegativeInfinity, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundToPositiveInfinity, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, RoundToZero, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintz, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftArithmeticRoundedSaturateScalar, 8, 2, INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_sqrshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftArithmeticSaturateScalar, 8, 2, INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, INS_sqshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftLeftLogicalSaturateScalar, 8, 2, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_sqshl, INS_uqshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftLeftLogicalSaturateUnsignedScalar, 8, 2, INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_sqshlu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftLogicalRoundedSaturateScalar, 8, 2, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_uqrshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftLogicalSaturateScalar, 8, 2, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_uqshl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightArithmeticNarrowingSaturateScalar, 8, 2, INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_sqshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightArithmeticNarrowingSaturateUnsignedScalar, 8, 2, INS_invalid, INS_sqshrun, INS_invalid, INS_sqshrun, INS_invalid, INS_sqshrun, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightArithmeticRoundedNarrowingSaturateScalar, 8, 2, INS_sqrshrn, INS_invalid, INS_sqrshrn, INS_invalid, INS_sqrshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedScalar, 8, 2, INS_invalid, INS_sqrshrun, INS_invalid, INS_sqrshrun, INS_invalid, INS_sqrshrun, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightLogicalNarrowingSaturateScalar, 8, 2, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_uqshrn, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ShiftRightLogicalRoundedNarrowingSaturateScalar, 8, 2, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_uqrshrn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Sqrt, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Store, 16, 2, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair, -1, 3, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, INS_stp, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid, -1, -1, HW_Category_MemoryStore,
HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVectorAndZip, 16, 2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_BaseTypeFromValueTupleArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, Subtract, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsub, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, SubtractSaturateScalar, 8, 2, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, -1, 2, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, -1, 2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +#define LAST_NI_AdvSimd_Arm64 NI_AdvSimd_Arm64_ZipLow // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AES Intrinsics -#define FIRST_NI_Aes NI_Aes_Decrypt -HARDWARE_INTRINSIC(Aes, Decrypt, 16, 2, {INS_invalid, INS_aesd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Aes, Encrypt, 16, 2, {INS_invalid, INS_aese, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Aes, InverseMixColumns, 16, 1, {INS_invalid, INS_aesimc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(Aes, MixColumns, 16, 1, {INS_invalid, INS_aesmc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(Aes, PolynomialMultiplyWideningLower, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmull, INS_pmull, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Aes, PolynomialMultiplyWideningUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmull2, INS_pmull2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -#define LAST_NI_Aes NI_Aes_PolynomialMultiplyWideningUpper +#define FIRST_NI_Aes NI_Aes_Decrypt +HARDWARE_INTRINSIC(Aes, Decrypt, 16, 2, 
INS_invalid, INS_aesd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Aes, Encrypt, 16, 2, INS_invalid, INS_aese, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Aes, InverseMixColumns, 16, 1, INS_invalid, INS_aesimc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(Aes, MixColumns, 16, 1, INS_invalid, INS_aesmc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(Aes, PolynomialMultiplyWideningLower, 8, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmull, INS_pmull, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Aes, PolynomialMultiplyWideningUpper, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmull2, INS_pmull2, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +#define LAST_NI_Aes NI_Aes_PolynomialMultiplyWideningUpper // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, 
TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Base Intrinsics -#define FIRST_NI_ArmBase NI_ArmBase_LeadingZeroCount -HARDWARE_INTRINSIC(ArmBase, LeadingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_clz, INS_clz, INS_clz, INS_clz, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(ArmBase, ReverseElementBits, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rbit, INS_rbit, INS_rbit, INS_rbit, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(ArmBase, Yield, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_SpecialSideEffect_Other) -#define LAST_NI_ArmBase NI_ArmBase_Yield +#define FIRST_NI_ArmBase NI_ArmBase_LeadingZeroCount +HARDWARE_INTRINSIC(ArmBase, LeadingZeroCount, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_clz, INS_clz, INS_clz, INS_clz, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(ArmBase, ReverseElementBits, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rbit, INS_rbit, INS_rbit, INS_rbit, 
INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(ArmBase, Yield, 0, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_SpecialSideEffect_Other) +#define LAST_NI_ArmBase NI_ArmBase_Yield // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Base 64-bit only Intrinsics -#define FIRST_NI_ArmBase_Arm64 NI_ArmBase_Arm64_LeadingSignCount -HARDWARE_INTRINSIC(ArmBase_Arm64, LeadingSignCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cls, INS_invalid, INS_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(ArmBase_Arm64, LeadingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_clz, INS_clz, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyHigh, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smulh, INS_umulh, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongAdd, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smaddl, INS_umaddl, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongNeg, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smnegl, INS_umnegl, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongSub, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smsubl, INS_umsubl, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(ArmBase_Arm64, ReverseElementBits, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rbit, INS_rbit, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -#define LAST_NI_ArmBase_Arm64 NI_ArmBase_Arm64_ReverseElementBits +#define FIRST_NI_ArmBase_Arm64 NI_ArmBase_Arm64_LeadingSignCount +HARDWARE_INTRINSIC(ArmBase_Arm64, LeadingSignCount, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cls, INS_invalid, INS_cls, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(ArmBase_Arm64, LeadingZeroCount, 0, 
1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_clz, INS_clz, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyHigh, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smulh, INS_umulh, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongAdd, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smaddl, INS_umaddl, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongNeg, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smnegl, INS_umnegl, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(ArmBase_Arm64, MultiplyLongSub, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_smsubl, INS_umsubl, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(ArmBase_Arm64, ReverseElementBits, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rbit, INS_rbit, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +#define LAST_NI_ArmBase_Arm64 NI_ArmBase_Arm64_ReverseElementBits // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg 
Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // CRC32 Intrinsics -#define FIRST_NI_Crc32 NI_Crc32_ComputeCrc32 -HARDWARE_INTRINSIC(Crc32, ComputeCrc32, 0, 2, {INS_invalid, INS_crc32b, INS_invalid, INS_crc32h, INS_invalid, INS_crc32w, INS_invalid, INS_crc32x, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Crc32, ComputeCrc32C, 0, 2, {INS_invalid, INS_crc32cb, INS_invalid, INS_crc32ch, INS_invalid, INS_crc32cw, INS_invalid, INS_crc32cx, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) -#define LAST_NI_Crc32 NI_Crc32_ComputeCrc32C +#define FIRST_NI_Crc32 NI_Crc32_ComputeCrc32 +HARDWARE_INTRINSIC(Crc32, ComputeCrc32, 0, 2, INS_invalid, INS_crc32b, INS_invalid, INS_crc32h, INS_invalid, INS_crc32w, INS_invalid, INS_crc32x, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Crc32, ComputeCrc32C, 0, 2, INS_invalid, INS_crc32cb, INS_invalid, INS_crc32ch, INS_invalid, INS_crc32cw, INS_invalid, INS_crc32cx, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +#define LAST_NI_Crc32 NI_Crc32_ComputeCrc32C // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // CRC32 64-bit only Intrinsics -#define FIRST_NI_Crc32_Arm64 NI_Crc32_Arm64_ComputeCrc32 -HARDWARE_INTRINSIC(Crc32_Arm64, ComputeCrc32, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32x, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Crc32_Arm64, ComputeCrc32C, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32cx, INS_invalid, INS_invalid}, HW_Category_Scalar, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) -#define LAST_NI_Crc32_Arm64 NI_Crc32_Arm64_ComputeCrc32C +#define FIRST_NI_Crc32_Arm64 NI_Crc32_Arm64_ComputeCrc32 +HARDWARE_INTRINSIC(Crc32_Arm64, ComputeCrc32, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32x, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Crc32_Arm64, ComputeCrc32C, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32cx, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +#define LAST_NI_Crc32_Arm64 NI_Crc32_Arm64_ComputeCrc32C // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // DP Intrinsics -#define FIRST_NI_Dp NI_Dp_DotProduct -HARDWARE_INTRINSIC(Dp, DotProduct, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sdot, INS_udot, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Dp, DotProductBySelectedQuadruplet, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sdot, INS_udot, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -#define LAST_NI_Dp NI_Dp_DotProductBySelectedQuadruplet +#define FIRST_NI_Dp NI_Dp_DotProduct +HARDWARE_INTRINSIC(Dp, DotProduct, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sdot, INS_udot, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Dp, DotProductBySelectedQuadruplet, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sdot, INS_udot, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +#define LAST_NI_Dp NI_Dp_DotProductBySelectedQuadruplet // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // RDM Intrinsics -#define FIRST_NI_Rdm NI_Rdm_MultiplyRoundedDoublingAndAddSaturateHigh -HARDWARE_INTRINSIC(Rdm, MultiplyRoundedDoublingAndAddSaturateHigh, -1, 3, {INS_invalid, INS_invalid, INS_sqrdmlah, INS_invalid, INS_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Rdm, MultiplyRoundedDoublingAndSubtractSaturateHigh, -1, 3, {INS_invalid, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Rdm, MultiplyRoundedDoublingBySelectedScalarAndAddSaturateHigh, -1, 4, {INS_invalid, INS_invalid, INS_sqrdmlah, INS_invalid, INS_sqrdmlah, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Rdm, MultiplyRoundedDoublingBySelectedScalarAndSubtractSaturateHigh, -1, 4, {INS_invalid, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -#define LAST_NI_Rdm NI_Rdm_MultiplyRoundedDoublingBySelectedScalarAndSubtractSaturateHigh +#define FIRST_NI_Rdm NI_Rdm_MultiplyRoundedDoublingAndAddSaturateHigh +HARDWARE_INTRINSIC(Rdm, MultiplyRoundedDoublingAndAddSaturateHigh, -1, 3, INS_invalid, INS_invalid, INS_sqrdmlah, INS_invalid, INS_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Rdm, MultiplyRoundedDoublingAndSubtractSaturateHigh, -1, 3, INS_invalid, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Rdm, MultiplyRoundedDoublingBySelectedScalarAndAddSaturateHigh, -1, 4, INS_invalid, INS_invalid, INS_sqrdmlah, INS_invalid, INS_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Rdm, MultiplyRoundedDoublingBySelectedScalarAndSubtractSaturateHigh, -1, 4, INS_invalid, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +#define LAST_NI_Rdm NI_Rdm_MultiplyRoundedDoublingBySelectedScalarAndSubtractSaturateHigh // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // RDM 64-bit only Intrinsics -#define FIRST_NI_Rdm_Arm64 NI_Rdm_Arm64_MultiplyRoundedDoublingAndAddSaturateHighScalar -HARDWARE_INTRINSIC(Rdm_Arm64, MultiplyRoundedDoublingAndAddSaturateHighScalar, 8, 3, {INS_invalid, INS_invalid, INS_sqrdmlah, INS_invalid, INS_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(Rdm_Arm64, MultiplyRoundedDoublingAndSubtractSaturateHighScalar, 8, 3, {INS_invalid, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(Rdm_Arm64, 
MultiplyRoundedDoublingScalarBySelectedScalarAndAddSaturateHigh, 8, 4, {INS_invalid, INS_invalid, INS_sqrdmlah, INS_invalid, INS_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(Rdm_Arm64, MultiplyRoundedDoublingScalarBySelectedScalarAndSubtractSaturateHigh, 8, 4, {INS_invalid, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) -#define LAST_NI_Rdm_Arm64 NI_Rdm_Arm64_MultiplyRoundedDoublingScalarBySelectedScalarAndSubtractSaturateHigh +#define FIRST_NI_Rdm_Arm64 NI_Rdm_Arm64_MultiplyRoundedDoublingAndAddSaturateHighScalar +HARDWARE_INTRINSIC(Rdm_Arm64, MultiplyRoundedDoublingAndAddSaturateHighScalar, 8, 3, INS_invalid, INS_invalid, INS_sqrdmlah, INS_invalid, INS_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(Rdm_Arm64, MultiplyRoundedDoublingAndSubtractSaturateHighScalar, 8, 3, INS_invalid, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(Rdm_Arm64, MultiplyRoundedDoublingScalarBySelectedScalarAndAddSaturateHigh, 8, 4, INS_invalid, INS_invalid, INS_sqrdmlah, INS_invalid, INS_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(Rdm_Arm64, MultiplyRoundedDoublingScalarBySelectedScalarAndSubtractSaturateHigh, 8, 4, INS_invalid, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar) +#define LAST_NI_Rdm_Arm64 NI_Rdm_Arm64_MultiplyRoundedDoublingScalarBySelectedScalarAndSubtractSaturateHigh // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SHA1 Intrinsics -#define FIRST_NI_Sha1 NI_Sha1_FixedRotate -HARDWARE_INTRINSIC(Sha1, FixedRotate, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(Sha1, HashUpdateChoose, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1c, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sha1, 
HashUpdateMajority, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1m, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sha1, HashUpdateParity, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1p, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sha1, ScheduleUpdate0, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1su0, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sha1, ScheduleUpdate1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1su1, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -#define LAST_NI_Sha1 NI_Sha1_ScheduleUpdate1 +#define FIRST_NI_Sha1 NI_Sha1_FixedRotate +HARDWARE_INTRINSIC(Sha1, FixedRotate, 8, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(Sha1, HashUpdateChoose, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1c, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sha1, HashUpdateMajority, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1m, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sha1, HashUpdateParity, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1p, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sha1, ScheduleUpdate0, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sha1su0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sha1, ScheduleUpdate1, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha1su1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +#define LAST_NI_Sha1 NI_Sha1_ScheduleUpdate1 // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SHA256 Intrinsics -#define FIRST_NI_Sha256 NI_Sha256_HashUpdate1 -HARDWARE_INTRINSIC(Sha256, HashUpdate1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sha256, HashUpdate2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_sha256h2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sha256, ScheduleUpdate0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256su0, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sha256, ScheduleUpdate1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256su1, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) -#define LAST_NI_Sha256 NI_Sha256_ScheduleUpdate1 +#define FIRST_NI_Sha256 NI_Sha256_HashUpdate1 +HARDWARE_INTRINSIC(Sha256, HashUpdate1, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sha256, HashUpdate2, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256h2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sha256, ScheduleUpdate0, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256su0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sha256, ScheduleUpdate1, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256su1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +#define LAST_NI_Sha256 NI_Sha256_ScheduleUpdate1 // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Sha3 -#define FIRST_NI_Sha3 NI_Sha3_BitwiseClearXor -HARDWARE_INTRINSIC(Sha3, BitwiseClearXor, 16, 3, {INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sha3, BitwiseRotateLeftBy1AndXor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rax1, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(Sha3, Xor, 16, 3, {INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sha3, XorRotateRight, 16, 3, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xar, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasImmediateOperand) -#define LAST_NI_Sha3 NI_Sha3_XorRotateRight +// SHA3 Intrinsics +#define FIRST_NI_Sha3 NI_Sha3_BitwiseClearXor +HARDWARE_INTRINSIC(Sha3, BitwiseClearXor, 16, 3, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_bcax, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sha3, BitwiseRotateLeftBy1AndXor, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rax1, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(Sha3, Xor, 16, 3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_eor3, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sha3, XorRotateRight, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xar, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_HasImmediateOperand) +#define LAST_NI_Sha3 NI_Sha3_XorRotateRight #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index c473a0edd9e0df..81ccf47a89f8bf 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -11,580 +11,572 @@ #ifdef FEATURE_HW_INTRINSICS // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, 
TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SVE Intrinsics -#define FIRST_NI_Sve NI_Sve_Abs -HARDWARE_INTRINSIC(Sve, Abs, -1, -1, {INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_fabs, INS_sve_fabs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, AbsoluteCompareGreaterThan, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facgt, INS_sve_facgt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, AbsoluteCompareGreaterThanOrEqual, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facge, INS_sve_facge}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThan, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_faclt, INS_sve_faclt}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThanOrEqual, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facle, INS_sve_facle}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, AbsoluteDifference, -1, -1, {INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_fabd, INS_sve_fabd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Add, -1, -1, {INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_fadd, INS_sve_fadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, AddAcross, -1, 1, {INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_faddv, INS_sve_faddv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, AddRotateComplex, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcadd, INS_sve_fcadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve, AddSaturate, -1, 2, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, AddSequentialAcross, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fadda, INS_sve_fadda}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, And, -1, -1, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, AndAcross, -1, -1, {INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, BooleanNot, -1, -1, {INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Compact, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, CompareEqual, -1, -1, {INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, 
INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_fcmeq, INS_sve_fcmeq}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CompareGreaterThan, -1, -1, {INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_fcmgt, INS_sve_fcmgt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CompareGreaterThanOrEqual, -1, -1, {INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_fcmge, INS_sve_fcmge}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CompareLessThan, -1, -1, {INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_fcmlt, INS_sve_fcmlt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CompareLessThanOrEqual, -1, -1, {INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_fcmle, INS_sve_fcmle}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CompareNotEqualTo, -1, -1, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_fcmne, INS_sve_fcmne}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CompareUnordered, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmuo, INS_sve_fcmuo}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, Compute16BitAddresses, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Compute32BitAddresses, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Compute64BitAddresses, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Compute8BitAddresses, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElement, -1, 3, {INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementAndReplicate, -1, 3, {INS_sve_clasta, INS_sve_clasta, 
INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElement, -1, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementAndReplicate, -1, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, ConvertToDouble, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_fcvt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ConvertToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ConvertToInt64, -1, -1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ConvertToSingle, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_scvtf, INS_sve_ucvtf, INS_invalid, INS_sve_fcvt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ConvertToUInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzu, INS_sve_fcvtzu}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ConvertToUInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzu, INS_sve_fcvtzu}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Count16BitElements, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cnth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(Sve, Count32BitElements, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(Sve, Count64BitElements, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntd, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(Sve, Count8BitElements, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(Sve, CreateBreakAfterMask, -1, 2, {INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, CreateBreakAfterPropagateMask, -1, 3, {INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CreateBreakBeforeMask, -1, 2, {INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, CreateBreakBeforePropagateMask, -1, 3, {INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CreateBreakPropagateMask, -1, -1, {INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn}, 
HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, {INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, {INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskSByte, -1, 0, {INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskSingle, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateMaskForFirstActiveElement, -1, 2, {INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, CreateMaskForNextActiveElement, -1, 2, {INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskByte, -1, 2, {INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskDouble, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskSByte, -1, 2, {INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskSingle, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid}, 
HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskUInt16, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskByte, -1, 2, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskDouble, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskSByte, -1, 2, {INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskSingle, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskUInt16, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, Divide, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve, DuplicateSelectedScalarToVector, -1, 2, {INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve, ExtractAfterLastActiveElement, -1, 2, {INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ExtractAfterLastActiveElementScalar, 0, 2, {INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, ExtractLastActiveElement, -1, 2, {INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ExtractLastActiveElementScalar, 0, 2, {INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, ExtractVector, -1, 3, {INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, FloatingPointExponentialAccelerator, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fexpa, INS_invalid, INS_sve_fexpa, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, GatherPrefetch16Bit, -1, -1, {INS_invalid, INS_invalid, INS_sve_prfh, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherPrefetch32Bit, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, 
HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherPrefetch64Bit, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherPrefetch8Bit, -1, -1, {INS_sve_prfb, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVector, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorByteZeroExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorByteZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorInt16SignExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorInt16SignExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorInt16WithByteOffsetsSignExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorInt16WithByteOffsetsSignExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorInt32SignExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorInt32SignExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sw, INS_sve_ldff1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorInt32WithByteOffsetsSignExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorInt32WithByteOffsetsSignExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sw, INS_sve_ldff1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorSByteSignExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_sve_ld1sb, INS_sve_ld1sb, INS_sve_ld1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorSByteSignExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorUInt16WithByteOffsetsZeroExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorUInt16WithByteOffsetsZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorUInt16ZeroExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorUInt16ZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsetFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsets, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation) -HARDWARE_INTRINSIC(Sve, GetFfrByte, -1, 0, {INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrDouble, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrInt16, -1, 0, {INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrSByte, -1, 0, {INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrSingle, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, GetFfrUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, InsertIntoShiftedVector, -1, 2, {INS_sve_insr, INS_sve_insr, 
INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Load2xVectorAndUnzip, -1, 2, {INS_sve_ld2b, INS_sve_ld2b, INS_sve_ld2h, INS_sve_ld2h, INS_sve_ld2w, INS_sve_ld2w, INS_sve_ld2d, INS_sve_ld2d, INS_sve_ld2w, INS_sve_ld2d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, Load3xVectorAndUnzip, -1, 2, {INS_sve_ld3b, INS_sve_ld3b, INS_sve_ld3h, INS_sve_ld3h, INS_sve_ld3w, INS_sve_ld3w, INS_sve_ld3d, INS_sve_ld3d, INS_sve_ld3w, INS_sve_ld3d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, Load4xVectorAndUnzip, -1, 2, {INS_sve_ld4b, INS_sve_ld4b, INS_sve_ld4h, INS_sve_ld4h, INS_sve_ld4w, INS_sve_ld4w, INS_sve_ld4d, INS_sve_ld4d, INS_sve_ld4w, INS_sve_ld4d}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVector128AndReplicateToVector, -1, 2, {INS_sve_ld1rqb, INS_sve_ld1rqb, INS_sve_ld1rqh, INS_sve_ld1rqh, INS_sve_ld1rqw, INS_sve_ld1rqw, INS_sve_ld1rqd, INS_sve_ld1rqd, INS_sve_ld1rqw, INS_sve_ld1rqd}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt16, -1, -1, {INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorFirstFaulting, -1, -1, {INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d}, 
HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_invalid, 
INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorInt32NonFaultingSignExtendToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorInt32NonFaultingSignExtendToUInt64, 
-1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sw, INS_sve_ldff1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorNonFaulting, -1, -1, {INS_sve_ldnf1b, INS_sve_ldnf1b, INS_sve_ldnf1h, INS_sve_ldnf1h, INS_sve_ldnf1w, INS_sve_ldnf1w, INS_sve_ldnf1d, INS_sve_ldnf1d, INS_sve_ldnf1w, INS_sve_ldnf1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorNonTemporal, -1, 2, {INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, 
INS_sve_ldnt1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt16, -1, -1, {INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, 
LoadVectorSByteSignExtendToUInt16, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToUInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToUInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) 
-HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt32NonFaultingZeroExtendToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt32NonFaultingZeroExtendToUInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, Max, -1, -1, {INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_fmax, INS_sve_fmax}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MaxAcross, -1, -1, {INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_fmaxv, INS_sve_fmaxv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, MaxNumber, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnm, INS_sve_fmaxnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MaxNumberAcross, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnmv, INS_sve_fmaxnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, Min, -1, -1, {INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_fmin, INS_sve_fmin}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MinAcross, -1, -1, {INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_fminv, INS_sve_fminv}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, MinNumber, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnm, INS_sve_fminnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplexBySelectedScalar, -1, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Negate, -1, -1, {INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_fneg, INS_sve_fneg}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Not, -1, -1, {INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, Or, -1, -1, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, {INS_sve_orv, INS_sve_orv, 
INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Prefetch16Bit, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, Prefetch32Bit, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, Prefetch64Bit, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, Prefetch8Bit, -1, 3, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, ReciprocalEstimate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecpe, INS_sve_frecpe}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, ReciprocalExponent, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecpx, INS_sve_frecpx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ReciprocalSqrtEstimate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frsqrte, INS_sve_frsqrte}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, ReciprocalSqrtStep, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frsqrts, INS_sve_frsqrts}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, ReciprocalStep, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecps, INS_sve_frecps}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, ReverseBits, -1, -1, {INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ReverseElement8, -1, -1, {INS_invalid, INS_invalid, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, RoundAwayFromZero, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frinta, INS_sve_frinta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, RoundToNearest, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frintn, INS_sve_frintn}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, RoundToNegativeInfinity, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frintm, INS_sve_frintm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, RoundToPositiveInfinity, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frintp, INS_sve_frintp}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, RoundToZero, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frintz, INS_sve_frintz}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy16BitElementCount, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqdech, INS_sve_uqdech, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy32BitElementCount, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecw, INS_sve_uqdecw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy64BitElementCount, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecd, INS_sve_uqdecd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy8BitElementCount, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecb, INS_sve_uqdecb, INS_sve_sqdecb, INS_sve_uqdecb, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingDecrementByActiveElementCount, -1, 2, {INS_invalid, INS_sve_sqdecp, 
INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy16BitElementCount, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqinch, INS_sve_uqinch, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy32BitElementCount, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincw, INS_sve_uqincw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCount, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincd, INS_sve_uqincd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy8BitElementCount, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincb, INS_sve_uqincb, INS_sve_sqincb, INS_sve_uqincb, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingIncrementByActiveElementCount, -1, 2, {INS_invalid, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, Scale, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fscale, INS_sve_fscale}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, Scatter, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Scatter16BitNarrowing, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Scatter16BitWithByteOffsetsNarrowing, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Scatter32BitNarrowing, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Scatter32BitWithByteOffsetsNarrowing, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, 
INS_sve_st1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Scatter8BitNarrowing, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Scatter8BitWithByteOffsetsNarrowing, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ScatterWithByteOffsets, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SetFfr, -1, 1, {INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, {INS_sve_asr, 
INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve, ShiftRightLogical, -1, -1, {INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, Splice, -1, 3, {INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Sqrt, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fsqrt, INS_sve_fsqrt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, {INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, 
HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, TestAnyTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, TestFirstTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, TestLastTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, 
TransposeOdd, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, TrigonometricMultiplyAddCoefficient, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftmad, INS_sve_ftmad}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, TrigonometricSelectCoefficient, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftssel, INS_sve_ftssel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, TrigonometricStartingValue, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftsmul, INS_sve_ftsmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, VectorTableLookup, -1, 2, {INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, Xor, -1, -1, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, 
INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, XorAcross, -1, -1, {INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ZeroExtendWideningLower, -1, 1, {INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, ZeroExtendWideningUpper, -1, 1, {INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, 
INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) -HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) -#define LAST_NI_Sve NI_Sve_ZipLow +#define FIRST_NI_Sve NI_Sve_Abs +HARDWARE_INTRINSIC(Sve, Abs, -1, -1, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_fabs, INS_sve_fabs, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, AbsoluteCompareGreaterThan, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facgt, INS_sve_facgt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, AbsoluteCompareGreaterThanOrEqual, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facge, INS_sve_facge, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThan, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_faclt, INS_sve_faclt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThanOrEqual, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facle, 
INS_sve_facle, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, AbsoluteDifference, -1, -1, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_fabd, INS_sve_fabd, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Add, -1, -1, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_fadd, INS_sve_fadd, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, AddAcross, -1, 1, INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_faddv, INS_sve_faddv, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, AddRotateComplex, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcadd, INS_sve_fcadd, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve, AddSaturate, -1, 2, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, AddSequentialAcross, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fadda, INS_sve_fadda, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, And, -1, -1, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, AndAcross, -1, -1, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, BooleanNot, -1, -1, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Compact, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, CompareEqual, -1, -1, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_fcmeq, INS_sve_fcmeq, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, CompareGreaterThan, -1, -1, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_fcmgt, INS_sve_fcmgt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, CompareGreaterThanOrEqual, -1, -1, INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_fcmge, INS_sve_fcmge, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, CompareLessThan, -1, -1, INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_fcmlt, INS_sve_fcmlt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, CompareLessThanOrEqual, -1, -1, INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_fcmle, INS_sve_fcmle, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, CompareNotEqualTo, -1, -1, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_fcmne, INS_sve_fcmne, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) 
+HARDWARE_INTRINSIC(Sve, CompareUnordered, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmuo, INS_sve_fcmuo, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, Compute16BitAddresses, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Compute32BitAddresses, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Compute64BitAddresses, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Compute8BitAddresses, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElement, -1, 3, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementAndReplicate, -1, 3, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, 
INS_sve_clasta, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElement, -1, 3, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementAndReplicate, -1, 3, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, ConvertToDouble, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_fcvt, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ConvertToInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ConvertToInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ConvertToSingle, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_scvtf, INS_sve_ucvtf, INS_invalid, INS_sve_fcvt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ConvertToUInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzu, INS_sve_fcvtzu, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ConvertToUInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzu, INS_sve_fcvtzu, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Count16BitElements, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cnth, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(Sve, Count32BitElements, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntw, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(Sve, Count64BitElements, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntd, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(Sve, Count8BitElements, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(Sve, CreateBreakAfterMask, -1, 2, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, INS_sve_brka, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, CreateBreakAfterPropagateMask, -1, 3, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, CreateBreakBeforeMask, -1, 2, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, CreateBreakBeforePropagateMask, -1, 3, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, CreateBreakPropagateMask, -1, -1, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, -1, 
-1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt32, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt64, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskSByte, -1, 0, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskSingle, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt16, -1, 0, INS_invalid, 
INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt32, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt64, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateMaskForFirstActiveElement, -1, 2, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, CreateMaskForNextActiveElement, -1, 2, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, 
CreateTrueMaskInt16, -1, 1, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskByte, -1, 2, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskDouble, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskInt16, -1, 2, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskSByte, -1, 2, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, 
CreateWhileLessThanMaskSingle, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskUInt16, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskByte, -1, 2, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskDouble, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskInt16, -1, 2, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskSByte, -1, 2, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskSingle, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskUInt16, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMaskUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, Divide, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve, DuplicateSelectedScalarToVector, -1, 2, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve, ExtractAfterLastActiveElement, -1, 2, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ExtractAfterLastActiveElementScalar, 0, 2, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, -1, -1, HW_Category_Scalar, 
HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, ExtractLastActiveElement, -1, 2, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ExtractLastActiveElementScalar, 0, 2, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, ExtractVector, -1, 3, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FloatingPointExponentialAccelerator, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fexpa, INS_invalid, INS_sve_fexpa, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla, -1, -1, HW_Category_SIMDByIndexedElement, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, GatherPrefetch16Bit, -1, -1, INS_invalid, INS_invalid, INS_sve_prfh, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, 
GatherPrefetch32Bit, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherPrefetch64Bit, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_sve_prfd, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherPrefetch8Bit, -1, -1, INS_sve_prfb, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVector, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorByteZeroExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorByteZeroExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorInt16SignExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorInt16SignExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorInt16WithByteOffsetsSignExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorInt16WithByteOffsetsSignExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorInt32SignExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorInt32SignExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sw, INS_sve_ldff1sw, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorInt32WithByteOffsetsSignExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorInt32WithByteOffsetsSignExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sw, INS_sve_ldff1sw, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorSByteSignExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, 
INS_sve_ld1sb, INS_sve_ld1sb, INS_sve_ld1sb, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorSByteSignExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorUInt16WithByteOffsetsZeroExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorUInt16WithByteOffsetsZeroExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorUInt16ZeroExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorUInt16ZeroExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, 
INS_sve_ldff1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsetFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, 
INS_sve_ldff1w, INS_sve_ldff1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsets, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation) +HARDWARE_INTRINSIC(Sve, GetFfrByte, -1, 0, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrDouble, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrInt16, -1, 0, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrInt32, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrInt64, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrSByte, -1, 0, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrSingle, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrUInt16, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrUInt32, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, GetFfrUInt64, -1, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, 
InsertIntoShiftedVector, -1, 2, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Load2xVectorAndUnzip, -1, 2, INS_sve_ld2b, INS_sve_ld2b, INS_sve_ld2h, INS_sve_ld2h, INS_sve_ld2w, INS_sve_ld2w, INS_sve_ld2d, INS_sve_ld2d, INS_sve_ld2w, INS_sve_ld2d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, Load3xVectorAndUnzip, -1, 2, INS_sve_ld3b, INS_sve_ld3b, INS_sve_ld3h, INS_sve_ld3h, INS_sve_ld3w, INS_sve_ld3w, INS_sve_ld3d, INS_sve_ld3d, INS_sve_ld3w, INS_sve_ld3d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, Load4xVectorAndUnzip, -1, 2, INS_sve_ld4b, INS_sve_ld4b, INS_sve_ld4h, INS_sve_ld4h, INS_sve_ld4w, INS_sve_ld4w, INS_sve_ld4d, INS_sve_ld4d, INS_sve_ld4w, INS_sve_ld4d, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVector128AndReplicateToVector, -1, 2, INS_sve_ld1rqb, INS_sve_ld1rqb, INS_sve_ld1rqh, INS_sve_ld1rqh, INS_sve_ld1rqw, INS_sve_ld1rqw, INS_sve_ld1rqd, INS_sve_ld1rqd, INS_sve_ld1rqw, INS_sve_ld1rqd, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt16, -1, -1, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt16, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorFirstFaulting, -1, -1, INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, 
INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_sve_ldff1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorInt32NonFaultingSignExtendToInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sw, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorInt32NonFaultingSignExtendToUInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sw, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1sw, INS_sve_ldff1sw, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorNonFaulting, -1, -1, INS_sve_ldnf1b, INS_sve_ldnf1b, INS_sve_ldnf1h, INS_sve_ldnf1h, INS_sve_ldnf1w, INS_sve_ldnf1w, INS_sve_ldnf1d, INS_sve_ldnf1d, INS_sve_ldnf1w, INS_sve_ldnf1d, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorNonTemporal, -1, 2, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, INS_sve_ldnt1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt16, -1, -1, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt16, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_sve_ldff1sb, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt16, -1, 2, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt16, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToUInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToUInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, 
INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt32NonFaultingZeroExtendToInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1w, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt32NonFaultingZeroExtendToUInt64, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendFirstFaulting, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve, Max, -1, -1, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_fmax, INS_sve_fmax, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MaxAcross, -1, -1, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_smaxv, INS_sve_umaxv, INS_sve_fmaxv, INS_sve_fmaxv, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, MaxNumber, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnm, INS_sve_fmaxnm, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MaxNumberAcross, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnmv, INS_sve_fmaxnmv, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, Min, -1, -1, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_smin, INS_sve_umin, INS_sve_fmin, INS_sve_fmin, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MinAcross, -1, -1, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_fminv, INS_sve_fminv, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, MinNumber, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnm, INS_sve_fminnm, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) 
+HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplexBySelectedScalar, -1, 5, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Negate, -1, -1, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_fneg, INS_sve_fneg, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Not, 
-1, -1, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) +HARDWARE_INTRINSIC(Sve, Or, -1, -1, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Prefetch16Bit, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, Prefetch32Bit, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, Prefetch64Bit, -1, 3, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, Prefetch8Bit, -1, 3, INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, ReciprocalEstimate, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecpe, INS_sve_frecpe, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, ReciprocalExponent, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecpx, INS_sve_frecpx, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ReciprocalSqrtEstimate, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frsqrte, INS_sve_frsqrte, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, ReciprocalSqrtStep, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frsqrts, INS_sve_frsqrts, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, ReciprocalStep, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecps, INS_sve_frecps, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, ReverseBits, -1, 
-1, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ReverseElement8, -1, -1, INS_invalid, INS_invalid, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, RoundAwayFromZero, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frinta, INS_sve_frinta, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, RoundToNearest, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frintn, INS_sve_frintn, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, RoundToNegativeInfinity, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frintm, INS_sve_frintm, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, RoundToPositiveInfinity, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frintp, INS_sve_frintp, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, RoundToZero, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frintz, INS_sve_frintz, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy16BitElementCount, -1, 3, INS_invalid, INS_invalid, INS_sve_sqdech, INS_sve_uqdech, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy32BitElementCount, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecw, INS_sve_uqdecw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy64BitElementCount, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sve_sqdecd, INS_sve_uqdecd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy8BitElementCount, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecb, INS_sve_uqdecb, INS_sve_sqdecb, INS_sve_uqdecb, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingDecrementByActiveElementCount, -1, 2, INS_invalid, INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_sve_sqdecp, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy16BitElementCount, -1, 3, INS_invalid, INS_invalid, INS_sve_sqinch, INS_sve_uqinch, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy32BitElementCount, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincw, INS_sve_uqincw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCount, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincd, INS_sve_uqincd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy8BitElementCount, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincb, INS_sve_uqincb, INS_sve_sqincb, INS_sve_uqincb, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingIncrementByActiveElementCount, -1, 2, INS_invalid, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, Scale, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fscale, INS_sve_fscale, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, Scatter, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Scatter16BitNarrowing, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Scatter16BitWithByteOffsetsNarrowing, -1, -1, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Scatter32BitNarrowing, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Scatter32BitWithByteOffsetsNarrowing, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Scatter8BitNarrowing, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Scatter8BitWithByteOffsetsNarrowing, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ScatterWithByteOffsets, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d, -1, -1, HW_Category_MemoryStore, 
HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SetFfr, -1, 1, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve, ShiftRightLogical, -1, -1, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, 
INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, Splice, -1, 3, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Sqrt, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fsqrt, INS_sve_fsqrt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, 
INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, TestAnyTrue, -1, 2, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, TestFirstTrue, -1, 2, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, 
INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, TestLastTrue, -1, 2, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, TrigonometricMultiplyAddCoefficient, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftmad, INS_sve_ftmad, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, TrigonometricSelectCoefficient, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftssel, INS_sve_ftssel, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, TrigonometricStartingValue, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftsmul, INS_sve_ftsmul, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, VectorTableLookup, -1, 2, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, Xor, -1, -1, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, XorAcross, -1, -1, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ZeroExtendWideningLower, -1, 1, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, ZeroExtendWideningUpper, -1, 1, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +#define LAST_NI_Sve NI_Sve_ZipLow // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, 
TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SVE2 Intrinsics -#define FIRST_NI_Sve2 NI_Sve2_AbsSaturate -HARDWARE_INTRINSIC(Sve2, AbsSaturate, -1, -1, {INS_sve_sqabs, INS_invalid, INS_sve_sqabs, INS_invalid, INS_sve_sqabs, INS_invalid, INS_sve_sqabs, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAdd, -1, 3, {INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLowerAndAddEven, -1, 3, {INS_invalid, INS_invalid, INS_sve_sabalb, INS_sve_uabalb, INS_sve_sabalb, INS_sve_uabalb, INS_sve_sabalb, INS_sve_uabalb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLowerAndAddOdd, -1, 3, {INS_invalid, INS_invalid, 
INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, AddCarryWideningEven, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclb, INS_invalid, INS_sve_adclb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, AddCarryWideningOdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclt, INS_invalid, INS_sve_adclt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, AddHighNarrowingEven, -1, 2, {INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, AddHighNarrowingOdd, -1, 3, {INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, AddPairwise, -1, -1, {INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_faddp, INS_sve_faddp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve2, AddPairwiseWideningAndAdd, -1, -1, {INS_invalid, INS_invalid, INS_sve_sadalp, INS_sve_uadalp, INS_sve_sadalp, INS_sve_uadalp, INS_sve_sadalp, INS_sve_uadalp, INS_invalid, 
INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, AddRotateComplex, -1, 3, {INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve2, AddRoundedHighNarrowingEven, -1, 2, {INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, AddRoundedHighNarrowingOdd, -1, 3, {INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, AddSaturate, -1, -1, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve2, AddSaturateRotateComplex, -1, 3, {INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve2, AddWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_saddwb, INS_sve_uaddwb, INS_sve_saddwb, INS_sve_uaddwb, INS_sve_saddwb, INS_sve_uaddwb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve2, AddWideningEvenOdd, -1, 2, {INS_invalid, INS_invalid, 
INS_sve_saddlbt, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, AddWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_saddwt, INS_sve_uaddwt, INS_sve_saddwt, INS_sve_uaddwt, INS_sve_saddwt, INS_sve_uaddwt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve2, BitwiseClearXor, -1, 3, {INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 3, {INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, -1, 3, {INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, BitwiseSelectRightInverted, -1, 3, {INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ConvertToDoubleOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtlt, INS_sve_fcvtlt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ConvertToSingleEvenRoundToOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtx, INS_sve_fcvtx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ConvertToSingleOdd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtnt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ConvertToSingleOddRoundToOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtxnt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, CountMatchingElements, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_histcnt, INS_sve_histcnt, INS_sve_histcnt, INS_sve_histcnt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, CountMatchingElementsIn128BitSegments, -1, 2, {INS_sve_histseg, INS_sve_histseg, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskByte, -1, 2, {INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskDouble, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sve_whilegt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskSByte, -1, 2, {INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskSingle, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskUInt16, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskByte, -1, 2, {INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskDouble, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, 
CreateWhileGreaterThanOrEqualMaskSByte, -1, 2, {INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskSingle, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskUInt16, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskByte, -1, 2, {INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskDouble, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskSByte, -1, 2, {INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskSingle, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskUInt16, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskByte, -1, 2, {INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskDouble, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskSByte, -1, 2, {INS_sve_whilewr, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskSingle, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskUInt16, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve2, DotProductRotateComplex, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, DotProductRotateComplexBySelectedIndex, -1, 5, {INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(Sve2, FusedAddHalving, -1, -1, {INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, FusedAddRoundedHalving, -1, -1, {INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, FusedSubtractHalving, -1, -1, {INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorByteZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorInt16SignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorInt16WithByteOffsetsSignExtendNonTemporal, -1, -1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorInt32SignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sw, INS_sve_ldnt1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorInt32WithByteOffsetsSignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sw, INS_sve_ldnt1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, INS_sve_ldnt1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorSByteSignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorUInt16WithByteOffsetsZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, 
INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorUInt16ZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorUInt32WithByteOffsetsZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorUInt32ZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, GatherVectorWithByteOffsetsNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, INS_sve_ldnt1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, InterleavingXorEvenOdd, -1, 3, {INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, 
InterleavingXorOddEven, -1, 3, {INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, Log2, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_flogb, INS_invalid, INS_sve_flogb, INS_invalid, INS_sve_flogb, INS_sve_flogb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, Match, -1, 3, {INS_sve_match, INS_sve_match, INS_sve_match, INS_sve_match, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, MaxNumberPairwise, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnmp, INS_sve_fmaxnmp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MaxPairwise, -1, -1, {INS_sve_smaxp, INS_sve_umaxp, INS_sve_smaxp, INS_sve_umaxp, INS_sve_smaxp, INS_sve_umaxp, INS_sve_smaxp, INS_sve_umaxp, INS_sve_fmaxp, INS_sve_fmaxp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve2, MinNumberPairwise, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmp, INS_sve_fminnmp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MinPairwise, -1, -1, {INS_sve_sminp, INS_sve_uminp, INS_sve_sminp, INS_sve_uminp, INS_sve_sminp, INS_sve_uminp, INS_sve_sminp, INS_sve_uminp, 
INS_sve_fminp, INS_sve_fminp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyAddBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyAddRotateComplex, -1, 4, {INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, MultiplyAddRotateComplexBySelectedScalar, -1, 5, {INS_invalid, INS_invalid, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve2, MultiplyAddRoundedDoublingSaturateHighRotateComplex, -1, 4, {INS_sve_sqrdcmlah, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, MultiplyAddRoundedDoublingSaturateHighRotateComplexBySelectedScalar, -1, 5, {INS_invalid, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalar, -1, 3, 
{INS_invalid, INS_invalid, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningEven, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smullb, INS_sve_umullb, INS_sve_smullb, INS_sve_umullb, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningEvenAndAdd, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smlalb, INS_sve_umlalb, INS_sve_smlalb, INS_sve_umlalb, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningEvenAndSubtract, -1, 4, {INS_invalid, INS_invalid, INS_sve_smlslb, INS_sve_umlslb, INS_sve_smlslb, INS_sve_umlslb, INS_sve_smlslb, INS_sve_umlslb, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningOdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smullt, INS_sve_umullt, INS_sve_smullt, INS_sve_umullt, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningOddAndAdd, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smlalt, INS_sve_umlalt, INS_sve_smlalt, INS_sve_umlalt, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, 
MultiplyBySelectedScalarWideningOddAndSubtract, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smlslt, INS_sve_umlslt, INS_sve_smlslt, INS_sve_umlslt, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingBySelectedScalarSaturateHigh, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingSaturateHigh, -1, 2, {INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndAddSaturateEven, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndAddSaturateEvenOdd, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqdmlalbt, INS_invalid, INS_sve_sqdmlalbt, INS_invalid, INS_sve_sqdmlalbt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndAddSaturateOdd, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndSubtractSaturateEven, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndSubtractSaturateEvenOdd, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqdmlslbt, INS_invalid, INS_sve_sqdmlslbt, INS_invalid, INS_sve_sqdmlslbt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndSubtractSaturateOdd, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningBySelectedScalarAndAddSaturateEven, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningBySelectedScalarAndAddSaturateOdd, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningBySelectedScalarAndSubtractSaturateEven, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningBySelectedScalarAndSubtractSaturateOdd, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningSaturateEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningSaturateEvenBySelectedScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningSaturateOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningSaturateOddBySelectedScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingBySelectedScalarSaturateHigh, -1, 3, {INS_invalid, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateAndAddHigh, -1, 3, {INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, 
MultiplyRoundedDoublingSaturateAndSubtractHigh, -1, 3, {INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateBySelectedScalarAndAddHigh, -1, 4, {INS_invalid, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateBySelectedScalarAndSubtractHigh, -1, 4, {INS_invalid, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateHigh, -1, 2, {INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, MultiplySubtractBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation) -HARDWARE_INTRINSIC(Sve2, MultiplyWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_smullb, INS_sve_umullb, INS_sve_smullb, INS_sve_umullb, INS_sve_smullb, INS_sve_umullb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, MultiplyWideningEvenAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_sve_smlalb, INS_sve_umlalb, INS_sve_smlalb, INS_sve_umlalb, 
INS_sve_smlalb, INS_sve_umlalb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyWideningEvenAndSubtract, -1, 3, {INS_invalid, INS_invalid, INS_sve_smlslb, INS_sve_umlslb, INS_sve_smlslb, INS_sve_umlslb, INS_sve_smlslb, INS_sve_umlslb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_smullt, INS_sve_umullt, INS_sve_smullt, INS_sve_umullt, INS_sve_smullt, INS_sve_umullt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, MultiplyWideningOddAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_sve_smlalt, INS_sve_umlalt, INS_sve_smlalt, INS_sve_umlalt, INS_sve_smlalt, INS_sve_umlalt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, MultiplyWideningOddAndSubtract, -1, 3, {INS_invalid, INS_invalid, INS_sve_smlslt, INS_sve_umlslt, INS_sve_smlslt, INS_sve_umlslt, INS_sve_smlslt, INS_sve_umlslt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, NegateSaturate, -1, -1, {INS_sve_sqneg, INS_invalid, INS_sve_sqneg, INS_invalid, INS_sve_sqneg, INS_invalid, INS_sve_sqneg, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, NoMatch, -1, 3, {INS_sve_nmatch, INS_sve_nmatch, INS_sve_nmatch, INS_sve_nmatch, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve2, PolynomialMultiply, -1, 2, {INS_sve_pmul, INS_sve_pmul, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, PolynomialMultiplyWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pmullb, INS_invalid, INS_invalid, INS_invalid, INS_sve_pmullb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, PolynomialMultiplyWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pmullt, INS_invalid, INS_invalid, INS_invalid, INS_sve_pmullt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, ReciprocalEstimate, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_urecpe, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ReciprocalSqrtEstimate, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ursqrte, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, SaturatingExtractNarrowingLower, -1, 1, {INS_sve_sqxtnb, INS_sve_uqxtnb, INS_sve_sqxtnb, INS_sve_uqxtnb, INS_sve_sqxtnb, INS_sve_uqxtnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, SaturatingExtractNarrowingUpper, -1, 2, {INS_sve_sqxtnt, INS_sve_uqxtnt, INS_sve_sqxtnt, INS_sve_uqxtnt, INS_sve_sqxtnt, INS_sve_uqxtnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, SaturatingExtractUnsignedNarrowingLower, -1, 1, {INS_invalid, INS_sve_sqxtunb, INS_invalid, INS_sve_sqxtunb, INS_invalid, INS_sve_sqxtunb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, SaturatingExtractUnsignedNarrowingUpper, -1, 2, {INS_invalid, INS_sve_sqxtunt, INS_invalid, INS_sve_sqxtunt, INS_invalid, 
INS_sve_sqxtunt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, Scatter16BitNarrowingNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1h, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, Scatter16BitWithByteOffsetsNarrowingNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1h, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, Scatter32BitNarrowingNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1w, INS_sve_stnt1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, Scatter32BitWithByteOffsetsNarrowingNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1w, INS_sve_stnt1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, Scatter8BitNarrowingNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) 
-HARDWARE_INTRINSIC(Sve2, Scatter8BitWithByteOffsetsNarrowingNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ScatterNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ScatterWithByteOffsetsNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ShiftArithmeticRounded, -1, -1, {INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ShiftArithmeticRoundedSaturate, -1, -1, {INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ShiftArithmeticSaturate, -1, -1, {INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve2, ShiftLeftAndInsert, -1, 3, {INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalSaturate, -1, -1, {INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalSaturateUnsigned, -1, -1, {INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_sshllb, INS_sve_ushllb, INS_sve_sshllb, INS_sve_ushllb, INS_sve_sshllb, INS_sve_ushllb, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_sshllt, INS_sve_ushllt, INS_sve_sshllt, INS_sve_ushllt, INS_sve_sshllt, INS_sve_ushllt, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftLogicalRounded, -1, -1, {INS_invalid, INS_sve_urshl, INS_invalid, INS_sve_urshl, INS_invalid, INS_sve_urshl, INS_invalid, INS_sve_urshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) 
-HARDWARE_INTRINSIC(Sve2, ShiftLogicalRoundedSaturate, -1, -1, {INS_invalid, INS_sve_uqrshl, INS_invalid, INS_sve_uqrshl, INS_invalid, INS_sve_uqrshl, INS_invalid, INS_sve_uqrshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightAndInsert, -1, 3, {INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticAdd, -1, 3, {INS_sve_ssra, INS_invalid, INS_sve_ssra, INS_invalid, INS_sve_ssra, INS_invalid, INS_sve_ssra, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticNarrowingSaturateEven, -1, 2, {INS_sve_sqshrnb, INS_invalid, INS_sve_sqshrnb, INS_invalid, INS_sve_sqshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticNarrowingSaturateOdd, -1, 3, {INS_sve_sqshrnt, INS_invalid, INS_sve_sqshrnt, INS_invalid, INS_sve_sqshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticNarrowingSaturateUnsignedEven, -1, 2, {INS_invalid, INS_sve_sqshrunb, INS_invalid, INS_sve_sqshrunb, INS_invalid, INS_sve_sqshrunb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticNarrowingSaturateUnsignedOdd, -1, 3, {INS_invalid, INS_sve_sqshrunt, INS_invalid, INS_sve_sqshrunt, 
INS_invalid, INS_sve_sqshrunt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRounded, -1, -1, {INS_sve_srshr, INS_invalid, INS_sve_srshr, INS_invalid, INS_sve_srshr, INS_invalid, INS_sve_srshr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedAdd, -1, 3, {INS_sve_srsra, INS_invalid, INS_sve_srsra, INS_invalid, INS_sve_srsra, INS_invalid, INS_sve_srsra, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedNarrowingSaturateEven, -1, 2, {INS_sve_sqrshrnb, INS_invalid, INS_sve_sqrshrnb, INS_invalid, INS_sve_sqrshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedNarrowingSaturateOdd, -1, 3, {INS_sve_sqrshrnt, INS_invalid, INS_sve_sqrshrnt, INS_invalid, INS_sve_sqrshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedEven, -1, 2, {INS_invalid, INS_sve_sqrshrunb, INS_invalid, INS_sve_sqrshrunb, INS_invalid, INS_sve_sqrshrunb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedOdd, -1, 3, {INS_invalid, INS_sve_sqrshrunt, INS_invalid, INS_sve_sqrshrunt, 
INS_invalid, INS_sve_sqrshrunt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalAdd, -1, 3, {INS_invalid, INS_sve_usra, INS_invalid, INS_sve_usra, INS_invalid, INS_sve_usra, INS_invalid, INS_sve_usra, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalNarrowingEven, -1, 2, {INS_sve_shrnb, INS_sve_shrnb, INS_sve_shrnb, INS_sve_shrnb, INS_sve_shrnb, INS_sve_shrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalNarrowingOdd, -1, 3, {INS_sve_shrnt, INS_sve_shrnt, INS_sve_shrnt, INS_sve_shrnt, INS_sve_shrnt, INS_sve_shrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalNarrowingSaturateEven, -1, 2, {INS_invalid, INS_sve_uqshrnb, INS_invalid, INS_sve_uqshrnb, INS_invalid, INS_sve_uqshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalNarrowingSaturateOdd, -1, 3, {INS_invalid, INS_sve_uqshrnt, INS_invalid, INS_sve_uqshrnt, INS_invalid, INS_sve_uqshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRounded, -1, -1, {INS_invalid, INS_sve_urshr, INS_invalid, INS_sve_urshr, INS_invalid, INS_sve_urshr, INS_invalid, INS_sve_urshr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedAdd, -1, 3, {INS_invalid, INS_sve_ursra, INS_invalid, INS_sve_ursra, INS_invalid, INS_sve_ursra, INS_invalid, INS_sve_ursra, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedNarrowingEven, -1, 2, {INS_sve_rshrnb, INS_sve_rshrnb, INS_sve_rshrnb, INS_sve_rshrnb, INS_sve_rshrnb, INS_sve_rshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedNarrowingOdd, -1, 3, {INS_sve_rshrnt, INS_sve_rshrnt, INS_sve_rshrnt, INS_sve_rshrnt, INS_sve_rshrnt, INS_sve_rshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedNarrowingSaturateEven, -1, 2, {INS_invalid, INS_sve_uqrshrnb, INS_invalid, INS_sve_uqrshrnb, INS_invalid, INS_sve_uqrshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) -HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedNarrowingSaturateOdd, -1, 3, {INS_invalid, INS_sve_uqrshrnt, INS_invalid, INS_sve_uqrshrnt, INS_invalid, INS_sve_uqrshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, SubtractBorrowWideningEven, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sbclb, INS_invalid, INS_sve_sbclb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, 
SubtractBorrowWideningOdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sbclt, INS_invalid, INS_sve_sbclt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, SubtractHighNarrowingEven, -1, 2, {INS_sve_subhnb, INS_sve_subhnb, INS_sve_subhnb, INS_sve_subhnb, INS_sve_subhnb, INS_sve_subhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, SubtractHighNarrowingOdd, -1, 3, {INS_sve_subhnt, INS_sve_subhnt, INS_sve_subhnt, INS_sve_subhnt, INS_sve_subhnt, INS_sve_subhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, SubtractRoundedHighNarrowingEven, -1, 2, {INS_sve_rsubhnb, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, SubtractRoundedHighNarrowingOdd, -1, 3, {INS_sve_rsubhnt, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, SubtractSaturate, -1, -1, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, SubtractWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_ssubwb, INS_sve_usubwb, INS_sve_ssubwb, INS_sve_usubwb, INS_sve_ssubwb, INS_sve_usubwb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve2, SubtractWideningEvenOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_ssublbt, INS_invalid, INS_sve_ssublbt, 
INS_invalid, INS_sve_ssublbt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, SubtractWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_ssubwt, INS_sve_usubwt, INS_sve_ssubwt, INS_sve_usubwt, INS_sve_ssubwt, INS_sve_usubwt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve2, SubtractWideningOddEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_ssubltb, INS_invalid, INS_sve_ssubltb, INS_invalid, INS_sve_ssubltb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve2, VectorTableLookup, -1, 2, {INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve2, VectorTableLookupExtension, -1, 3, {INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, Xor, -1, 3, {INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, XorRotateRight, -1, 3, {INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) -#define LAST_NI_Sve2 NI_Sve2_XorRotateRight +#define FIRST_NI_Sve2 NI_Sve2_AbsSaturate +HARDWARE_INTRINSIC(Sve2, AbsSaturate, -1, -1, INS_sve_sqabs, INS_invalid, INS_sve_sqabs, INS_invalid, INS_sve_sqabs, INS_invalid, INS_sve_sqabs, INS_invalid, INS_invalid, 
INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAdd, -1, 3, INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_sve_saba, INS_sve_uaba, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningEven, -1, 2, INS_invalid, INS_invalid, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLowerAndAddEven, -1, 3, INS_invalid, INS_invalid, INS_sve_sabalb, INS_sve_uabalb, INS_sve_sabalb, INS_sve_uabalb, INS_sve_sabalb, INS_sve_uabalb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLowerAndAddOdd, -1, 3, INS_invalid, INS_invalid, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningOdd, -1, 2, INS_invalid, INS_invalid, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, AddCarryWideningEven, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclb, INS_invalid, INS_sve_adclb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AddCarryWideningOdd, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclt, INS_invalid, INS_sve_adclt, INS_invalid, INS_invalid, -1, -1, 
HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AddHighNarrowingEven, -1, 2, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_sve_addhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, AddHighNarrowingOdd, -1, 3, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_sve_addhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AddPairwise, -1, -1, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_addp, INS_sve_faddp, INS_sve_faddp, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve2, AddPairwiseWideningAndAdd, -1, -1, INS_invalid, INS_invalid, INS_sve_sadalp, INS_sve_uadalp, INS_sve_sadalp, INS_sve_uadalp, INS_sve_sadalp, INS_sve_uadalp, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, AddRotateComplex, -1, 3, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve2, AddRoundedHighNarrowingEven, -1, 2, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, AddRoundedHighNarrowingOdd, -1, 3, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, AddSaturate, -1, -1, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve2, AddSaturateRotateComplex, -1, 3, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve2, AddWideningEven, -1, 2, INS_invalid, INS_invalid, INS_sve_saddwb, INS_sve_uaddwb, INS_sve_saddwb, INS_sve_uaddwb, INS_sve_saddwb, INS_sve_uaddwb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve2, AddWideningEvenOdd, -1, 2, INS_invalid, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, AddWideningOdd, -1, 2, INS_invalid, INS_invalid, INS_sve_saddwt, INS_sve_uaddwt, INS_sve_saddwt, INS_sve_uaddwt, INS_sve_saddwt, INS_sve_uaddwt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve2, BitwiseClearXor, -1, 3, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 3, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, 
INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, -1, 3, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, BitwiseSelectRightInverted, -1, 3, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ConvertToDoubleOdd, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtlt, INS_sve_fcvtlt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ConvertToSingleEvenRoundToOdd, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtx, INS_sve_fcvtx, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ConvertToSingleOdd, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtnt, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ConvertToSingleOddRoundToOdd, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtxnt, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, CountMatchingElements, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_histcnt, INS_sve_histcnt, INS_sve_histcnt, INS_sve_histcnt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, CountMatchingElementsIn128BitSegments, -1, 2, INS_sve_histseg, INS_sve_histseg, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskByte, -1, 2, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskDouble, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskInt16, -1, 2, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, 
CreateWhileGreaterThanMaskInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskSByte, -1, 2, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskSingle, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskUInt16, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanMaskUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilegt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskByte, -1, 2, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskDouble, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskInt16, -1, 2, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskSByte, -1, 2, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskSingle, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskUInt16, -1, 2, INS_invalid, INS_invalid, 
INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileGreaterThanOrEqualMaskUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilege, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskByte, -1, 2, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskDouble, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskInt16, -1, 2, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskSByte, -1, 2, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskSingle, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskUInt16, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileReadAfterWriteMaskUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilerw, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskByte, -1, 2, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskDouble, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskInt16, -1, 2, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskSByte, -1, 2, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskSingle, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) 
+HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskUInt16, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskUInt32, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, CreateWhileWriteAfterReadMaskUInt64, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilewr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve2, DotProductRotateComplex, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, DotProductRotateComplexBySelectedIndex, -1, 5, INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(Sve2, FusedAddHalving, -1, -1, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, FusedAddRoundedHalving, -1, -1, 
INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, FusedSubtractHalving, -1, -1, INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorByteZeroExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorInt16SignExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorInt16WithByteOffsetsSignExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorInt32SignExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sw, INS_sve_ldnt1sw, INS_invalid, INS_invalid, -1, -1, 
HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorInt32WithByteOffsetsSignExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sw, INS_sve_ldnt1sw, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, INS_sve_ldnt1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorSByteSignExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorUInt16WithByteOffsetsZeroExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorUInt16ZeroExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorUInt32WithByteOffsetsZeroExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorUInt32ZeroExtendNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, GatherVectorWithByteOffsetsNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, INS_sve_ldnt1d, -1, -1, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, InterleavingXorEvenOdd, -1, 3, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, InterleavingXorOddEven, -1, 3, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, Log2, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_flogb, INS_invalid, INS_sve_flogb, INS_invalid, INS_sve_flogb, INS_sve_flogb, 
-1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, Match, -1, 3, INS_sve_match, INS_sve_match, INS_sve_match, INS_sve_match, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, MaxNumberPairwise, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmaxnmp, INS_sve_fmaxnmp, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MaxPairwise, -1, -1, INS_sve_smaxp, INS_sve_umaxp, INS_sve_smaxp, INS_sve_umaxp, INS_sve_smaxp, INS_sve_umaxp, INS_sve_smaxp, INS_sve_umaxp, INS_sve_fmaxp, INS_sve_fmaxp, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve2, MinNumberPairwise, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmp, INS_sve_fminnmp, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MinPairwise, -1, -1, INS_sve_sminp, INS_sve_uminp, INS_sve_sminp, INS_sve_uminp, INS_sve_sminp, INS_sve_uminp, INS_sve_sminp, INS_sve_uminp, INS_sve_fminp, INS_sve_fminp, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyAddBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid, -1, -1, 
HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyAddRotateComplex, -1, 4, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, MultiplyAddRotateComplexBySelectedScalar, -1, 5, INS_invalid, INS_invalid, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_sve_cmla, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve2, MultiplyAddRoundedDoublingSaturateHighRotateComplex, -1, 4, INS_sve_sqrdcmlah, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, MultiplyAddRoundedDoublingSaturateHighRotateComplexBySelectedScalar, -1, 5, INS_invalid, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_sve_sqrdcmlah, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalar, -1, 3, INS_invalid, INS_invalid, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningEven, -1, 3, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_sve_smullb, INS_sve_umullb, INS_sve_smullb, INS_sve_umullb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningEvenAndAdd, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smlalb, INS_sve_umlalb, INS_sve_smlalb, INS_sve_umlalb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningEvenAndSubtract, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smlslb, INS_sve_umlslb, INS_sve_smlslb, INS_sve_umlslb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningOdd, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smullt, INS_sve_umullt, INS_sve_smullt, INS_sve_umullt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningOddAndAdd, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smlalt, INS_sve_umlalt, INS_sve_smlalt, INS_sve_umlalt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyBySelectedScalarWideningOddAndSubtract, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_smlslt, INS_sve_umlslt, INS_sve_smlslt, INS_sve_umlslt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, 
HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingBySelectedScalarSaturateHigh, -1, 3, INS_invalid, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingSaturateHigh, -1, 2, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_sve_sqdmulh, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndAddSaturateEven, -1, 3, INS_invalid, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndAddSaturateEvenOdd, -1, 3, INS_invalid, INS_invalid, INS_sve_sqdmlalbt, INS_invalid, INS_sve_sqdmlalbt, INS_invalid, INS_sve_sqdmlalbt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndAddSaturateOdd, -1, 3, INS_invalid, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndSubtractSaturateEven, -1, 3, INS_invalid, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndSubtractSaturateEvenOdd, -1, 3, INS_invalid, INS_invalid, INS_sve_sqdmlslbt, INS_invalid, 
INS_sve_sqdmlslbt, INS_invalid, INS_sve_sqdmlslbt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningAndSubtractSaturateOdd, -1, 3, INS_invalid, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningBySelectedScalarAndAddSaturateEven, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_sve_sqdmlalb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningBySelectedScalarAndAddSaturateOdd, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_sve_sqdmlalt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningBySelectedScalarAndSubtractSaturateEven, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_sve_sqdmlslb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningBySelectedScalarAndSubtractSaturateOdd, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_sve_sqdmlslt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, 
MultiplyDoublingWideningSaturateEven, -1, 2, INS_invalid, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningSaturateEvenBySelectedScalar, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_sve_sqdmullb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningSaturateOdd, -1, 2, INS_invalid, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, MultiplyDoublingWideningSaturateOddBySelectedScalar, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_sve_sqdmullt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingBySelectedScalarSaturateHigh, -1, 3, INS_invalid, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateAndAddHigh, -1, 3, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateAndSubtractHigh, -1, 3, INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, 
INS_sve_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateBySelectedScalarAndAddHigh, -1, 4, INS_invalid, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_sve_sqrdmlah, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateBySelectedScalarAndSubtractHigh, -1, 4, INS_invalid, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_sve_sqrdmlsh, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyRoundedDoublingSaturateHigh, -1, 2, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_sve_sqrdmulh, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, MultiplySubtractBySelectedScalar, -1, 4, INS_invalid, INS_invalid, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve2, MultiplyWideningEven, -1, 2, INS_invalid, INS_invalid, INS_sve_smullb, INS_sve_umullb, INS_sve_smullb, INS_sve_umullb, INS_sve_smullb, INS_sve_umullb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, MultiplyWideningEvenAndAdd, -1, 3, INS_invalid, INS_invalid, INS_sve_smlalb, INS_sve_umlalb, INS_sve_smlalb, INS_sve_umlalb, INS_sve_smlalb, INS_sve_umlalb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyWideningEvenAndSubtract, -1, 3, INS_invalid, INS_invalid, INS_sve_smlslb, INS_sve_umlslb, INS_sve_smlslb, INS_sve_umlslb, INS_sve_smlslb, INS_sve_umlslb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyWideningOdd, -1, 2, INS_invalid, INS_invalid, INS_sve_smullt, INS_sve_umullt, INS_sve_smullt, INS_sve_umullt, INS_sve_smullt, INS_sve_umullt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, MultiplyWideningOddAndAdd, -1, 3, INS_invalid, INS_invalid, INS_sve_smlalt, INS_sve_umlalt, INS_sve_smlalt, INS_sve_umlalt, INS_sve_smlalt, INS_sve_umlalt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, MultiplyWideningOddAndSubtract, -1, 3, INS_invalid, INS_invalid, INS_sve_smlslt, INS_sve_umlslt, INS_sve_smlslt, INS_sve_umlslt, INS_sve_smlslt, INS_sve_umlslt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, NegateSaturate, -1, -1, INS_sve_sqneg, INS_invalid, INS_sve_sqneg, INS_invalid, INS_sve_sqneg, INS_invalid, INS_sve_sqneg, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, NoMatch, -1, 3, INS_sve_nmatch, INS_sve_nmatch, INS_sve_nmatch, INS_sve_nmatch, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) +HARDWARE_INTRINSIC(Sve2, PolynomialMultiply, -1, 2, INS_sve_pmul, INS_sve_pmul, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, 
PolynomialMultiplyWideningEven, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_pmullb, INS_invalid, INS_invalid, INS_invalid, INS_sve_pmullb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, PolynomialMultiplyWideningOdd, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_sve_pmullt, INS_invalid, INS_invalid, INS_invalid, INS_sve_pmullt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, ReciprocalEstimate, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_urecpe, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ReciprocalSqrtEstimate, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ursqrte, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, SaturatingExtractNarrowingLower, -1, 1, INS_sve_sqxtnb, INS_sve_uqxtnb, INS_sve_sqxtnb, INS_sve_uqxtnb, INS_sve_sqxtnb, INS_sve_uqxtnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, SaturatingExtractNarrowingUpper, -1, 2, INS_sve_sqxtnt, INS_sve_uqxtnt, INS_sve_sqxtnt, INS_sve_uqxtnt, INS_sve_sqxtnt, INS_sve_uqxtnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, SaturatingExtractUnsignedNarrowingLower, -1, 1, INS_invalid, INS_sve_sqxtunb, INS_invalid, INS_sve_sqxtunb, INS_invalid, INS_sve_sqxtunb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, SaturatingExtractUnsignedNarrowingUpper, -1, 2, INS_invalid, INS_sve_sqxtunt, INS_invalid, INS_sve_sqxtunt, INS_invalid, 
INS_sve_sqxtunt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, Scatter16BitNarrowingNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, Scatter16BitWithByteOffsetsNarrowingNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1h, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, Scatter32BitNarrowingNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1w, INS_sve_stnt1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, Scatter32BitWithByteOffsetsNarrowingNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1w, INS_sve_stnt1w, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, Scatter8BitNarrowingNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, 
HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, Scatter8BitWithByteOffsetsNarrowingNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1b, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ScatterNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ScatterWithByteOffsetsNonTemporal, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ShiftArithmeticRounded, -1, -1, INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_sve_srshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ShiftArithmeticRoundedSaturate, -1, -1, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ShiftArithmeticSaturate, -1, -1, INS_sve_sqshl, 
INS_invalid, INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ShiftLeftAndInsert, -1, 3, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalSaturate, -1, -1, INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalSaturateUnsigned, -1, -1, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalWideningEven, -1, 2, INS_invalid, INS_invalid, INS_sve_sshllb, INS_sve_ushllb, INS_sve_sshllb, INS_sve_ushllb, INS_sve_sshllb, INS_sve_ushllb, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalWideningOdd, -1, 2, INS_invalid, INS_invalid, INS_sve_sshllt, INS_sve_ushllt, INS_sve_sshllt, INS_sve_ushllt, INS_sve_sshllt, INS_sve_ushllt, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftLogicalRounded, -1, -1, INS_invalid, INS_sve_urshl, INS_invalid, INS_sve_urshl, INS_invalid, INS_sve_urshl, INS_invalid, 
INS_sve_urshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftLogicalRoundedSaturate, -1, -1, INS_invalid, INS_sve_uqrshl, INS_invalid, INS_sve_uqrshl, INS_invalid, INS_sve_uqrshl, INS_invalid, INS_sve_uqrshl, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightAndInsert, -1, 3, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_sve_sri, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticAdd, -1, 3, INS_sve_ssra, INS_invalid, INS_sve_ssra, INS_invalid, INS_sve_ssra, INS_invalid, INS_sve_ssra, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticNarrowingSaturateEven, -1, 2, INS_sve_sqshrnb, INS_invalid, INS_sve_sqshrnb, INS_invalid, INS_sve_sqshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticNarrowingSaturateOdd, -1, 3, INS_sve_sqshrnt, INS_invalid, INS_sve_sqshrnt, INS_invalid, INS_sve_sqshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticNarrowingSaturateUnsignedEven, -1, 2, INS_invalid, INS_sve_sqshrunb, INS_invalid, INS_sve_sqshrunb, INS_invalid, INS_sve_sqshrunb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticNarrowingSaturateUnsignedOdd, -1, 3, INS_invalid, INS_sve_sqshrunt, INS_invalid, INS_sve_sqshrunt, INS_invalid, INS_sve_sqshrunt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRounded, -1, -1, INS_sve_srshr, INS_invalid, INS_sve_srshr, INS_invalid, INS_sve_srshr, INS_invalid, INS_sve_srshr, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedAdd, -1, 3, INS_sve_srsra, INS_invalid, INS_sve_srsra, INS_invalid, INS_sve_srsra, INS_invalid, INS_sve_srsra, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedNarrowingSaturateEven, -1, 2, INS_sve_sqrshrnb, INS_invalid, INS_sve_sqrshrnb, INS_invalid, INS_sve_sqrshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedNarrowingSaturateOdd, -1, 3, INS_sve_sqrshrnt, INS_invalid, INS_sve_sqrshrnt, INS_invalid, INS_sve_sqrshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedEven, -1, 2, INS_invalid, INS_sve_sqrshrunb, INS_invalid, INS_sve_sqrshrunb, INS_invalid, INS_sve_sqrshrunb, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightArithmeticRoundedNarrowingSaturateUnsignedOdd, -1, 3, INS_invalid, INS_sve_sqrshrunt, INS_invalid, INS_sve_sqrshrunt, INS_invalid, INS_sve_sqrshrunt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalAdd, -1, 3, INS_invalid, INS_sve_usra, INS_invalid, INS_sve_usra, INS_invalid, INS_sve_usra, INS_invalid, INS_sve_usra, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalNarrowingEven, -1, 2, INS_sve_shrnb, INS_sve_shrnb, INS_sve_shrnb, INS_sve_shrnb, INS_sve_shrnb, INS_sve_shrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalNarrowingOdd, -1, 3, INS_sve_shrnt, INS_sve_shrnt, INS_sve_shrnt, INS_sve_shrnt, INS_sve_shrnt, INS_sve_shrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalNarrowingSaturateEven, -1, 2, INS_invalid, INS_sve_uqshrnb, INS_invalid, INS_sve_uqshrnb, INS_invalid, INS_sve_uqshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalNarrowingSaturateOdd, -1, 3, INS_invalid, INS_sve_uqshrnt, INS_invalid, INS_sve_uqshrnt, INS_invalid, INS_sve_uqshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRounded, -1, -1, INS_invalid, INS_sve_urshr, INS_invalid, INS_sve_urshr, INS_invalid, INS_sve_urshr, INS_invalid, INS_sve_urshr, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedAdd, -1, 3, INS_invalid, INS_sve_ursra, INS_invalid, INS_sve_ursra, INS_invalid, INS_sve_ursra, INS_invalid, INS_sve_ursra, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedNarrowingEven, -1, 2, INS_sve_rshrnb, INS_sve_rshrnb, INS_sve_rshrnb, INS_sve_rshrnb, INS_sve_rshrnb, INS_sve_rshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedNarrowingOdd, -1, 3, INS_sve_rshrnt, INS_sve_rshrnt, INS_sve_rshrnt, INS_sve_rshrnt, INS_sve_rshrnt, INS_sve_rshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedNarrowingSaturateEven, -1, 2, INS_invalid, INS_sve_uqrshrnb, INS_invalid, INS_sve_uqrshrnb, INS_invalid, INS_sve_uqrshrnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve2, ShiftRightLogicalRoundedNarrowingSaturateOdd, -1, 3, INS_invalid, INS_sve_uqrshrnt, INS_invalid, INS_sve_uqrshrnt, INS_invalid, INS_sve_uqrshrnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, SubtractBorrowWideningEven, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sbclb, INS_invalid, INS_sve_sbclb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, SubtractBorrowWideningOdd, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sbclt, INS_invalid, INS_sve_sbclt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, SubtractHighNarrowingEven, -1, 2, INS_sve_subhnb, INS_sve_subhnb, INS_sve_subhnb, INS_sve_subhnb, INS_sve_subhnb, INS_sve_subhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, SubtractHighNarrowingOdd, -1, 3, INS_sve_subhnt, INS_sve_subhnt, INS_sve_subhnt, INS_sve_subhnt, INS_sve_subhnt, INS_sve_subhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, SubtractRoundedHighNarrowingEven, -1, 2, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_sve_rsubhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, SubtractRoundedHighNarrowingOdd, -1, 3, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_sve_rsubhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, SubtractSaturate, -1, -1, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, 
SubtractWideningEven, -1, 2, INS_invalid, INS_invalid, INS_sve_ssubwb, INS_sve_usubwb, INS_sve_ssubwb, INS_sve_usubwb, INS_sve_ssubwb, INS_sve_usubwb, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve2, SubtractWideningEvenOdd, -1, 2, INS_invalid, INS_invalid, INS_sve_ssublbt, INS_invalid, INS_sve_ssublbt, INS_invalid, INS_sve_ssublbt, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, SubtractWideningOdd, -1, 2, INS_invalid, INS_invalid, INS_sve_ssubwt, INS_sve_usubwt, INS_sve_ssubwt, INS_sve_usubwt, INS_sve_ssubwt, INS_sve_usubwt, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve2, SubtractWideningOddEven, -1, 2, INS_invalid, INS_invalid, INS_sve_ssubltb, INS_invalid, INS_sve_ssubltb, INS_invalid, INS_sve_ssubltb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve2, VectorTableLookup, -1, 2, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve2, VectorTableLookupExtension, -1, 3, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, INS_sve_tbx, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, Xor, -1, 3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, XorRotateRight, -1, 3, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, 
INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_invalid, INS_invalid, -1, -1, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) +#define LAST_NI_Sve2 NI_Sve2_XorRotateRight // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Sha3 -#define FIRST_NI_SveSha3 NI_SveSha3_BitwiseRotateLeftBy1AndXor -HARDWARE_INTRINSIC(SveSha3, BitwiseRotateLeftBy1AndXor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rax1, INS_sve_rax1, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -#define LAST_NI_SveSha3 NI_SveSha3_BitwiseRotateLeftBy1AndXor +// SVE SHA3 Intrinsics +#define FIRST_NI_SveSha3 NI_SveSha3_BitwiseRotateLeftBy1AndXor +HARDWARE_INTRINSIC(SveSha3, BitwiseRotateLeftBy1AndXor, -1, 2, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rax1, INS_sve_rax1, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +#define LAST_NI_SveSha3 NI_SveSha3_BitwiseRotateLeftBy1AndXor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Special intrinsics that are generated during importing or lowering -#define SPECIAL_NI_Sve NI_Sve_ConditionalExtractAfterLastActiveElementScalar -HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementScalar, 0, 3, {INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_Scalar, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementScalar, 0, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation) +#define SPECIAL_NI_Sve NI_Sve_ConditionalExtractAfterLastActiveElementScalar +HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementScalar, 0, 3, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementScalar, 0, 3, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, -1, -1, HW_Category_Scalar, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, -1, -1, HW_Category_Helper, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, -1, -1, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation) // True mask only used inside a ConvertVectorToMask -HARDWARE_INTRINSIC(Sve, ConversionTrueMask, -1, 0, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, ConversionTrueMask, -1, 0, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, -1, -1, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) // Scalar variants of Saturating*By*BitElementCount. There is 8bit versions as the generic version is scalar only. 
-HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy16BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdech, INS_sve_uqdech, INS_sve_sqdech, INS_sve_uqdech, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy32BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecw, INS_sve_uqdecw, INS_sve_sqdecw, INS_sve_uqdecw, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy64BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecd, INS_sve_uqdecd, INS_sve_sqdecd, INS_sve_uqdecd, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy16BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqinch, INS_sve_uqinch, INS_sve_sqinch, INS_sve_uqinch, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy32BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincw, INS_sve_uqincw, INS_sve_sqincw, INS_sve_uqincw, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincd, INS_sve_uqincd, INS_sve_sqincd, INS_sve_uqincd, INS_invalid, 
INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, StoreAndZipx2, -1, 3, {INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(Sve, StoreAndZipx3, -1, 3, {INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(Sve, StoreAndZipx4, -1, 3, {INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy16BitElementCountScalar, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdech, INS_sve_uqdech, INS_sve_sqdech, INS_sve_uqdech, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy32BitElementCountScalar, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecw, INS_sve_uqdecw, INS_sve_sqdecw, INS_sve_uqdecw, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy64BitElementCountScalar, 0, 3, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecd, INS_sve_uqdecd, INS_sve_sqdecd, INS_sve_uqdecd, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy16BitElementCountScalar, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqinch, INS_sve_uqinch, INS_sve_sqinch, INS_sve_uqinch, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy32BitElementCountScalar, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincw, INS_sve_uqincw, INS_sve_sqincw, INS_sve_uqincw, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCountScalar, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincd, INS_sve_uqincd, INS_sve_sqincd, INS_sve_uqincd, INS_invalid, INS_invalid, -1, -1, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, StoreAndZipx2, -1, 3, INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(Sve, StoreAndZipx3, -1, 3, INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d, -1, -1, HW_Category_MemoryStore, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(Sve, StoreAndZipx4, -1, 3, INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d, -1, -1, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) // Predicate variants of intrinsics, these are specialized for operating on TYP_MASK type values. -HARDWARE_INTRINSIC(Sve, And_Predicates, -1, 2, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, BitwiseClear_Predicates, -1, 2, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Or_Predicates, -1, 2, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Xor_Predicates, -1, 2, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ConditionalSelect_Predicates, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, 
HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ZipHigh_Predicates, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, ZipLow_Predicates, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, UnzipEven_Predicates, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, UnzipOdd_Predicates, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, TransposeEven_Predicates, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, TransposeOdd_Predicates, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, ReverseElement_Predicates, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) - -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SVE Implementation of VectorT Intrinsics -#define FIRST_NI_VectorT NI_Illegal -#define LAST_NI_VectorT NI_Illegal +HARDWARE_INTRINSIC(Sve, And_Predicates, -1, 2, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, BitwiseClear_Predicates, -1, 2, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Or_Predicates, -1, 2, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Xor_Predicates, -1, 2, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ConditionalSelect_Predicates, -1, 3, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ZipHigh_Predicates, -1, 2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, ZipLow_Predicates, -1, 2, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, UnzipEven_Predicates, -1, 2, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, UnzipOdd_Predicates, -1, 2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, TransposeEven_Predicates, -1, 2, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, -1, 
-1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, TransposeOdd_Predicates, -1, 2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, ReverseElement_Predicates, -1, 1, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, -1, -1, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/hwintrinsiclistwasm.h b/src/coreclr/jit/hwintrinsiclistwasm.h index 692835d03476fc..d728e7da3f8e00 100644 --- a/src/coreclr/jit/hwintrinsiclistwasm.h +++ b/src/coreclr/jit/hwintrinsiclistwasm.h @@ -11,126 +11,75 @@ #ifdef FEATURE_HW_INTRINSICS // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Wasm Intrinsics -// TODO-WASM: Fill in correct flags and direct mappings here for Vector128 xplat intrinsics implementation -#define FIRST_NI_Vector128 NI_Vector128_As -HARDWARE_INTRINSIC(Vector128, As, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, WithLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, WithUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_E, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Epsilon, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NaN, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeOne, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeZero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Pi, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_PositiveInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Tau, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) - -#define LAST_NI_Vector128 NI_Vector128_op_UnaryPlus - -#define FIRST_NI_PackedSimd NI_PackedSimd_Abs -HARDWARE_INTRINSIC(PackedSimd, Abs, 16, 1, {INS_i8x16_abs, INS_invalid, INS_i16x8_abs, INS_invalid, INS_i32x4_abs, INS_invalid, INS_i64x2_abs, INS_invalid, INS_f32x4_abs, INS_f64x2_abs }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Add, 16, 2, {INS_i8x16_add, INS_i8x16_add, INS_i16x8_add, INS_i16x8_add, INS_i32x4_add, INS_i32x4_add, INS_i64x2_add, INS_i64x2_add, INS_f32x4_add, INS_f64x2_add }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, AddPairwiseWidening, 16, 1, {INS_i16x8_extadd_pairwise_s_i8x16, INS_i16x8_extadd_pairwise_u_i8x16, INS_i32x4_extadd_pairwise_s_i16x8, INS_i32x4_extadd_pairwise_u_i16x8, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, AddSaturate, 16, 2, {INS_i8x16_add_sat_s, INS_i8x16_add_sat_u, INS_i16x8_add_sat_s, INS_i16x8_add_sat_u, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, AllTrue, 16, 1, {INS_i8x16_all_true, INS_i8x16_all_true, INS_i16x8_all_true, INS_i16x8_all_true, INS_i32x4_all_true, INS_i32x4_all_true, INS_i64x2_all_true, INS_i64x2_all_true, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, And, 16, 2, {INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, AndNot, 16, 2, {INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, AnyTrue, 16, 1, {INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(PackedSimd, AverageRounded, 16, 2, {INS_invalid, INS_i8x16_avgr_u, INS_invalid, INS_i16x8_avgr_u, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, Bitmask, 16, 1, {INS_i8x16_bitmask, INS_i8x16_bitmask, INS_i16x8_bitmask, INS_i16x8_bitmask, INS_i32x4_bitmask, INS_i32x4_bitmask, INS_i64x2_bitmask, INS_i64x2_bitmask, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, BitwiseSelect, 16, 3, {INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect }, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_ceil, INS_f64x2_ceil }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, CompareEqual, 16, 2, {INS_i8x16_eq, INS_i8x16_eq, INS_i16x8_eq, INS_i16x8_eq, INS_i32x4_eq, INS_i32x4_eq, INS_i64x2_eq, INS_i64x2_eq, INS_f32x4_eq, INS_f64x2_eq }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +// PackedSimd Intrinsics +#define FIRST_NI_PackedSimd NI_PackedSimd_Abs +HARDWARE_INTRINSIC(PackedSimd, Abs, 16, 1, INS_i8x16_abs, INS_invalid, INS_i16x8_abs, INS_invalid, INS_i32x4_abs, INS_invalid, INS_i64x2_abs, INS_invalid, INS_f32x4_abs, INS_f64x2_abs, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Add, 16, 2, INS_i8x16_add, INS_i8x16_add, INS_i16x8_add, INS_i16x8_add, INS_i32x4_add, INS_i32x4_add, INS_i64x2_add, INS_i64x2_add, INS_f32x4_add, INS_f64x2_add, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, AddPairwiseWidening, 16, 1, INS_i16x8_extadd_pairwise_s_i8x16, INS_i16x8_extadd_pairwise_u_i8x16, INS_i32x4_extadd_pairwise_s_i16x8, INS_i32x4_extadd_pairwise_u_i16x8, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, AddSaturate, 16, 2, INS_i8x16_add_sat_s, INS_i8x16_add_sat_u, INS_i16x8_add_sat_s, INS_i16x8_add_sat_u, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, AllTrue, 16, 1, INS_i8x16_all_true, INS_i8x16_all_true, INS_i16x8_all_true, INS_i16x8_all_true, INS_i32x4_all_true, INS_i32x4_all_true, INS_i64x2_all_true, INS_i64x2_all_true, INS_invalid, 
INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, And, 16, 2, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, INS_v128_and, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, AndNot, 16, 2, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, INS_v128_andnot, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, AnyTrue, 16, 1, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, INS_v128_any_true, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(PackedSimd, AverageRounded, 16, 2, INS_invalid, INS_i8x16_avgr_u, INS_invalid, INS_i16x8_avgr_u, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, Bitmask, 16, 1, INS_i8x16_bitmask, INS_i8x16_bitmask, INS_i16x8_bitmask, INS_i16x8_bitmask, INS_i32x4_bitmask, INS_i32x4_bitmask, INS_i64x2_bitmask, INS_i64x2_bitmask, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, BitwiseSelect, 16, 3, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, INS_v128_bitselect, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Ceiling, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_ceil, INS_f64x2_ceil, -1, -1, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, CompareEqual, 16, 2, INS_i8x16_eq, INS_i8x16_eq, INS_i16x8_eq, INS_i16x8_eq, INS_i32x4_eq, INS_i32x4_eq, INS_i64x2_eq, INS_i64x2_eq, INS_f32x4_eq, INS_f64x2_eq, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) // TODO-WASM-SIMD: ULONG slot below is INS_invalid because the WASM SIMD spec has no unsigned i64x2 compare (lt_u/le_u/gt_u/ge_u). Requires special codegen (e.g., XOR sign bit + signed compare) to support Vector128 ordered compares. Until that lands, route through impSpecialIntrinsic via HW_Flag_InvalidNodeId so calls fail with a clear NYI instead of recursing into the self-referential managed bodies in PackedSimd.cs. -HARDWARE_INTRINSIC(PackedSimd, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) - -HARDWARE_INTRINSIC(PackedSimd, CompareNotEqual, 16, 2, {INS_i8x16_ne, INS_i8x16_ne, INS_i16x8_ne, INS_i16x8_ne, INS_i32x4_ne, INS_i32x4_ne, INS_i64x2_ne, INS_i64x2_ne, INS_f32x4_ne, INS_f64x2_ne }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) 
-HARDWARE_INTRINSIC(PackedSimd, ConvertNarrowingSaturateSigned, 16, 2, {INS_invalid, INS_invalid, INS_i8x16_narrow_i16x8_s, INS_invalid, INS_i16x8_narrow_i32x4_s, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ConvertNarrowingSaturateUnsigned, 16, 2, {INS_invalid, INS_invalid, INS_i8x16_narrow_i16x8_u, INS_invalid, INS_i16x8_narrow_i32x4_u, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ConvertToDoubleLower, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f64x2_convert_low_s_i32x4, INS_f64x2_convert_low_u_i32x4, INS_invalid, INS_invalid, INS_f64x2_promote_low_f32x4, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ConvertToInt32Saturate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_i32x4_trunc_sat_s_f32x4, INS_i32x4_trunc_sat_s_f64x2_zero}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ConvertToSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_convert_s_i32x4, INS_f32x4_convert_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_demote_f64x2_zero }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ConvertToUInt32Saturate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_i32x4_trunc_sat_u_f32x4, INS_i32x4_trunc_sat_u_f64x2_zero}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_div, INS_f64x2_div }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Dot, 16, 2, {INS_invalid, INS_invalid, 
INS_i32x4_dot_i16x8_s, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ExtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_floor, INS_f64x2_floor }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, LoadScalarAndInsert, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, LoadScalarAndSplatVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(PackedSimd, LoadWideningVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, Max, 16, 2, {INS_i8x16_max_s, INS_i8x16_max_u, INS_i16x8_max_s, INS_i16x8_max_u, INS_i32x4_max_s, INS_i32x4_max_u, INS_invalid, 
INS_invalid, INS_f32x4_max, INS_f64x2_max }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, Min, 16, 2, {INS_i8x16_min_s, INS_i8x16_min_u, INS_i16x8_min_s, INS_i16x8_min_u, INS_i32x4_min_s, INS_i32x4_min_u, INS_invalid, INS_invalid, INS_f32x4_min, INS_f64x2_min }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_i16x8_mul, INS_i16x8_mul, INS_i32x4_mul, INS_i32x4_mul, INS_i64x2_mul, INS_i64x2_mul, INS_f32x4_mul, INS_f64x2_mul }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, MultiplyRoundedSaturateQ15, 16, 2, {INS_invalid, INS_invalid, INS_i16x8_q15mulr_sat_s, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, MultiplyWideningLower, 16, 2, {INS_i16x8_extmul_low_s_i8x16, INS_i16x8_extmul_low_u_i8x16, INS_i32x4_extmul_low_s_i16x8, INS_i32x4_extmul_low_u_i16x8, INS_i64x2_extmul_low_s_i32x4, INS_i64x2_extmul_low_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, MultiplyWideningUpper, 16, 2, {INS_i16x8_extmul_high_s_i8x16, INS_i16x8_extmul_high_u_i8x16, INS_i32x4_extmul_high_s_i16x8, INS_i32x4_extmul_high_u_i16x8, INS_i64x2_extmul_high_s_i32x4, INS_i64x2_extmul_high_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Negate, 16, 1, {INS_i8x16_neg, INS_i8x16_neg, INS_i16x8_neg, INS_i16x8_neg, INS_i32x4_neg, INS_i32x4_neg, INS_i64x2_neg, INS_i64x2_neg, INS_f32x4_neg, INS_f64x2_neg }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Not, 16, 1, {INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, 
INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Or, 16, 2, {INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, PopCount, 16, 1, {INS_invalid, INS_i8x16_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, PseudoMax, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_pmax, INS_f64x2_pmax }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, PseudoMin, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_pmin, INS_f64x2_pmin }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ReplaceScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, RoundToNearest, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_nearest, INS_f64x2_nearest }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, SignExtendWideningLower, 16, 1, {INS_i16x8_extend_low_s_i8x16, INS_i16x8_extend_low_s_i8x16, INS_i32x4_extend_low_s_i16x8, INS_i32x4_extend_low_s_i16x8, INS_i64x2_extend_low_s_i32x4, INS_i64x2_extend_low_s_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, SignExtendWideningUpper, 16, 1, {INS_i16x8_extend_high_s_i8x16, INS_i16x8_extend_high_s_i8x16, INS_i32x4_extend_high_s_i16x8, INS_i32x4_extend_high_s_i16x8, INS_i64x2_extend_high_s_i32x4, INS_i64x2_extend_high_s_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Splat, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_sqrt, INS_f64x2_sqrt }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(PackedSimd, StoreSelectedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(PackedSimd, Subtract, 16, 2, {INS_i8x16_sub, 
INS_i8x16_sub, INS_i16x8_sub, INS_i16x8_sub, INS_i32x4_sub, INS_i32x4_sub, INS_i64x2_sub, INS_i64x2_sub, INS_f32x4_sub, INS_f64x2_sub }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, SubtractSaturate, 16, 2, {INS_i8x16_sub_sat_s, INS_i8x16_sub_sat_u, INS_i16x8_sub_sat_s, INS_i16x8_sub_sat_u, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Swizzle, 16, 2, {INS_i8x16_swizzle, INS_i8x16_swizzle, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_trunc, INS_f64x2_trunc }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, Xor, 16, 2, {INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) -HARDWARE_INTRINSIC(PackedSimd, ZeroExtendWideningLower, 16, 1, {INS_i16x8_extend_low_u_i8x16, INS_i16x8_extend_low_u_i8x16, INS_i32x4_extend_low_u_i16x8, INS_i32x4_extend_low_u_i16x8, INS_i64x2_extend_low_u_i32x4, INS_i64x2_extend_low_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(PackedSimd, ZeroExtendWideningUpper, 16, 1, {INS_i16x8_extend_high_u_i8x16, INS_i16x8_extend_high_u_i8x16, INS_i32x4_extend_high_u_i16x8, INS_i32x4_extend_high_u_i16x8, INS_i64x2_extend_high_u_i32x4, INS_i64x2_extend_high_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid }, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, CompareGreaterThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, CompareGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, CompareLessThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, CompareLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, CompareNotEqual, 16, 2, INS_i8x16_ne, INS_i8x16_ne, INS_i16x8_ne, INS_i16x8_ne, INS_i32x4_ne, INS_i32x4_ne, INS_i64x2_ne, INS_i64x2_ne, INS_f32x4_ne, INS_f64x2_ne, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(PackedSimd, ConvertNarrowingSaturateSigned, 16, 2, INS_invalid, INS_invalid, INS_i8x16_narrow_i16x8_s, INS_invalid, INS_i16x8_narrow_i32x4_s, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ConvertNarrowingSaturateUnsigned, 16, 2, INS_invalid, INS_invalid, INS_i8x16_narrow_i16x8_u, INS_invalid, INS_i16x8_narrow_i32x4_u, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ConvertToDoubleLower, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f64x2_convert_low_s_i32x4, INS_f64x2_convert_low_u_i32x4, INS_invalid, INS_invalid, INS_f64x2_promote_low_f32x4, INS_invalid, -1, -1, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ConvertToInt32Saturate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_i32x4_trunc_sat_s_f32x4, INS_i32x4_trunc_sat_s_f64x2_zero, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ConvertToSingle, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_convert_s_i32x4, INS_f32x4_convert_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_demote_f64x2_zero, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ConvertToUInt32Saturate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_i32x4_trunc_sat_u_f32x4, INS_i32x4_trunc_sat_u_f64x2_zero, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Divide, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_div, INS_f64x2_div, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Dot, 16, 2, INS_invalid, INS_invalid, INS_i32x4_dot_i16x8_s, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ExtractScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, Floor, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_floor, INS_f64x2_floor, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, LoadScalarAndInsert, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, LoadScalarAndSplatVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, LoadScalarVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, LoadVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(PackedSimd, LoadWideningVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, Max, 16, 2, INS_i8x16_max_s, INS_i8x16_max_u, INS_i16x8_max_s, INS_i16x8_max_u, INS_i32x4_max_s, INS_i32x4_max_u, INS_invalid, INS_invalid, INS_f32x4_max, INS_f64x2_max, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, Min, 16, 2, INS_i8x16_min_s, INS_i8x16_min_u, INS_i16x8_min_s, INS_i16x8_min_u, INS_i32x4_min_s, INS_i32x4_min_u, INS_invalid, INS_invalid, INS_f32x4_min, INS_f64x2_min, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, Multiply, 16, 2, INS_invalid, INS_invalid, INS_i16x8_mul, INS_i16x8_mul, INS_i32x4_mul, INS_i32x4_mul, INS_i64x2_mul, INS_i64x2_mul, INS_f32x4_mul, INS_f64x2_mul, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, MultiplyRoundedSaturateQ15, 16, 2, INS_invalid, INS_invalid, 
INS_i16x8_q15mulr_sat_s, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, MultiplyWideningLower, 16, 2, INS_i16x8_extmul_low_s_i8x16, INS_i16x8_extmul_low_u_i8x16, INS_i32x4_extmul_low_s_i16x8, INS_i32x4_extmul_low_u_i16x8, INS_i64x2_extmul_low_s_i32x4, INS_i64x2_extmul_low_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, MultiplyWideningUpper, 16, 2, INS_i16x8_extmul_high_s_i8x16, INS_i16x8_extmul_high_u_i8x16, INS_i32x4_extmul_high_s_i16x8, INS_i32x4_extmul_high_u_i16x8, INS_i64x2_extmul_high_s_i32x4, INS_i64x2_extmul_high_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Negate, 16, 1, INS_i8x16_neg, INS_i8x16_neg, INS_i16x8_neg, INS_i16x8_neg, INS_i32x4_neg, INS_i32x4_neg, INS_i64x2_neg, INS_i64x2_neg, INS_f32x4_neg, INS_f64x2_neg, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Not, 16, 1, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, INS_v128_not, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Or, 16, 2, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, INS_v128_or, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, PopCount, 16, 1, INS_invalid, INS_i8x16_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, PseudoMax, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_f32x4_pmax, INS_f64x2_pmax, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, PseudoMin, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_pmin, INS_f64x2_pmin, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ReplaceScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, RoundToNearest, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_nearest, INS_f64x2_nearest, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ShiftLeft, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, ShiftRightArithmetic, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, ShiftRightLogical, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, SignExtendWideningLower, 16, 1, INS_i16x8_extend_low_s_i8x16, INS_i16x8_extend_low_s_i8x16, INS_i32x4_extend_low_s_i16x8, INS_i32x4_extend_low_s_i16x8, INS_i64x2_extend_low_s_i32x4, INS_i64x2_extend_low_s_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, SignExtendWideningUpper, 16, 1, 
INS_i16x8_extend_high_s_i8x16, INS_i16x8_extend_high_s_i8x16, INS_i32x4_extend_high_s_i16x8, INS_i32x4_extend_high_s_i16x8, INS_i64x2_extend_high_s_i32x4, INS_i64x2_extend_high_s_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Splat, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, Sqrt, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_sqrt, INS_f64x2_sqrt, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Store, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(PackedSimd, StoreSelectedScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(PackedSimd, Subtract, 16, 2, INS_i8x16_sub, INS_i8x16_sub, INS_i16x8_sub, INS_i16x8_sub, INS_i32x4_sub, INS_i32x4_sub, INS_i64x2_sub, INS_i64x2_sub, INS_f32x4_sub, INS_f64x2_sub, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, SubtractSaturate, 16, 2, INS_i8x16_sub_sat_s, INS_i8x16_sub_sat_u, INS_i16x8_sub_sat_s, INS_i16x8_sub_sat_u, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Swizzle, 16, 2, INS_i8x16_swizzle, INS_i8x16_swizzle, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Truncate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_f32x4_trunc, INS_f64x2_trunc, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, Xor, 16, 2, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, INS_v128_xor, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_Commutative) +HARDWARE_INTRINSIC(PackedSimd, ZeroExtendWideningLower, 16, 1, INS_i16x8_extend_low_u_i8x16, INS_i16x8_extend_low_u_i8x16, INS_i32x4_extend_low_u_i16x8, INS_i32x4_extend_low_u_i16x8, INS_i64x2_extend_low_u_i32x4, INS_i64x2_extend_low_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(PackedSimd, ZeroExtendWideningUpper, 16, 1, INS_i16x8_extend_high_u_i8x16, INS_i16x8_extend_high_u_i8x16, INS_i32x4_extend_high_u_i16x8, INS_i32x4_extend_high_u_i16x8, INS_i64x2_extend_high_u_i32x4, INS_i64x2_extend_high_u_i32x4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) #define LAST_NI_PackedSimd NI_PackedSimd_ZeroExtendWideningUpper #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 93424d9966f1bc..12bea05dbae65f 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -23,1292 +23,836 @@ */ // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Intrinsics for Vector128 -#define FIRST_NI_Vector128 NI_Vector128_Abs -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, As, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, 
HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd_simd, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConcatLowerLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConcatLowerUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConcatUpperLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConcatUpperUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, CreateAlternatingSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, CreateGeometricSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, 
INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, FusedMultiplyAdd, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAny, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, IsEvenInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsFinite, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNaN, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNegative, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNormal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
-1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsOddInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsPositive, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsSubnormal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LoadAligned, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LoadAlignedNonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LoadUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxMagnitude, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, 
HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxMagnitudeNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Min, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinMagnitude, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinMagnitudeNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Reverse, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, SubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, 
HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector128, ToVector512, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, UnzipEven, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, UnzipOdd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ZipLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ZipUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_E, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Epsilon, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Indices, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NaN, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeOne, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeZero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Pi, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_PositiveInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_SignSequence, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Tau, 16, 0, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -#define LAST_NI_Vector128 NI_Vector128_op_UnsignedRightShift - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Intrinsics for Vector256 -#define FIRST_NI_Vector256 NI_Vector256_Abs -HARDWARE_INTRINSIC(Vector256, Abs, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, AddSaturate, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, 
-1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, As, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsByte, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsDouble, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt16, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsNInt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsNUInt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsSByte, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsSingle, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt16, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsVector, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConcatLowerLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConcatLowerUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConcatUpperLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConcatUpperUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConditionalSelect, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, 
ConvertToDouble, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToInt32Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToSingle, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt32Native, 32, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateAlternatingSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateGeometricSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, 
INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, Equals, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, EqualsAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ExtractMostSignificantBits, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, FusedMultiplyAdd, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector256, GetUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, GreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, 
GreaterThanOrEqualAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, IsEvenInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsFinite, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsNaN, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsNegative, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsNegativeInfinity, 32, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsNormal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsOddInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsPositive, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsPositiveInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsSubnormal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsZero, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, LessThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LoadAligned, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, LoadAlignedNonTemporal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, LoadUnsafe, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Max, 32, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MaxMagnitude, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MaxMagnitudeNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MaxNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MaxNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MinMagnitude, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MinMagnitudeNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MinNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MinNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, NarrowWithSaturation, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, Reverse, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, ShuffleNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, ShuffleNativeFallback, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreUnsafe, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, SubtractSaturate, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
-1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Sum, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector256, ToVector512, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector256, Truncate, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, UnzipEven, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, UnzipOdd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, WidenLower, 32, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, WidenUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, WithLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, WithUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ZipLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, ZipUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, 
HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_E, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Epsilon, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Indices, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_NaN, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_NegativeInfinity, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_NegativeOne, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_NegativeZero, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_One, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Pi, 32, 0, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_PositiveInfinity, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_SignSequence, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Tau, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_BitwiseOr, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_UnaryPlus, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -#define LAST_NI_Vector256 NI_Vector256_op_UnsignedRightShift - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Intrinsics for Vector512 -#define FIRST_NI_Vector512 NI_Vector512_Abs -HARDWARE_INTRINSIC(Vector512, Abs, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, AddSaturate, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, As, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsDouble, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsInt16, 
64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsNInt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsNUInt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsSByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsSingle, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsUInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsUInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsUInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsVector, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Ceiling, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ConcatLowerLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ConcatLowerUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ConcatUpperLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, 
HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ConcatUpperUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ConditionalSelect, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, ConvertToDouble, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt32Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToSingle, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt32Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, CreateAlternatingSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, CreateGeometricSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) 
-HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Dot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Equals, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, EqualsAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ExtractMostSignificantBits, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Floor, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
-1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, FusedMultiplyAdd, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, GetElement, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector512, GetLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector512, GetLower128, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GetUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, GreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanAny, 64, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, IsEvenInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsFinite, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsNaN, 64, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsNegative, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsNegativeInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsNormal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsOddInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsPositive, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsPositiveInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsSubnormal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsZero, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, LessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, LoadUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Max, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MaxMagnitude, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MaxMagnitudeNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MaxNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MaxNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Min, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MinMagnitude, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MinMagnitudeNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MinNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MinNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, NarrowWithSaturation, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Reverse, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Round, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, ShuffleNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, ShuffleNativeFallback, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, SubtractSaturate, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Sum, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(Vector512, Truncate, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, UnzipEven, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, UnzipOdd, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, WidenLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, WidenUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, WithElement, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, WithLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, WithUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, ZipLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ZipUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_E, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Epsilon, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Indices, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_NaN, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_NegativeInfinity, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_NegativeOne, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_NegativeZero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Pi, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_PositiveInfinity, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, 
get_SignSequence, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Tau, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Addition, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Division, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Equality, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_UnaryPlus, 64, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -#define LAST_NI_Vector512 NI_Vector512_op_UnsignedRightShift - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for X86Base, SSE, SSE2, SSE3, SSSE3, SSE41, SSE42, POPCNT #define FIRST_NI_X86Base NI_X86Base_Abs -HARDWARE_INTRINSIC(X86Base, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, AddSubtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, AlignRight, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_andps, INS_andpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_andnps, INS_andnpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) 
-HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, Blend, 16, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, 1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, BlendVariable, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CeilingScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_cmpps, INS_cmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, 
INS_cmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, 1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_comiss, INS_comisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_SIMDScalar, 
HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, 
CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, 5, 4, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, 5, 4, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, 3, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_cvttsd2si32}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToUInt32, 16, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, 5, 5, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int16, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int64, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, 4, 5, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, Crc32, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid}, 25, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, DotProduct, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, -1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, Extract, 16, 2, {INS_pextrb, INS_pextrb, INS_pextrw, INS_pextrw, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, 4, 4, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(X86Base, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, FloorScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, 3, 6, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, HorizontalAddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, HorizontalSubtract, 16, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, 3, 6, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, HorizontalSubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, Insert, 16, 3, {INS_pinsrb, INS_pinsrb, INS_pinsrw, INS_pinsrw, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid}, 4, 1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128NonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, LoadAndDuplicateToVector128, 16, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, LoadDquVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, LoadFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(X86Base, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(X86Base, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) 
-HARDWARE_INTRINSIC(X86Base, Max, 16, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(X86Base, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_maxsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(X86Base, Min, 16, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_minps, INS_minpd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(X86Base, MinHorizontal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_phminposuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_minsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, MoveAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, MoveHighAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, MoveHighToLow, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(X86Base, MoveLowAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(X86Base, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, 1, 1, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(X86Base, MultipleSumAbsoluteDifferences, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, 5, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(X86Base, MultiplyHigh, 16, 2, 
{INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, MultiplyHighRoundScale, 16, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_orps, INS_orpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base, Prefetch0, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, Prefetch1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, Prefetch2, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, PrefetchNonTemporal, 0, 1, {INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, Reciprocal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, ReciprocalScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, ReciprocalSqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_rsqrtps, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, ReciprocalSqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, RoundCurrentDirection, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundCurrentDirectionScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, RoundToNearestInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundToNearestIntegerScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundToZeroScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShiftRightLogical128BitLane, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, Shuffle, 16, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, 1, 1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShuffleHigh, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShuffleLow, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, Sign, 16, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, 
INS_movaps, INS_movapd}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, StoreFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(X86Base, StoreHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti32, INS_movnti32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, Subtract, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, 1, 4, 
HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, SubtractSaturate, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, TestC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, TestNotZAndNotC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, TestZ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(X86Base, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, 
INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_xorps, INS_xorpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Abs, 16, 1, INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, Add, 16, 2, INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, AddSaturate, 16, 2, INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, AddScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, AddSubtract, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, AlignRight, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, And, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_andps, 
INS_andpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, AndNot, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_andnps, INS_andnpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Average, 16, 2, INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, Blend, 16, 3, INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd, 1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, BlendVariable, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, Ceiling, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CeilingScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_roundss, INS_roundsd, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, CompareEqual, 16, 2, INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_cmpps, INS_cmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareGreaterThan, 16, 2, INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareLessThan, 16, 2, INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotLessThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareOrdered, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareScalarEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarLessThan, 16, 2, INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, 1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrdered, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedNotEqual, 16, 2, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnordered, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThan, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThanOrEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, -1, 3, 
HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedNotEqual, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, CompareUnordered, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Double, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid, 5, 4, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Int32, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Single, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, 5, 4, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128UInt32, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToInt32, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32, 3, 7, 
HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToInt32WithTruncation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_cvttsd2si32, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToUInt32, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Double, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid, 5, 5, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int16, 16, 1, INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32, 16, 1, INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq, 1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32WithTruncation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int64, 16, 1, INS_pmovsxbq, 
INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Single, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps, 4, 5, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, Crc32, 0, 2, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid, 25, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base, Divide, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, DivideScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd, -1, -1, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, DotProduct, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd, -1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, Extract, 16, 2, INS_pextrb, INS_pextrb, INS_pextrw, INS_pextrw, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid, 4, 4, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsScalarT) +HARDWARE_INTRINSIC(X86Base, Floor, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, FloorScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, HorizontalAdd, 16, 2, INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_haddps, INS_haddpd, 3, 6, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, HorizontalAddSaturate, 16, 2, INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, HorizontalSubtract, 16, 2, INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd, 3, 6, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, HorizontalSubtractSaturate, 16, 2, INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, Insert, 16, 3, INS_pinsrb, INS_pinsrb, INS_pinsrw, INS_pinsrw, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid, 4, 1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd, -1, -1, 
HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128NonTemporal, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, LoadAndDuplicateToVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadDquVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, LoadFence, 0, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, LoadHigh, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadLow, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadScalarVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(X86Base, MaskMove, 16, 3, INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, Max, 16, 2, INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_maxps, INS_maxpd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(X86Base, MaxScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_maxsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, MemoryFence, 0, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, Min, 16, 2, INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_minps, INS_minpd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(X86Base, MinHorizontal, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_phminposuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, MinScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_minsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, MoveAndDuplicate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_movddup, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, MoveHighAndDuplicate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, MoveHighToLow, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, MoveLowAndDuplicate, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, MoveLowToHigh, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, MoveMask, 16, 1, INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, MoveScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_movss, INS_movsd_simd, 1, 1, HW_Category_SIMDScalar, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, MultipleSumAbsoluteDifferences, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, Multiply, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd, 5, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyAddAdjacent, 16, 2, INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(X86Base, MultiplyHigh, 16, 2, INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyHighRoundScale, 16, 2, INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, MultiplyLow, 16, 2, INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, Or, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_orps, INS_orpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, PackSignedSaturate, 16, 2, INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, PackUnsignedSaturate, 16, 2, INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, PopCount, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base, Prefetch0, 0, 1, INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Prefetch1, 0, 1, INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Prefetch2, 0, 1, INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, PrefetchNonTemporal, 0, 1, INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Reciprocal, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, 
ReciprocalScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ReciprocalSqrt, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ReciprocalSqrtScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, RoundCurrentDirection, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundCurrentDirectionScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, RoundToNearestInteger, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundToNearestIntegerScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinity, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) 
+HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinityScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinity, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinityScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, RoundToZero, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundToZeroScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, -1, 8, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical, 16, 2, INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical128BitLane, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, ShiftRightArithmetic, 16, 2, INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, 
HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftRightLogical, 16, 2, INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftRightLogical128BitLane, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Shuffle, 16, -1, INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd, 1, 1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShuffleHigh, 16, 2, INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShuffleLow, 16, 2, INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, Sign, 16, 2, INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, Sqrt, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, SqrtScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd, -1, -1, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, Store, 16, 2, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreAligned, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, StoreAlignedNonTemporal, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, StoreFence, 0, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, StoreHigh, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreLow, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreNonTemporal, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti32, INS_movnti32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreScalar, 16, 2, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, Subtract, 16, 2, INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, SubtractSaturate, 16, 2, INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, SubtractScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, SumAbsoluteDifferences, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, TestC, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, TestNotZAndNotC, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, TestZ, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(X86Base, UnpackHigh, 16, 2, INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, UnpackLow, 16, 2, INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, Xor, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_xorps, INS_xorpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) #define LAST_NI_X86Base NI_X86Base_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // 64-bit only Intrinsics for X86Base, SSE, SSE2, SSE3, SSSE3, SSE41, SSE42, POPCNT #define FIRST_NI_X86Base_X64 NI_X86Base_X64_BigMul -HARDWARE_INTRINSIC(X86Base_X64, BigMul, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_imulEAX, INS_mulEAX, INS_invalid, INS_invalid}, 4, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic|HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, 
HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, 3, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_cvttsd2si64}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, Crc32, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid}, 57, -1, 
HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base_X64, Extract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid}, 4, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsScalarT) -HARDWARE_INTRINSIC(X86Base_X64, Insert, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid}, 4, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(X86Base_X64, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti64, INS_movnti64, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base_X64, BigMul, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_imulEAX, INS_mulEAX, INS_invalid, INS_invalid, 4, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic|HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 
0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Double, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Int64, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Single, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128UInt64, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64, 3, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64WithTruncation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_cvttsd2si64, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) 
+HARDWARE_INTRINSIC(X86Base_X64, ConvertToUInt64, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, Crc32, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid, 57, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base_X64, Extract, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid, 4, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsScalarT) +HARDWARE_INTRINSIC(X86Base_X64, Insert, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid, 4, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(X86Base_X64, PopCount, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, StoreNonTemporal, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti64, INS_movnti64, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, 
HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_X86Base_X64 NI_X86Base_X64_StoreNonTemporal // Intrinsics for AVX #define FIRST_NI_AVX NI_AVX_Add -HARDWARE_INTRINSIC(AVX, Add, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX, AddSubtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, Blend, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, BlendVariable, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vblendvps, INS_vblendvpd}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastf32x4, INS_vbroadcastf32x4}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Compare, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareGreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareLessThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareLessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotGreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotLessThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, -1, 4, HW_Category_IMM, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, 7, 7, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 7, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, -1, 7, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, 7, 7, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, 4, 4, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32WithTruncation, 32, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, Divide, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, DotProduct, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_invalid}, -1, 13, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, DuplicateEvenIndexed, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, DuplicateOddIndexed, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, HorizontalAdd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, -1, 6, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, HorizontalSubtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, -1, 6, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, InsertVector128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, LoadAlignedVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, LoadDquVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, LoadVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX, MaskLoad, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX, MoveMask, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, -1, 5, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX, Permute, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd}, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX, Permute2x128, 32, 3, {INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, PermuteVar, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_vpermilpdvar}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Reciprocal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, -1, 4, 
HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, ReciprocalSqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, RoundCurrentDirection, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToNearestInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToNegativeInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToPositiveInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToZero, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Shuffle, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Store, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, Subtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, TestC, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(AVX, TestNotZAndNotC, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(AVX, TestZ, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) -HARDWARE_INTRINSIC(AVX, UnpackHigh, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, 
INS_unpckhpd}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, UnpackLow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX, Add, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, AddSubtract, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, And, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd, -1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, AndNot, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd, -1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, Blend, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, BlendVariable, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vblendvps, INS_vblendvpd, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector128, 16, 
1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector256, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastf32x4, INS_vbroadcastf32x4, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Ceiling, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Compare, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareGreaterThan, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareGreaterThanOrEqual, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareLessThan, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareLessThanOrEqual, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotEqual, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotGreaterThan, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotGreaterThanOrEqual, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotLessThan, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, 
INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, -1, 4, HW_Category_IMM, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, 7, 7, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 7, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps, -1, 7, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid, 7, 7, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, 4, 4, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32WithTruncation, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, Divide, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, DotProduct, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_invalid, -1, 13, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, DuplicateEvenIndexed, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, DuplicateOddIndexed, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ExtractVector128, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, Floor, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, HorizontalAdd, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_haddps, INS_haddpd, -1, 6, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, HorizontalSubtract, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd, -1, 6, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, InsertVector128, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, LoadAlignedVector256, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, LoadDquVector256, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, LoadVector256, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX, MaskLoad, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, MaskStore, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vmaskmovps, INS_vmaskmovpd, -1, -1, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, Max, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX, Min, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX, MoveMask, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd, -1, 5, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, Multiply, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, Or, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd, -1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, Permute, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX, Permute2x128, 32, 3, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, PermuteVar, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_vpermilpdvar, -1, 1, 
HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Reciprocal, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, ReciprocalSqrt, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, RoundCurrentDirection, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToNearestInteger, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToNegativeInfinity, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToPositiveInfinity, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToZero, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, -1, 8, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Shuffle, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX, Sqrt, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_sqrtps, INS_sqrtpd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Store, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX, StoreAligned, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, StoreAlignedNonTemporal, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, Subtract, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, TestC, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd, 6, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(AVX, TestNotZAndNotC, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd, 6, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(AVX, TestZ, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd, 6, -1, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_ReturnsBoolean) +HARDWARE_INTRINSIC(AVX, UnpackHigh, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, UnpackLow, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Xor, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd, -1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) #define LAST_NI_AVX NI_AVX_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE #define FIRST_NI_AVX2 NI_AVX2_Abs -HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, AndNot, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_InvalidNodeId|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2, Average, 32, 2, {INS_invalid, INS_pavgb, 
INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, BitFieldExtract, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 2, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Blend, -1, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_vpblendd, INS_vpblendd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, BlendVariable, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector128, 16, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_movddup}, 1, 1, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector256, 32, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, 3, 3, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, CompareEqual, 32, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, 
INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, ConvertToVector128Half, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ph, INS_invalid}, -1, 5, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vcvtph2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2, ConvertToVector256Half, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vcvtps2ph, INS_invalid}, -1, -1, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vcvtph2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 7, 7, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2, ExtractLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, GatherMaskVector128, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, 
INS_vgatherdpd}, -1, -1, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, GatherMaskVector256, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, -1, -1, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, -1, -1, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, GatherVector256, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, -1, -1, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, GetMaskUpToLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2, HorizontalAdd, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, HorizontalAddSaturate, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, HorizontalSubtract, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, HorizontalSubtractSaturate, 32, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, InsertVector128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, LeadingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_lzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, LoadAlignedVector256NonTemporal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, MaskLoad, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Max, 32, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, Min, 32, 2, {INS_pminsb, 
INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MoveMask, 32, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, MultipleSumAbsoluteDifferences, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent, 32, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, MultiplyAddNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplyAddNegatedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, -1, 4, HW_Category_SIMDScalar, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX2, MultiplyAddScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX2, MultiplyAddSubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics|HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX2, MultiplySubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, -1, 4, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplySubtractAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplySubtractNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplySubtractNegatedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX2, MultiplySubtractScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX2, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, 
HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Permute2x128, 32, 3, {INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Permute4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, PermuteVar8x32, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, ResetLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical, 32, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical128BitLane, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, 2, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, ShiftRightArithmetic, 32, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_vpsravd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 2, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogical, 32, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogical128BitLane, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, 2, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, Shuffle, 32, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM) -HARDWARE_INTRINSIC(AVX2, ShuffleHigh, 32, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, 
HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShuffleLow, 32, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, Sign, 32, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Subtract, 32, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, SubtractSaturate, 32, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, SumAbsoluteDifferences, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, TrailingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, 
INS_pxord, INS_pxord, INS_pxord, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2, Add, 32, 2, INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, And, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, AndNot, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_InvalidNodeId|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2, Average, 32, 2, INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, BitFieldExtract, 0, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 2, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Blend, -1, 3, INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_vpblendd, INS_vpblendd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, BlendVariable, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector128, 16, 1, INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_movddup, 1, 1, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector256, 32, 1, INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd, 3, 3, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, CompareEqual, 32, 2, INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid, 1, -1, 
HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, ConvertToVector128Half, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ph, INS_invalid, -1, 5, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ConvertToVector128Single, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_vcvtph2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Half, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ph, INS_invalid, -1, -1, HW_Category_IMM, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Single, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_vcvtph2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 7, 7, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2, ExtractLowestSetBit, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, GatherMaskVector128, 16, 5, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd, -1, -1, HW_Category_IMM, 
HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, GatherMaskVector256, 32, 5, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd, -1, -1, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd, -1, -1, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, GatherVector256, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd, -1, -1, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, GetMaskUpToLowestSetBit, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2, HorizontalAdd, 32, 2, INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, HorizontalAddSaturate, 32, 2, INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, HorizontalSubtract, 32, 2, INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, 
HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, HorizontalSubtractSaturate, 32, 2, INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, InsertVector128, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, LeadingZeroCount, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_lzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, LoadAlignedVector256NonTemporal, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, MaskLoad, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, MaskStore, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Max, 32, 2, INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, Min, 32, 2, INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MoveMask, 32, 1, INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, MultipleSumAbsoluteDifferences, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Multiply, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MultiplyAdd, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent, 32, 2, INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, MultiplyAddNegated, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplyAddNegatedScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) 
+HARDWARE_INTRINSIC(AVX2, MultiplyAddScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX2, MultiplyAddSubtract, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MultiplyNoFlags, 0, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics|HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX2, MultiplySubtract, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplySubtractAdd, -1, 3, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplySubtractNegated, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplySubtractNegatedScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX2, MultiplySubtractScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX2, Or, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, ParallelBitDeposit, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, ParallelBitExtract, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Permute2x128, 32, 3, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Permute4x64, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, PermuteVar8x32, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, ResetLowestSetBit, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical, 32, 2, INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical128BitLane, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogicalVariable, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, 
INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid, 2, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, ShiftRightArithmetic, 32, 2, INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftRightArithmeticVariable, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_vpsravd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 2, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogical, 32, 2, INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogical128BitLane, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogicalVariable, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid, 2, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, Shuffle, 32, 2, INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM) +HARDWARE_INTRINSIC(AVX2, ShuffleHigh, 32, 2, INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShuffleLow, 32, 2, INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, Sign, 32, 2, INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Subtract, 32, 2, INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, SubtractSaturate, 32, 2, INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, SumAbsoluteDifferences, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, TrailingZeroCount, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, ZeroHighBits, 
0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) #define LAST_NI_AVX2 NI_AVX2_ZeroHighBits // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // 64-bit only Intrinsics for AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE #define FIRST_NI_AVX2_X64 NI_AVX2_X64_AndNot -HARDWARE_INTRINSIC(AVX2_X64, AndNot, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_SpecialImport|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, BitFieldExtract, 0, -1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid}, 2, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, ExtractLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, GetMaskUpToLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2_X64, LeadingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_lzcnt, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2_X64, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics|HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX2_X64, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, ResetLowestSetBit, 0, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2_X64, TrailingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid}, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2_X64, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, AndNot, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_SpecialImport|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, BitFieldExtract, 0, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid, 2, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, ExtractLowestSetBit, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, GetMaskUpToLowestSetBit, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2_X64, LeadingZeroCount, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_lzcnt, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, 
HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2_X64, MultiplyNoFlags, 0, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics|HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX2_X64, ParallelBitDeposit, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, ParallelBitExtract, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, ResetLowestSetBit, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2_X64, TrailingZeroCount, 0, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid, 3, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2_X64, ZeroHighBits, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) #define LAST_NI_AVX2_X64 NI_AVX2_X64_ZeroHighBits // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL #define FIRST_NI_AVX512 NI_AVX512_Abs -HARDWARE_INTRINSIC(AVX512, Abs, -1, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, Add, 64, -1, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, AddSaturate, 64, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, 
HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, -1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, BlendVariable, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, 
BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, 3, 3, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_vbroadcastf32x4, INS_vbroadcastf64x2}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_vbroadcastf32x8, INS_vbroadcastf64x4}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Classify, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_IMM, HW_Flag_InvalidNodeId|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ClassifyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_IMM, HW_Flag_InvalidNodeId|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Compare, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_IMM, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareOrdered, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareUnordered, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, Compress, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompressStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, 5, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, 5, 4, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi32, INS_vcvtsd2usi32}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, 
INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Byte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Single, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_cvtpd2ps}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, 
INS_vcvtpd2uqq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Double, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_cvtps2pd, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, 3, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, -1, -1, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, 3, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, DetectConflicts, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
-HARDWARE_INTRINSIC(AVX512, Divide, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Expand, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, ExpandLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti64x2, INS_vextracti64x2, INS_vextractf32x4, INS_vextractf64x2}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf32x8, INS_vextractf64x4}, 3, 3, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, -1, 4, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, -1, 4, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddSubtract, 64, 
-1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) 
-HARDWARE_INTRINSIC(AVX512, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, -1, 4, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, -1, 4, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti64x2, INS_vinserti64x2, INS_vinsertf32x4, INS_vinsertf64x2}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf32x8, INS_vinsertf64x4}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, -1, -1, HW_Category_MemoryLoad, 
HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512NonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, MaskLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, MaskLoadAligned, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, MaskStoreAligned, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, Max, -1, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX512, Min, -1, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) 
-HARDWARE_INTRINSIC(AVX512, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoContainment|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, Multiply, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, 5, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, MultiplyAddAdjacent, 64, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MultiplyHigh, 64, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, MultiplyHighRoundScale, 64, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_vporq, INS_vporq, INS_orps, INS_orpd}, 1, 1, HW_Category_SimpleSIMD, 
HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, PackSignedSaturate, 64, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, PackUnsignedSaturate, 64, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar16x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, 
PermuteVar16x32x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar32x16, 64, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar32x16x2, 64, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, 
INS_vpermpd_reg}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x16 , 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x64x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, -1, 4, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd}, -1, 4, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, Reduce, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, -1, 4, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, -1, 4, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, 
INS_vprolvq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, -1, 8, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, -1, 8, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, Scale, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical128BitLane, 64, 2, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftRightLogical128BitLane, 64, 2, {INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Shuffle, 64, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, 1, 1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, 
INS_vshuff32x4, INS_vshuff64x2}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Shuffle4x128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShuffleHigh, 64, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShuffleLow, 64, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Sqrt, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, 
-1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, Subtract, 64, -1, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, SubtractSaturate, 64, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferences, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, TernaryLogic, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, 1, 1, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, UnpackHigh, 64, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, UnpackLow, 64, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, 
INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Abs, -1, 1, INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, Add, 64, -1, INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, AddSaturate, 64, 2, INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, AddScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, AlignRight, 64, 3, INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, AlignRight32, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, AlignRight64, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid, -1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) 
+HARDWARE_INTRINSIC(AVX512, And, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_vpandq, INS_vpandq, INS_andps, INS_andpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, AndNot, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Average, 64, 2, INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, BlendVariable, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector128, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector256, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector512, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastScalarToVector512, 64, 1, INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd, 3, 3, 
HW_Category_SIMDScalar, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastVector128ToVector512, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_vbroadcastf32x4, INS_vbroadcastf64x2, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastVector256ToVector512, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_vbroadcastf32x8, INS_vbroadcastf64x4, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Classify, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_IMM, HW_Flag_InvalidNodeId|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ClassifyScalar, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_IMM, HW_Flag_InvalidNodeId|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Compare, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_IMM, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThan, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareLessThan, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThan, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThan, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqual, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareOrdered, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, 
HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareUnordered, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, Compress, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompressStore, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Double, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Single, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, 5, 4, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToInt32, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToUInt32, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi32, INS_vcvtsd2usi32, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) 
+HARDWARE_INTRINSIC(AVX512, ConvertToUInt32WithTruncation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Byte, -1, 1, INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128ByteWithSaturation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Double, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16WithSaturation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, 
ConvertToVector128Int32WithSaturation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64WithTruncation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByte, -1, 1, INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByteWithSaturation, -1, 1, INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Single, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16WithSaturation, -1, 1, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithSaturation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithTruncation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64WithTruncation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Byte, 64, 1, INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256ByteWithSaturation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Double, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16WithSaturation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithSaturation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithTruncation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64WithTruncation, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByte, 64, 1, INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByteWithSaturation, 64, 1, INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Single, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_cvtpd2ps, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16WithSaturation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, 
INS_vcvtpd2udq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithSaturation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithTruncation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64WithTruncation, 32, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Double, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_cvtps2pd, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int16, 64, 1, INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32, 64, -1, INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, 3, 4, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32WithTruncation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64, 64, -1, INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64WithTruncation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Single, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt16, 64, 1, INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32, 64, -1, INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid, 3, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32WithTruncation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid, -1, 4, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64, 64, -1, INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64WithTruncation, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, DetectConflicts, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Divide, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, DivideScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd, -1, -1, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, DuplicateEvenIndexed, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, DuplicateOddIndexed, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Expand, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, ExpandLoad, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, ExtractVector128, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti64x2, INS_vextracti64x2, INS_vextractf32x4, INS_vextractf64x2, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, ExtractVector256, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf32x8, INS_vextractf64x4, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Fixup, -1, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd, -1, 4, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, FixupScalar, 16, 4, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd, -1, 4, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAdd, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegated, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd, -1, 4, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegatedScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddSubtract, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtract, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractAdd, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegated, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd, -1, 
4, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegatedScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd, -1, 4, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, GetExponent, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, GetExponentScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, GetMantissa, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd, -1, 4, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, GetMantissaScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd, -1, 4, HW_Category_IMM, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, InsertVector128, 64, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti64x2, INS_vinserti64x2, INS_vinsertf32x4, INS_vinsertf64x2, 3, 3, 
HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, InsertVector256, 64, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf32x8, INS_vinsertf64x4, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LeadingZeroCount, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512NonTemporal, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LoadVector512, 64, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, MaskLoad, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, MaskLoadAligned, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, MaskStore, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, 
HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, MaskStoreAligned, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, Max, -1, 2, INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512, Min, -1, 2, INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512, MoveMask, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, 3, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoContainment|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, Multiply, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd, 5, 4, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, MultiplyAddAdjacent, 64, 2, INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MultiplyHigh, 64, 2, INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, MultiplyHighRoundScale, 64, 2, INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, 
HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MultiplyLow, -1, 2, INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, MultiplyScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, Or, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_vporq, INS_vporq, INS_orps, INS_orpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, PackSignedSaturate, 64, 2, INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PackUnsignedSaturate, 64, 2, INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Permute2x64, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Permute4x32, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid, -1, 1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Permute4x64, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x16, 32, 2, INS_invalid, INS_invalid, INS_vpermw, 
INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x16x2, 32, 3, INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x32, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x32x2, 64, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar2x64, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PermuteVar2x64x2, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar32x16, 64, 2, INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar32x16x2, 64, 3, INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) 
+HARDWARE_INTRINSIC(AVX512, PermuteVar4x32, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid, -1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x32x2, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x64, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x64x2, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x16 , 16, 2, INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x16x2, 16, 3, INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 6, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x32x2, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x64, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, 
INS_vpermq_reg, INS_invalid, INS_vpermpd_reg, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x64x2, 64, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd, 3, 3, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, Range, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd, -1, 4, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RangeScalar, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd, -1, 4, HW_Category_IMM, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Reciprocal14, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Reciprocal14Scalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14Scalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Reduce, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd, -1, 4, HW_Category_IMM, 
HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ReduceScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd, -1, 4, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, RotateLeft, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RotateLeftVariable, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RotateRight, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RotateRightVariable, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RoundScale, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd, -1, 8, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RoundScaleScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd, -1, 8, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Scale, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) 
+HARDWARE_INTRINSIC(AVX512, ScaleScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical, 64, 2, INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical128BitLane, 64, 2, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogicalVariable, -1, 2, INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ShiftRightArithmetic, -1, 2, INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightArithmeticVariable, -1, 2, INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogical, 64, 2, INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogical128BitLane, 64, 2, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, 
HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogicalVariable, -1, 2, INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Shuffle, 64, -1, INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd, 1, 1, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Shuffle2x128, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Shuffle4x128, 64, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2, 3, 3, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShuffleHigh, 64, 2, INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShuffleLow, 64, 2, INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Sqrt, 64, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SqrtScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd, -1, -1, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, Store, 64, 2, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, StoreAligned, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, StoreAlignedNonTemporal, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Subtract, 64, -1, INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SubtractSaturate, 64, 2, INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, SubtractScalar, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd, -1, 4, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferences, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferencesInBlock32, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, TernaryLogic, -1, 4, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq, 1, 1, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, UnpackHigh, 64, 2, INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, UnpackLow, 64, 2, INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Xor, 64, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) #define LAST_NI_AVX512 NI_AVX512_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // 64-bit only Intrinsics for AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL #define FIRST_NI_AVX512_X64 NI_AVX512_X64_ConvertScalarToVector128Double -HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, 5, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, 5, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi64, INS_vcvtsd2usi64}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Double, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid, 5, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Single, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid, 5, -1, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToInt64, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64, 16, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi64, INS_vcvtsd2usi64, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64WithTruncation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) #define LAST_NI_AVX512_X64 NI_AVX512_X64_ConvertToUInt64WithTruncation // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX512-IFMA, AVX512-VBMI #define FIRST_NI_AVX512v2 NI_AVX512v2_MultiShift -HARDWARE_INTRINSIC(AVX512v2, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512v2, MultiShift, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar16x8, 16, 2, INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar16x8x2, 16, 3, INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar32x8, 32, 2, INS_vpermb, 
INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar32x8x2, 32, 3, INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8, 64, 2, INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8x2, 64, 3, INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) #define LAST_NI_AVX512v2 NI_AVX512v2_PermuteVar64x8x2 // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX512-BITALG, AVX512-VBMI2, AVX512-VPOPCNTDQ, AVX512-VNNI #define FIRST_NI_AVX512v3 NI_AVX512v3_Compress -HARDWARE_INTRINSIC(AVX512v3, Compress, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512v3, CompressStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512v3, Expand, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512v3, ExpandLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512v3, MultiplyWideningAndAdd, 64, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512v3, MultiplyWideningAndAddSaturate, 64, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512v3, Compress, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512v3, CompressStore, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512v3, Expand, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512v3, ExpandLoad, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512v3, MultiplyWideningAndAdd, 64, 3, INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512v3, MultiplyWideningAndAddSaturate, 64, 3, INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVX512v3 NI_AVX512v3_MultiplyWideningAndAddSaturate // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX10v2 #define FIRST_NI_AVX10v2 NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32 -HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, 
ConvertToInt32WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis32, INS_vcvttsd2sis32}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToUInt32WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis32, INS_vcvttsd2usis32}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, -1, -1, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, -1, 4, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd}, -1, 4, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_vmovd_simd, INS_vmovd_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToInt32WithTruncatedSaturation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis32, INS_vcvttsd2sis32, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid, -1, 4, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToUInt32WithTruncatedSaturation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis32, INS_vcvttsd2usis32, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncatedSaturation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncatedSaturation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, 
ConvertToVectorUInt32WithTruncatedSaturation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncatedSaturation, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs, -1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd, -1, 4, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd, -1, 4, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_vmovd_simd, INS_vmovd_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SIMDScalar, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, 64, 3, INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // 64-bit only intrinsics for AVX10v2 #define FIRST_NI_AVX10v2_X64 NI_AVX10v2_X64_ConvertToInt64WithTruncatedSaturation -HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToInt64WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis64, INS_vcvttsd2sis64}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToUInt64WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis64, INS_vcvttsd2usis64}, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) 
+HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToInt64WithTruncatedSaturation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis64, INS_vcvttsd2sis64, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToUInt64WithTruncatedSaturation, 16, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis64, INS_vcvttsd2usis64, -1, 7, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) #define LAST_NI_AVX10v2_X64 NI_AVX10v2_X64_ConvertToUInt64WithTruncatedSaturation #define FIRST_NI_AVX512BMM NI_AVX512BMM_BitMultiplyMatrix16x16WithOrReduction -HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithOrReduction, -1, -1, {INS_invalid, INS_invalid, INS_vbmacor16x16x16, INS_vbmacor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithXorReduction, -1, -1, {INS_invalid, INS_invalid, INS_vbmacxor16x16x16, INS_vbmacxor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BMM, ReverseBits, -1, -1, {INS_invalid, INS_vbitrev, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithOrReduction, -1, -1, INS_invalid, INS_invalid, INS_vbmacor16x16x16, INS_vbmacor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512BMM, 
BitMultiplyMatrix16x16WithXorReduction, -1, -1, INS_invalid, INS_invalid, INS_vbmacxor16x16x16, INS_vbmacxor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512BMM, ReverseBits, -1, -1, INS_invalid, INS_vbitrev, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) #define LAST_NI_AVX512BMM NI_AVX512BMM_ReverseBits // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVXVNNI #define FIRST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, 
INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) #define LAST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAddSaturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT Intrinsics #define FIRST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) #define LAST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAddSaturate // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT_V512 Intrinsics #define FIRST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) 
+HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAdd, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSaturate, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) #define LAST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AES, PCLMULQDQ #define FIRST_NI_AES 
NI_AES_CarrylessMultiply -HARDWARE_INTRINSIC(AES, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, 7, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AES, Decrypt, 16, 2, {INS_invalid, INS_aesdec, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, DecryptLast, 16, 2, {INS_invalid, INS_aesdeclast, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, Encrypt, 16, 2, {INS_invalid, INS_aesenc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, EncryptLast, 16, 2, {INS_invalid, INS_aesenclast, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, InverseMixColumns, 16, 1, {INS_invalid, INS_aesimc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 8, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, KeygenAssist, 16, 2, {INS_invalid, INS_aeskeygenassist, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 6, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, CarrylessMultiply, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid, 7, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AES, Decrypt, 16, 2, INS_invalid, INS_aesdec, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, DecryptLast, 16, 2, INS_invalid, INS_aesdeclast, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, Encrypt, 16, 2, INS_invalid, INS_aesenc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, EncryptLast, 16, 2, INS_invalid, INS_aesenclast, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, InverseMixColumns, 16, 1, INS_invalid, INS_aesimc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 8, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, KeygenAssist, 16, 2, INS_invalid, INS_aeskeygenassist, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 6, -1, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) #define LAST_NI_AES NI_AES_KeygenAssist // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for VAES, VPCLMULQDQ #define FIRST_NI_AES_V256 NI_AES_V256_CarrylessMultiply -HARDWARE_INTRINSIC(AES_V256, CarrylessMultiply, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, 7, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AES_V256, CarrylessMultiply, 32, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid, 7, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_AES_V256 NI_AES_V256_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, 
TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX Intrinsics for VAES, VPCLMULQDQ #define FIRST_NI_AES_V512 NI_AES_V512_CarrylessMultiply -HARDWARE_INTRINSIC(AES_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, 7, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AES_V512, CarrylessMultiply, 64, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid, 7, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_AES_V512 NI_AES_V512_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512 Intrinsics for X86Serialize #define FIRST_NI_X86Serialize NI_X86Serialize_Serialize -HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, -1, -1, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) #define LAST_NI_X86Serialize NI_X86Serialize_Serialize // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for GFNI #define FIRST_NI_GFNI NI_GFNI_GaloisFieldAffineTransform -HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransform, 16, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransformInverse, 16, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply, 16, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransform, 16, 3, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransformInverse, 16, 3, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply, 16, 2, INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
#define LAST_NI_GFNI NI_GFNI_GaloisFieldMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX Intrinsics for GFNI #define FIRST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldAffineTransform -HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransform, 32, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransformInverse, 32, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply, 32, 2, {INS_invalid, INS_gf2p8mulb, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransform, 32, 3, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransformInverse, 32, 3, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply, 32, 2, INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) #define LAST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512 Intrinsics for GFNI #define FIRST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldAffineTransform -HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransform, 64, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransformInverse, 64, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply, 64, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransform, 64, 3, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransformInverse, 64, 3, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, 3, -1, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply, 64, 2, INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
3, -1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) #define LAST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags +// TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Special intrinsics that are generated during lowering -HARDWARE_INTRINSIC(X86Base, COMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, UCOMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, -1, 3, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(AVX, PTEST, -1, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, AndNotVector, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_invalid, INS_invalid}, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, AndNotScalar, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX512, KORTEST, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, 4, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX512, KTEST, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, 4, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX512, PTESTM, 0, 2, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, 4, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, PTESTNM, 0, 2, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, 4, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) - -HARDWARE_INTRINSIC(AVX512, AddMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, 4, HW_Category_SimpleSIMD, 
HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, AndMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, AndNotMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, BlendVariableMask, -1, 3, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ClassifyMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfpclassps, INS_vfpclasspd}, -1, 3, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ClassifyScalarMask, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfpclassss, INS_vfpclasssd}, -1, 3, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, CompareMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, -1, 4, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareEqualMask, -1, 2, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AVX512, CompareGreaterThanMask, -1, 2, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareNotEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareNotLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, 
INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareOrderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareScalarMask, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpss, INS_vcmpsd}, -1, 4, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareUnorderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompressMask, -1, 3, {INS_vpcompressb, INS_vpcompressb, INS_vpcompressw, INS_vpcompressw, INS_vpcompressd, INS_vpcompressd, INS_vpcompressq, INS_vpcompressq, INS_vcompressps, INS_vcompresspd}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, CompressStoreMask, -1, 3, {INS_vpcompressb, INS_vpcompressb, INS_vpcompressw, INS_vpcompressw, INS_vpcompressd, INS_vpcompressd, INS_vpcompressq, INS_vpcompressq, INS_vcompressps, INS_vcompresspd}, -1, -1, HW_Category_MemoryStore, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, 
ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, ExpandLoadMask, -1, 3, {INS_vpexpandb, INS_vpexpandb, INS_vpexpandw, INS_vpexpandw, INS_vpexpandd, INS_vpexpandd, INS_vpexpandq, INS_vpexpandq, INS_vexpandps, INS_vexpandpd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ExpandMask, -1, 3, {INS_vpexpandb, INS_vpexpandb, INS_vpexpandw, INS_vpexpandw, INS_vpexpandd, INS_vpexpandd, INS_vpexpandq, INS_vpexpandq, INS_vexpandps, INS_vexpandpd}, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MaskLoadMask, -1, 3, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MaskLoadAlignedMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MaskStoreMask, -1, 3, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, MaskStoreAlignedMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, 1, HW_Category_SimpleSIMD, 
HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, OrMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ShiftLeftMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, 4, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ShiftRightMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 4, 4, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, XorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, XnorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base, COMIS, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, PTEST, 16, 2, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid, 4, -1, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, UCOMIS, 16, 2, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, -1, 3, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX, PTEST, -1, 2, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, AndNotVector, 32, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_invalid, INS_invalid, 1, -1, HW_Category_SimpleSIMD, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, AndNotScalar, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, -1, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX512, KORTEST, 0, -1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, 4, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, KTEST, 0, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, 4, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, PTESTM, 0, 2, INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq, 4, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, PTESTNM, 0, 2, INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq, 4, 4, HW_Category_SimpleSIMD, HW_Flag_Commutative) + +HARDWARE_INTRINSIC(AVX512, AddMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, 4, 
HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, AndMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, AndNotMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, BlendVariableMask, -1, 3, INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ClassifyMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfpclassps, INS_vfpclasspd, -1, 3, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ClassifyScalarMask, 16, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfpclassss, INS_vfpclasssd, -1, 3, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, CompareMask, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd, -1, 4, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareEqualMask, -1, 2, INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanMask, -1, 2, INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqualMask, -1, 2, INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareLessThanMask, -1, 2, INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqualMask, -1, 2, INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotEqualMask, -1, 2, INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanMask, -1, 2, INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqualMask, -1, 2, INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanMask, -1, 2, INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, 
INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqualMask, -1, 2, INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, 1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareOrderedMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareScalarMask, 16, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpss, INS_vcmpsd, -1, 4, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareUnorderedMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd, -1, 4, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompressMask, -1, 3, INS_vpcompressb, INS_vpcompressb, INS_vpcompressw, INS_vpcompressw, INS_vpcompressd, INS_vpcompressd, INS_vpcompressq, INS_vpcompressq, INS_vcompressps, INS_vcompresspd, 3, 3, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, CompressStoreMask, -1, 3, INS_vpcompressb, INS_vpcompressb, INS_vpcompressw, INS_vpcompressw, INS_vpcompressd, INS_vpcompressd, INS_vpcompressq, INS_vpcompressq, INS_vcompressps, INS_vcompresspd, -1, -1, HW_Category_MemoryStore, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ConvertMaskToVector, -1, 1, INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q, -1, -1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ConvertVectorToMask, -1, 1, 
INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ExpandLoadMask, -1, 3, INS_vpexpandb, INS_vpexpandb, INS_vpexpandw, INS_vpexpandw, INS_vpexpandd, INS_vpexpandd, INS_vpexpandq, INS_vpexpandq, INS_vexpandps, INS_vexpandpd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ExpandMask, -1, 3, INS_vpexpandb, INS_vpexpandb, INS_vpexpandw, INS_vpexpandw, INS_vpexpandd, INS_vpexpandd, INS_vpexpandq, INS_vpexpandq, INS_vexpandps, INS_vexpandpd, 3, 3, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MaskLoadMask, -1, 3, INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MaskLoadAlignedMask, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd, -1, -1, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MaskStoreMask, -1, 3, INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, MaskStoreAlignedMask, -1, 3, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd, -1, -1, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, NotMask, -1, 1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) 
+HARDWARE_INTRINSIC(AVX512, OrMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ShiftLeftMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, 4, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ShiftRightMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 4, 4, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, XorMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, XnorMask, -1, 2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 1, 1, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) #endif // FEATURE_HW_INTRINSIC #undef HARDWARE_INTRINSIC diff --git a/src/coreclr/jit/hwintrinsicwasm.cpp b/src/coreclr/jit/hwintrinsicwasm.cpp index 2f25e4506e12ce..25ab03cdb2f9b0 100644 --- a/src/coreclr/jit/hwintrinsicwasm.cpp +++ b/src/coreclr/jit/hwintrinsicwasm.cpp @@ -76,6 +76,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, unsigned simdSize, bool mustExpand) { + CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic); + + if (isa == InstructionSet_Vector) + { + return impXplatIntrinsic(intrinsic, clsHnd, method, sig R2RARG(entryPoint), simdBaseType, 
retType, simdSize, + mustExpand); + } + assert(varTypeIsArithmetic(simdBaseType)); GenTree* retNode = nullptr; @@ -84,15 +92,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_Vector128_GetElement: - case NI_Vector128_WithElement: - { - // Vector128.GetElement / WithElement have valid managed implementations in - // Vector128.cs. Return nullptr to let the importer fall back to those rather - // than asserting / NYI'ing in the JIT. - return nullptr; - } - // The following PackedSimd intrinsics are not yet implemented on WASM. Because they are must-expand, // when we return nullptr here the importer will insert a PlatformNotSupportedException throw. case NI_PackedSimd_CompareGreaterThan: @@ -154,12 +153,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_PackedSimd_Splat: break; - case NI_Vector128_Create: - { - retNode = impSimdCreate(intrinsic, sig, simdBaseType, retType, simdSize); - break; - } - default: { break; diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 17ed42ed753151..82e60f67a54785 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1240,2984 +1240,121 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, unsigned simdSize, bool mustExpand) { - GenTree* retNode = nullptr; - GenTree* op1 = nullptr; - GenTree* op2 = nullptr; - GenTree* op3 = nullptr; - GenTree* op4 = nullptr; - - CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic); - - if (simdSize != 0) - { - assert(varTypeIsArithmetic(simdBaseType)); - } - -#if defined(FEATURE_READYTORUN) - CORINFO_CONST_LOOKUP emptyEntryPoint; - - emptyEntryPoint.addr = nullptr; - emptyEntryPoint.accessType = IAT_VALUE; -#endif // FEATURE_READYTORUN - - bool isMinMaxIntrinsic = false; - bool isMax = false; - bool isMagnitude = false; - bool isNative = false; - bool isNumber = false; - - switch (intrinsic) - { - case 
NI_Vector128_Abs: - case NI_Vector256_Abs: - case NI_Vector512_Abs: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || varTypeIsUnsigned(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdAbsNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_op_Addition: - case NI_Vector256_op_Addition: - case NI_Vector512_op_Addition: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_AVX2_AndNot: - { - if (varTypeIsSIMD(retType)) - { - intrinsic = NI_AVX2_AndNotVector; - simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig); - compFloatingPointUsed = true; - } - else - { - intrinsic = NI_AVX2_AndNotScalar; - } - FALLTHROUGH; - } - - case NI_X86Base_AndNot: - case NI_AVX_AndNot: - case NI_AVX2_X64_AndNot: - case NI_AVX512_AndNot: - { - assert(sig->numArgs == 2); - - if (simdSize != 0) - { - // We don't want to support creating AND_NOT nodes prior to LIR - // as it can break important optimizations. We'll produces this - // in lowering instead so decompose into the individual operations - // on import, taking into account that despite the name, these APIs - // do (~op1 & op2), so we need to account for that - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseType, simdSize)); - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); - } - else - { - // The same general reasoning for the decomposition exists here as - // given above for the SIMD AndNot APIs. 
- - op2 = impPopStack().val; - op1 = impPopStack().val; - - op1 = gtFoldExpr(gtNewOperNode(GT_NOT, retType, op1)); - retNode = gtNewOperNode(GT_AND, retType, op1, op2); - } - break; - } - - case NI_Vector128_AddSaturate: - case NI_Vector256_AddSaturate: - case NI_Vector512_AddSaturate: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (varTypeIsFloating(simdBaseType)) - { - retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); - } - else if (varTypeIsSmall(simdBaseType)) - { - if (simdSize == 64) - { - intrinsic = NI_AVX512_AddSaturate; - } - else if (simdSize == 32) - { - intrinsic = NI_AVX2_AddSaturate; - } - else - { - assert(simdSize == 16); - intrinsic = NI_X86Base_AddSaturate; - } - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - } - else if (varTypeIsUnsigned(simdBaseType)) - { - // For unsigned we simply have to detect `(x + y) < x` - // and in that scenario return MaxValue (AllBitsSet) - - GenTree* cns = gtNewAllBitsSetConNode(retType); - GenTree* op1Dup1 = fgMakeMultiUse(&op1); - - GenTree* tmp = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); - GenTree* tmpDup1 = fgMakeMultiUse(&tmp); - GenTree* msk = gtNewSimdCmpOpNode(GT_LT, retType, tmp, op1Dup1, simdBaseType, simdSize); - - retNode = gtNewSimdCndSelNode(retType, msk, cns, tmpDup1, simdBaseType, simdSize); - } - else - { - // For signed the logic is a bit more complex, but is - // explained on the managed side as part of Scalar.AddSaturate - - GenTreeVecCon* minCns = gtNewVconNode(retType); - GenTreeVecCon* maxCns = gtNewVconNode(retType); - - switch (simdBaseType) - { - case TYP_SHORT: - { - minCns->EvaluateBroadcastInPlace(INT16_MIN); - maxCns->EvaluateBroadcastInPlace(INT16_MAX); - break; - } - - case TYP_INT: - { - 
minCns->EvaluateBroadcastInPlace(INT32_MIN); - maxCns->EvaluateBroadcastInPlace(INT32_MAX); - break; - } - - case TYP_LONG: - { - minCns->EvaluateBroadcastInPlace(INT64_MIN); - maxCns->EvaluateBroadcastInPlace(INT64_MAX); - break; - } - - default: - { - unreached(); - } - } - - GenTree* op1Dup1 = fgMakeMultiUse(&op1); - GenTree* op2Dup1 = fgMakeMultiUse(&op2); - - GenTree* tmp = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseType, simdSize); - - GenTree* tmpDup1 = fgMakeMultiUse(&tmp); - GenTree* tmpDup2 = gtCloneExpr(tmpDup1); - - GenTree* msk = gtNewSimdIsNegativeNode(retType, tmpDup1, simdBaseType, simdSize); - GenTree* ovf = gtNewSimdCndSelNode(retType, msk, maxCns, minCns, simdBaseType, simdSize); - - // The mask we need is ((a ^ b) & ~(b ^ c)) < 0 - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - // tmpDup1 = a: 0xF0 - // op1Dup1 = b: 0xCC - // op2Dup2 = c: 0xAA - // - // 0x18 = A ? norBC : andBC - // a ? ~(b | c) : (b & c) - msk = gtNewSimdTernaryLogicNode(retType, tmp, op1Dup1, op2Dup1, gtNewIconNode(0x18), - simdBaseType, simdSize); - } - else - { - GenTree* op1Dup2 = gtCloneExpr(op1Dup1); - - GenTree* msk2 = gtNewSimdBinOpNode(GT_XOR, retType, tmp, op1Dup1, simdBaseType, simdSize); - GenTree* msk3 = gtNewSimdBinOpNode(GT_XOR, retType, op1Dup2, op2Dup1, simdBaseType, simdSize); - - msk = gtNewSimdBinOpNode(GT_AND_NOT, retType, msk2, msk3, simdBaseType, simdSize); - } - - msk = gtNewSimdIsNegativeNode(retType, msk, simdBaseType, simdSize); - retNode = gtNewSimdCndSelNode(retType, msk, ovf, tmpDup2, simdBaseType, simdSize); - } - } - break; - } - - case NI_Vector128_AndNot: - case NI_Vector256_AndNot: - case NI_Vector512_AndNot: - { - assert(sig->numArgs == 2); - - // We don't want to support creating AND_NOT nodes prior to LIR - // as it can break important optimizations. 
We'll produces this - // in lowering instead so decompose into the individual operations - // on import - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - op2 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseType, simdSize)); - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_As: - case NI_Vector128_AsByte: - case NI_Vector128_AsDouble: - case NI_Vector128_AsInt16: - case NI_Vector128_AsInt32: - case NI_Vector128_AsInt64: - case NI_Vector128_AsNInt: - case NI_Vector128_AsNUInt: - case NI_Vector128_AsSByte: - case NI_Vector128_AsSingle: - case NI_Vector128_AsUInt16: - case NI_Vector128_AsUInt32: - case NI_Vector128_AsUInt64: - case NI_Vector128_AsVector4: - case NI_Vector256_As: - case NI_Vector256_AsByte: - case NI_Vector256_AsDouble: - case NI_Vector256_AsInt16: - case NI_Vector256_AsInt32: - case NI_Vector256_AsInt64: - case NI_Vector256_AsNInt: - case NI_Vector256_AsNUInt: - case NI_Vector256_AsSByte: - case NI_Vector256_AsSingle: - case NI_Vector256_AsUInt16: - case NI_Vector256_AsUInt32: - case NI_Vector256_AsUInt64: - case NI_Vector512_As: - case NI_Vector512_AsByte: - case NI_Vector512_AsDouble: - case NI_Vector512_AsInt16: - case NI_Vector512_AsInt32: - case NI_Vector512_AsInt64: - case NI_Vector512_AsNInt: - case NI_Vector512_AsNUInt: - case NI_Vector512_AsSByte: - case NI_Vector512_AsSingle: - case NI_Vector512_AsUInt16: - case NI_Vector512_AsUInt32: - case NI_Vector512_AsUInt64: - { - // We fold away the cast here, as it only exists to satisfy - // the type system. It is safe to do this here since the retNode type - // and the signature return type are both the same TYP_SIMD. 
- - assert(sig->numArgs == 1); - - retNode = impSIMDPopStack(); - assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); - break; - } - - case NI_Vector128_AsVector2: - case NI_Vector128_AsVector3: - { - assert(sig->numArgs == 1); - assert((simdSize == 16) && (simdBaseType == TYP_FLOAT)); - assert((retType == TYP_SIMD8) || (retType == TYP_SIMD12)); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - break; - } - - case NI_Vector128_AsVector128: - { - assert(sig->numArgs == 1); - assert(retType == TYP_SIMD16); - assert(HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic)); - - var_types op1SimdBaseType = - getBaseTypeAndSizeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args), &simdSize); - - assert(simdBaseType == op1SimdBaseType); - - switch (getSIMDTypeForSize(simdSize)) - { - case TYP_SIMD8: - { - assert((simdSize == 8) && (simdBaseType == TYP_FLOAT)); - - op1 = impSIMDPopStack(); - - if (op1->IsCnsVec()) - { - GenTreeVecCon* vecCon = op1->AsVecCon(); - vecCon->gtType = TYP_SIMD16; - - vecCon->gtSimdVal.f32[2] = 0.0f; - vecCon->gtSimdVal.f32[3] = 0.0f; - - return vecCon; - } - - op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseType, 8); - - GenTree* idx = gtNewIconNode(2, TYP_INT); - GenTree* zero = gtNewZeroConNode(TYP_FLOAT); - op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); - - idx = gtNewIconNode(3, TYP_INT); - zero = gtNewZeroConNode(TYP_FLOAT); - retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); - - break; - } - - case TYP_SIMD12: - { - assert((simdSize == 12) && (simdBaseType == TYP_FLOAT)); - - op1 = impSIMDPopStack(); - - if (op1->IsCnsVec()) - { - GenTreeVecCon* vecCon = op1->AsVecCon(); - vecCon->gtType = TYP_SIMD16; - - vecCon->gtSimdVal.f32[3] = 0.0f; - return vecCon; - } - - op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, 
simdBaseType, 12); - - GenTree* idx = gtNewIconNode(3, TYP_INT); - GenTree* zero = gtNewZeroConNode(TYP_FLOAT); - retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseType, 16); - break; - } - - case TYP_SIMD16: - { - // We fold away the cast here, as it only exists to satisfy - // the type system. It is safe to do this here since the retNode type - // and the signature return type are both the same TYP_SIMD. - - retNode = impSIMDPopStack(); - assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); - - break; - } - - case TYP_SIMD32: - case TYP_SIMD64: - { - // Vector is larger, so we should treat this as a call to the appropriate narrowing intrinsic - intrinsic = simdSize == YMM_REGSIZE_BYTES ? NI_Vector256_GetLower : NI_Vector512_GetLower128; - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - break; - } - - default: - { - unreached(); - } - } - - break; - } - - case NI_Vector128_AsVector128Unsafe: - { - assert(sig->numArgs == 1); - assert(retType == TYP_SIMD16); - assert(simdBaseType == TYP_FLOAT); - assert((simdSize == 8) || (simdSize == 12)); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseType, simdSize); - break; - } - - case NI_Vector128_AsVector: - case NI_Vector256_AsVector: - case NI_Vector512_AsVector: - case NI_Vector256_AsVector256: - case NI_Vector512_AsVector512: - { - assert(sig->numArgs == 1); - uint32_t vectorTByteLength = getVectorTByteLength(); - - if (vectorTByteLength == 0) - { - // VectorT ISA was not present. Fall back to managed. - break; - } - - if (vectorTByteLength == simdSize) - { - // We fold away the cast here, as it only exists to satisfy - // the type system. It is safe to do this here since the retNode type - // and the signature return type are both the same TYP_SIMD. 
- - retNode = impSIMDPopStack(); - assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); - - break; - } - - // Vector is a different size than the source/target SIMD type, so we should - // treat this as a call to the appropriate narrowing or widening intrinsic. - - NamedIntrinsic convertIntrinsic = NI_Illegal; - - switch (vectorTByteLength) - { - case XMM_REGSIZE_BYTES: - { - switch (intrinsic) - { - case NI_Vector256_AsVector: - convertIntrinsic = NI_Vector256_GetLower; - break; - case NI_Vector512_AsVector: - convertIntrinsic = NI_Vector512_GetLower128; - break; - case NI_Vector256_AsVector256: - convertIntrinsic = NI_Vector128_ToVector256; - break; - case NI_Vector512_AsVector512: - convertIntrinsic = NI_Vector128_ToVector512; - break; - default: - unreached(); - } - break; - } - - case YMM_REGSIZE_BYTES: - { - switch (intrinsic) - { - case NI_Vector128_AsVector: - convertIntrinsic = NI_Vector128_ToVector256; - break; - case NI_Vector512_AsVector: - convertIntrinsic = NI_Vector512_GetLower; - break; - case NI_Vector512_AsVector512: - convertIntrinsic = NI_Vector256_ToVector512; - break; - default: - unreached(); - } - break; - } - - case ZMM_REGSIZE_BYTES: - { - switch (intrinsic) - { - case NI_Vector128_AsVector: - convertIntrinsic = NI_Vector128_ToVector512; - break; - case NI_Vector256_AsVector: - convertIntrinsic = NI_Vector256_ToVector512; - break; - case NI_Vector256_AsVector256: - convertIntrinsic = NI_Vector512_GetLower; - break; - default: - unreached(); - } - break; - } - - default: - { - unreached(); - } - } - - unsigned convertSize = simdSize; - bool sizeFound = HWIntrinsicInfo::tryLookupSimdSize(convertIntrinsic, &convertSize); - assert(sizeFound); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, convertIntrinsic, simdBaseType, convertSize); - - break; - } - - case NI_Vector128_op_BitwiseAnd: - case NI_Vector256_op_BitwiseAnd: - case NI_Vector512_op_BitwiseAnd: - { - 
assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_op_BitwiseOr: - case NI_Vector256_op_BitwiseOr: - case NI_Vector512_op_BitwiseOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_Ceiling: - case NI_Vector256_Ceiling: - case NI_Vector512_Ceiling: - { - assert(sig->numArgs == 1); - - if (!varTypeIsFloating(simdBaseType)) - { - retNode = impSIMDPopStack(); - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdCeilNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_ConditionalSelect: - case NI_Vector256_ConditionalSelect: - case NI_Vector512_ConditionalSelect: - { - assert(sig->numArgs == 3); - - op3 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseType, simdSize); - break; - } - - case NI_Vector128_ConvertToDouble: - case NI_Vector256_ConvertToDouble: - case NI_Vector512_ConvertToDouble: - { - assert(sig->numArgs == 1); - assert(varTypeIsLong(simdBaseType)); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - if (simdSize == 64) - { - intrinsic = NI_AVX512_ConvertToVector512Double; - } - else if (simdSize == 32) - { - intrinsic = NI_AVX512_ConvertToVector256Double; - } - else - { - assert(simdSize == 16); - intrinsic = NI_AVX512_ConvertToVector128Double; - } - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_ConvertToInt32: - case NI_Vector256_ConvertToInt32: - case NI_Vector512_ConvertToInt32: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - - op1 = impSIMDPopStack(); - retNode = 
gtNewSimdCvtNode(retType, op1, TYP_INT, simdBaseType, simdSize); - break; - } - - case NI_Vector128_ConvertToInt32Native: - case NI_Vector256_ConvertToInt32Native: - case NI_Vector512_ConvertToInt32Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_INT, simdBaseType, simdSize); - break; - } - - case NI_Vector128_ConvertToInt64: - case NI_Vector256_ConvertToInt64: - case NI_Vector512_ConvertToInt64: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNode(retType, op1, TYP_LONG, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_ConvertToInt64Native: - case NI_Vector256_ConvertToInt64Native: - case NI_Vector512_ConvertToInt64Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_LONG, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_ConvertToSingle: - case NI_Vector256_ConvertToSingle: - case NI_Vector512_ConvertToSingle: - { - assert(sig->numArgs == 1); - assert(varTypeIsInt(simdBaseType)); - intrinsic = NI_Illegal; - if (simdBaseType == TYP_INT) - { - switch (simdSize) - { - case 16: - intrinsic = NI_X86Base_ConvertToVector128Single; - break; - case 32: - intrinsic = NI_AVX_ConvertToVector256Single; - break; - case 64: - intrinsic = NI_AVX512_ConvertToVector512Single; - break; - default: - unreached(); - } - } - else if (simdBaseType == TYP_UINT && compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - switch (simdSize) - { - case 16: - intrinsic = NI_AVX512_ConvertToVector128Single; 
- break; - case 32: - intrinsic = NI_AVX512_ConvertToVector256Single; - break; - case 64: - intrinsic = NI_AVX512_ConvertToVector512Single; - break; - default: - unreached(); - } - } - if (intrinsic != NI_Illegal) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_ConvertToUInt32: - case NI_Vector256_ConvertToUInt32: - case NI_Vector512_ConvertToUInt32: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNode(retType, op1, TYP_UINT, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_ConvertToUInt32Native: - case NI_Vector256_ConvertToUInt32Native: - case NI_Vector512_ConvertToUInt32Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_UINT, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_ConvertToUInt64: - case NI_Vector256_ConvertToUInt64: - case NI_Vector512_ConvertToUInt64: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNode(retType, op1, TYP_ULONG, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_ConvertToUInt64Native: - case NI_Vector256_ConvertToUInt64Native: - case NI_Vector512_ConvertToUInt64Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdCvtNativeNode(retType, op1, TYP_ULONG, 
simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_Create: - case NI_Vector256_Create: - case NI_Vector512_Create: - { - retNode = impSimdCreate(intrinsic, sig, simdBaseType, retType, simdSize); - break; - } - - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - { - assert(sig->numArgs == 1); - - op1 = impPopStack().val; - retNode = gtNewSimdCreateScalarNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: - { - assert(sig->numArgs == 1); - - op1 = impPopStack().val; - retNode = gtNewSimdCreateScalarUnsafeNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_CreateSequence: - case NI_Vector256_CreateSequence: - case NI_Vector512_CreateSequence: - { - assert(sig->numArgs == 2); - - if (!impStackTop(1).val->OperIsConst() || !impStackTop(0).val->OperIsConst()) - { - // One of the operands isn't constant, so we need to do a computation in the form of: - // (Indices * op2) + op1 - - if (simdSize == 32) - { - if (varTypeIsIntegral(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 - break; - } - } - } - - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op1 side effects for vector CreateSequence")); - - op2 = impPopStack().val; - op1 = impPopStack().val; - - retNode = gtNewSimdCreateSequenceNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_CreateGeometricSequence: - case NI_Vector256_CreateGeometricSequence: - case NI_Vector512_CreateGeometricSequence: - { - assert(sig->numArgs == 2); - - bool multiplierIsConst = impStackTop(0).val->OperIsConst(); - bool initialIsConst = impStackTop(1).val->OperIsConst(); - bool canGenerate = - multiplierIsConst && (initialIsConst || (simdSize != 32) || 
!varTypeIsIntegral(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)); - - if (!canGenerate) - { - if (opts.OptimizationEnabled()) - { - op2 = impPopStack().val; - op1 = impPopStack().val; - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - } - break; - } - - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op1 side effects for vector CreateGeometricSequence")); - - op2 = impPopStack().val; - op1 = impPopStack().val; - - retNode = gtNewSimdCreateGeometricSequenceNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_CreateAlternatingSequence: - case NI_Vector256_CreateAlternatingSequence: - case NI_Vector512_CreateAlternatingSequence: - { - assert(sig->numArgs == 2); - - if ((!impStackTop(1).val->OperIsConst() || !impStackTop(0).val->OperIsConst()) && (simdSize == 32) && - varTypeIsIntegral(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 - break; - } - - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op1 side effects for vector CreateAlternatingSequence")); - - op2 = impPopStack().val; - op1 = impPopStack().val; - - retNode = gtNewSimdCreateAlternatingSequenceNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: - case NI_Vector512_op_Division: - { - assert(sig->numArgs == 2); - - if (!varTypeIsFloating(simdBaseType)) - { -#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) - // Check to see if it is possible to emulate the integer division - if (varTypeIsLong(simdBaseType)) - { - break; - } - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op1 side effects for vector integer division")); -#else - break; -#endif // 
defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseType, simdSize); - - break; - } - - case NI_Vector128_Dot: - case NI_Vector256_Dot: - case NI_Vector512_Dot: - { - assert(sig->numArgs == 2); - var_types simdType = getSIMDTypeForSize(simdSize); - - if ((simdSize == 32) && !varTypeIsFloating(simdBaseType) && - !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 - break; - } - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if ((simdSize == 64) || varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType)) - { - // The lowering for Dot doesn't handle these cases, so import as Sum(left * right) - retNode = gtNewSimdBinOpNode(GT_MUL, simdType, op1, op2, simdBaseType, simdSize); - retNode = gtNewSimdSumNode(retType, retNode, simdBaseType, simdSize); - break; - } - - retNode = gtNewSimdDotProdNode(simdType, op1, op2, simdBaseType, simdSize); - retNode = gtNewSimdToScalarNode(retType, retNode, simdBaseType, simdSize); - break; - } - - case NI_Vector128_Equals: - case NI_Vector256_Equals: - case NI_Vector512_Equals: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - 
- case NI_Vector128_op_Equality: - case NI_Vector256_op_Equality: - case NI_Vector512_op_Equality: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_EqualsAny: - case NI_Vector256_EqualsAny: - case NI_Vector512_EqualsAny: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_ExtractMostSignificantBits: - case NI_Vector256_ExtractMostSignificantBits: - case NI_Vector512_ExtractMostSignificantBits: - case NI_AVX512_MoveMask: - { - assert(sig->numArgs == 1); - - if ((simdSize == 64) || canUseEvexEncoding()) - { - intrinsic = NI_AVX512_MoveMask; - } - - if (intrinsic == NI_AVX512_MoveMask) - { - op1 = impSIMDPopStack(); - - op1 = gtFoldExpr(gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseType, simdSize)); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); - break; - } - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - var_types simdType = getSIMDTypeForSize(simdSize); - - NamedIntrinsic moveMaskIntrinsic = NI_Illegal; - NamedIntrinsic shuffleIntrinsic = NI_Illegal; - - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - { - op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? 
NI_AVX2_MoveMask : NI_X86Base_MoveMask; - break; - } - - case TYP_SHORT: - case TYP_USHORT: - { - op1 = impSIMDPopStack(); - moveMaskIntrinsic = intrinsic; - break; - } - - case TYP_INT: - case TYP_UINT: - case TYP_FLOAT: - { - simdBaseType = TYP_FLOAT; - op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_X86Base_MoveMask; - break; - } - - case TYP_LONG: - case TYP_ULONG: - case TYP_DOUBLE: - { - simdBaseType = TYP_DOUBLE; - op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_X86Base_MoveMask; - break; - } - - default: - { - unreached(); - } - } - - assert(moveMaskIntrinsic != NI_Illegal); - assert(op1 != nullptr); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, moveMaskIntrinsic, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_Floor: - case NI_Vector256_Floor: - case NI_Vector512_Floor: - { - assert(sig->numArgs == 1); - - if (!varTypeIsFloating(simdBaseType)) - { - retNode = impSIMDPopStack(); - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdFloorNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_FusedMultiplyAdd: - case NI_Vector256_FusedMultiplyAdd: - case NI_Vector512_FusedMultiplyAdd: - { - assert(sig->numArgs == 3); - assert(varTypeIsFloating(simdBaseType)); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op3 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_get_AllBitsSet: - case NI_Vector256_get_AllBitsSet: - case NI_Vector512_get_AllBitsSet: - { - assert(sig->numArgs == 0); - retNode = gtNewAllBitsSetConNode(retType); - break; - } - - case NI_Vector128_get_E: - case NI_Vector256_get_E: - case NI_Vector512_get_E: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - 
vecCns->EvaluateBroadcastInPlace(simdBaseType, 2.718281828459045); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_Epsilon: - case NI_Vector256_get_Epsilon: - case NI_Vector512_get_Epsilon: - { - assert(sig->numArgs == 0); - - if (simdBaseType == TYP_FLOAT) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0x00000001)); - retNode = vecCns; - } - else if (simdBaseType == TYP_DOUBLE) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0x0000000000000001)); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_Indices: - case NI_Vector256_get_Indices: - case NI_Vector512_get_Indices: - { - assert(sig->numArgs == 0); - retNode = gtNewSimdGetIndicesNode(retType, simdBaseType, simdSize); - break; - } - - case NI_Vector128_get_SignSequence: - case NI_Vector256_get_SignSequence: - case NI_Vector512_get_SignSequence: - { - assert(sig->numArgs == 0); - - var_types scalarType = genActualType(simdBaseType); - GenTree* one = gtNewOneConNode(scalarType); - GenTree* negativeOne = varTypeIsFloating(simdBaseType) ? 
gtNewDconNode(-1.0, simdBaseType) - : gtNewAllBitsSetConNode(scalarType); - - retNode = gtNewSimdCreateAlternatingSequenceNode(retType, one, negativeOne, simdBaseType, simdSize); - break; - } - - case NI_Vector128_get_NaN: - case NI_Vector256_get_NaN: - case NI_Vector512_get_NaN: - { - assert(sig->numArgs == 0); - - if (simdBaseType == TYP_FLOAT) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0xFFC00000)); - retNode = vecCns; - } - else if (simdBaseType == TYP_DOUBLE) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0xFFF8000000000000)); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_NegativeInfinity: - case NI_Vector256_get_NegativeInfinity: - case NI_Vector512_get_NegativeInfinity: - { - assert(sig->numArgs == 0); - - if (simdBaseType == TYP_FLOAT) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0xFF800000)); - retNode = vecCns; - } - else if (simdBaseType == TYP_DOUBLE) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0xFFF0000000000000)); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_NegativeOne: - case NI_Vector256_get_NegativeOne: - case NI_Vector512_get_NegativeOne: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, -1.0); - retNode = vecCns; - } - else if (varTypeIsSigned(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, static_cast(-1)); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_NegativeZero: - case NI_Vector256_get_NegativeZero: - case NI_Vector512_get_NegativeZero: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = 
gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, -0.0); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_One: - case NI_Vector256_get_One: - case NI_Vector512_get_One: - { - assert(sig->numArgs == 0); - retNode = gtNewOneConNode(retType, simdBaseType); - break; - } - - case NI_Vector128_get_Pi: - case NI_Vector256_get_Pi: - case NI_Vector512_get_Pi: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, 3.141592653589793); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_PositiveInfinity: - case NI_Vector256_get_PositiveInfinity: - case NI_Vector512_get_PositiveInfinity: - { - assert(sig->numArgs == 0); - - if (simdBaseType == TYP_FLOAT) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_INT, static_cast(0x7F800000)); - retNode = vecCns; - } - else if (simdBaseType == TYP_DOUBLE) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(TYP_LONG, static_cast(0x7FF0000000000000)); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_Tau: - case NI_Vector256_get_Tau: - case NI_Vector512_get_Tau: - { - assert(sig->numArgs == 0); - - if (varTypeIsFloating(simdBaseType)) - { - GenTreeVecCon* vecCns = gtNewVconNode(retType); - vecCns->EvaluateBroadcastInPlace(simdBaseType, 6.283185307179586); - retNode = vecCns; - } - break; - } - - case NI_Vector128_get_Zero: - case NI_Vector256_get_Zero: - case NI_Vector512_get_Zero: - { - assert(sig->numArgs == 0); - retNode = gtNewZeroConNode(retType); - break; - } - - case NI_Vector128_GetElement: - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: - { - assert(sig->numArgs == 2); - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_GreaterThan: - 
case NI_Vector256_GreaterThan: - case NI_Vector512_GreaterThan: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_GreaterThanAll: - case NI_Vector256_GreaterThanAll: - case NI_Vector512_GreaterThanAll: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_GreaterThanAny: - case NI_Vector256_GreaterThanAny: - case NI_Vector512_GreaterThanAny: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_GreaterThanOrEqual: - case NI_Vector256_GreaterThanOrEqual: - case NI_Vector512_GreaterThanOrEqual: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_GreaterThanOrEqualAll: - case NI_Vector256_GreaterThanOrEqualAll: - case NI_Vector512_GreaterThanOrEqualAll: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case 
NI_Vector128_GreaterThanOrEqualAny: - case NI_Vector256_GreaterThanOrEqualAny: - case NI_Vector512_GreaterThanOrEqualAny: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsEvenInteger: - case NI_Vector256_IsEvenInteger: - case NI_Vector512_IsEvenInteger: - { - assert(sig->numArgs == 1); - - if (varTypeIsFloating(simdBaseType)) - { - // The code for handling floating-point is decently complex but also expected - // to be rare, so we fallback to the managed implementation, which is accelerated - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsEvenIntegerNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_IsFinite: - case NI_Vector256_IsFinite: - case NI_Vector512_IsFinite: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsFiniteNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsInfinity: - case NI_Vector256_IsInfinity: - case NI_Vector512_IsInfinity: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsInfinityNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsInteger: - case NI_Vector256_IsInteger: - case NI_Vector512_IsInteger: - { - assert(sig->numArgs == 1); - - if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsIntegerNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_IsNaN: - case NI_Vector256_IsNaN: - case NI_Vector512_IsNaN: - { - assert(sig->numArgs == 
1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNaNNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_IsNegative: - case NI_Vector256_IsNegative: - case NI_Vector512_IsNegative: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNegativeNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsNegativeInfinity: - case NI_Vector256_IsNegativeInfinity: - case NI_Vector512_IsNegativeInfinity: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNegativeInfinityNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsNormal: - case NI_Vector256_IsNormal: - case NI_Vector512_IsNormal: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNormalNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsOddInteger: - case NI_Vector256_IsOddInteger: - case NI_Vector512_IsOddInteger: - { - assert(sig->numArgs == 1); - - if (varTypeIsFloating(simdBaseType)) - { - // The code for handling floating-point is decently complex but also expected - // to be rare, so we fallback to the managed implementation, which is accelerated - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsOddIntegerNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_IsPositive: - case NI_Vector256_IsPositive: - case NI_Vector512_IsPositive: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsPositiveNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsPositiveInfinity: - 
case NI_Vector256_IsPositiveInfinity: - case NI_Vector512_IsPositiveInfinity: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsPositiveInfinityNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsSubnormal: - case NI_Vector256_IsSubnormal: - case NI_Vector512_IsSubnormal: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsSubnormalNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_IsZero: - case NI_Vector256_IsZero: - case NI_Vector512_IsZero: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsZeroNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_LessThan: - case NI_Vector256_LessThan: - case NI_Vector512_LessThan: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_LessThanAll: - case NI_Vector256_LessThanAll: - case NI_Vector512_LessThanAll: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_LessThanAny: - case NI_Vector256_LessThanAny: - case NI_Vector512_LessThanAny: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_LT, retType, 
op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_LessThanOrEqual: - case NI_Vector256_LessThanOrEqual: - case NI_Vector512_LessThanOrEqual: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_LessThanOrEqualAll: - case NI_Vector256_LessThanOrEqualAll: - case NI_Vector512_LessThanOrEqualAll: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_LessThanOrEqualAny: - case NI_Vector256_LessThanOrEqualAny: - case NI_Vector512_LessThanOrEqualAny: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_X86Base_LoadVector128: - case NI_AVX_LoadVector256: - case NI_AVX512_LoadVector512: - case NI_Vector128_LoadUnsafe: - case NI_Vector256_LoadUnsafe: - case NI_Vector512_LoadUnsafe: - { - if (sig->numArgs == 2) - { - op2 = impPopStack().val; - } - else - { - assert(sig->numArgs == 1); - } - - op1 = impPopStack().val; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op1 = op1->gtGetOp1(); - } - - if (sig->numArgs == 2) - { - op3 = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet()); - op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, op3); - op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2); - } - - retNode = gtNewSimdLoadNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_LoadAligned: - case NI_Vector256_LoadAligned: - case NI_Vector512_LoadAligned: - { - assert(sig->numArgs == 1); - - op1 = impPopStack().val; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op1 = op1->gtGetOp1(); - } - - retNode = gtNewSimdLoadAlignedNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_LoadAlignedNonTemporal: - case NI_Vector256_LoadAlignedNonTemporal: - case NI_Vector512_LoadAlignedNonTemporal: - { - assert(sig->numArgs == 1); - - op1 = impPopStack().val; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op1 = op1->gtGetOp1(); - } - - retNode = gtNewSimdLoadNonTemporalNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_Max: - case NI_Vector256_Max: - case NI_Vector512_Max: - { - isMinMaxIntrinsic = true; - isMax = true; - break; - } - - case NI_Vector128_MaxMagnitude: - case NI_Vector256_MaxMagnitude: - case NI_Vector512_MaxMagnitude: - { - isMinMaxIntrinsic = true; - isMax = true; - isMagnitude = true; - break; - } - - case NI_Vector128_MaxMagnitudeNumber: - case NI_Vector256_MaxMagnitudeNumber: - case NI_Vector512_MaxMagnitudeNumber: - { - isMinMaxIntrinsic = true; - isMax = true; - isMagnitude = true; - isNumber = true; - break; - } - - case NI_Vector128_MaxNative: - case NI_Vector256_MaxNative: - case NI_Vector512_MaxNative: - { - isMinMaxIntrinsic = true; - isMax = true; - isNative = true; - break; - } - - case NI_Vector128_MaxNumber: - case NI_Vector256_MaxNumber: - case NI_Vector512_MaxNumber: - { - isMinMaxIntrinsic = true; - isMax = true; - isNumber = true; - break; - } - - case NI_Vector128_Min: - case NI_Vector256_Min: - case NI_Vector512_Min: - { - isMinMaxIntrinsic = true; - break; - } - - case NI_Vector128_MinMagnitude: - case NI_Vector256_MinMagnitude: - case NI_Vector512_MinMagnitude: - { - isMinMaxIntrinsic = true; - isMagnitude = true; - break; - } - - case NI_Vector128_MinMagnitudeNumber: - case NI_Vector256_MinMagnitudeNumber: - case NI_Vector512_MinMagnitudeNumber: - { - isMinMaxIntrinsic = true; - isMagnitude = true; - isNumber = true; - break; - } - - case NI_Vector128_MinNative: - case NI_Vector256_MinNative: - case NI_Vector512_MinNative: - { - isMinMaxIntrinsic = true; - isNative = true; - break; - } - - case NI_Vector128_MinNumber: - case NI_Vector256_MinNumber: - case NI_Vector512_MinNumber: - { - isMinMaxIntrinsic = true; - isNumber = true; - break; - } - - case NI_Vector128_op_Multiply: - case NI_Vector256_op_Multiply: - case NI_Vector512_op_Multiply: - { - assert(sig->numArgs == 2); - - if ((simdSize == 
32) && !varTypeIsFloating(simdBaseType) && - !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 - break; - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_MultiplyAddEstimate: - case NI_Vector256_MultiplyAddEstimate: - case NI_Vector512_MultiplyAddEstimate: - { - assert(sig->numArgs == 3); - - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - - if ((simdSize == 32) && !varTypeIsFloating(simdBaseType) && - !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 - break; - } - - op3 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (varTypeIsFloating(simdBaseType) && compExactlyDependsOn(InstructionSet_AVX2)) - { - retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseType, simdSize); - } - else - { - GenTree* mulNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseType, simdSize); - retNode = gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_Narrow: - case NI_Vector256_Narrow: - case NI_Vector512_Narrow: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - 
- retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_NarrowWithSaturation: - case NI_Vector256_NarrowWithSaturation: - case NI_Vector512_NarrowWithSaturation: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (simdBaseType == TYP_DOUBLE) - { - // gtNewSimdNarrowNode uses the base type of the return for the simdBaseType - retNode = gtNewSimdNarrowNode(retType, op1, op2, TYP_FLOAT, simdSize); - } - else if ((simdSize == 16) && ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_INT))) - { - // PackSignedSaturate uses the base type of the return for the simdBaseType - simdBaseType = (simdBaseType == TYP_SHORT) ? TYP_BYTE : TYP_SHORT; - - intrinsic = NI_X86Base_PackSignedSaturate; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - if ((simdSize == 32) || (simdSize == 64)) - { - if (simdSize == 32) - { - intrinsic = NI_Vector256_ToVector512Unsafe; - - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD64, op1, intrinsic, simdBaseType, simdSize); - op1 = gtNewSimdWithUpperNode(TYP_SIMD64, op1, op2, simdBaseType, simdSize * 2); - } - - switch (simdBaseType) - { - case TYP_SHORT: - { - intrinsic = NI_AVX512_ConvertToVector256SByteWithSaturation; - break; - } - - case TYP_USHORT: - { - intrinsic = NI_AVX512_ConvertToVector256ByteWithSaturation; - break; - } - - case TYP_INT: - { - intrinsic = NI_AVX512_ConvertToVector256Int16WithSaturation; - break; - } - - case TYP_UINT: - { - intrinsic = NI_AVX512_ConvertToVector256UInt16WithSaturation; - break; - } - - case TYP_LONG: - { - intrinsic = NI_AVX512_ConvertToVector256Int32WithSaturation; - break; - } - - case TYP_ULONG: - { - intrinsic = NI_AVX512_ConvertToVector256UInt32WithSaturation; - break; - 
} - - default: - { - unreached(); - } - } - } - else - { - assert(simdSize == 16); - intrinsic = NI_Vector128_ToVector256Unsafe; - - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, intrinsic, simdBaseType, simdSize); - op1 = gtNewSimdWithUpperNode(TYP_SIMD32, op1, op2, simdBaseType, simdSize * 2); - - switch (simdBaseType) - { - case TYP_USHORT: - { - intrinsic = NI_AVX512_ConvertToVector128ByteWithSaturation; - break; - } - - case TYP_UINT: - { - intrinsic = NI_AVX512_ConvertToVector128UInt16WithSaturation; - break; - } - - case TYP_LONG: - { - intrinsic = NI_AVX512_ConvertToVector128Int32WithSaturation; - break; - } - - case TYP_ULONG: - { - intrinsic = NI_AVX512_ConvertToVector128UInt32WithSaturation; - break; - } - - default: - { - unreached(); - } - } - } - - if (simdSize == 64) - { - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, intrinsic, simdBaseType, simdSize); - op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op2, intrinsic, simdBaseType, simdSize); - - retNode = gtNewSimdWithUpperNode(retType, op1, op2, simdBaseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize * 2); - } - } - else - { - // gtNewSimdNarrowNode uses the base type of the return for the simdBaseType - var_types narrowSimdBaseType; - - GenTreeVecCon* minCns = varTypeIsSigned(simdBaseType) ? 
gtNewVconNode(retType) : nullptr; - GenTreeVecCon* maxCns = gtNewVconNode(retType); - - switch (simdBaseType) - { - case TYP_SHORT: - { - minCns->EvaluateBroadcastInPlace(INT8_MIN); - maxCns->EvaluateBroadcastInPlace(INT8_MAX); - - narrowSimdBaseType = TYP_BYTE; - break; - } - - case TYP_USHORT: - { - maxCns->EvaluateBroadcastInPlace(UINT8_MAX); - narrowSimdBaseType = TYP_UBYTE; - break; - } - - case TYP_INT: - { - minCns->EvaluateBroadcastInPlace(INT16_MIN); - maxCns->EvaluateBroadcastInPlace(INT16_MAX); - - narrowSimdBaseType = TYP_SHORT; - break; - } - - case TYP_UINT: - { - maxCns->EvaluateBroadcastInPlace(UINT16_MAX); - narrowSimdBaseType = TYP_USHORT; - break; - } - - case TYP_LONG: - { - minCns->EvaluateBroadcastInPlace(INT32_MIN); - maxCns->EvaluateBroadcastInPlace(INT32_MAX); - - narrowSimdBaseType = TYP_INT; - break; - } - - case TYP_ULONG: - { - maxCns->EvaluateBroadcastInPlace(UINT32_MAX); - narrowSimdBaseType = TYP_UINT; - break; - } - - default: - { - unreached(); - } - } - - // This does a clamp which is defined as: Min(Max(value, min), max) - // which means that we do a max computation if a minimum constant is specified - // There will be none specified for unsigned to unsigned narrowing since - // they share a lower bound (0) and will already be correct. 
- - if (minCns != nullptr) - { - op1 = gtNewSimdMinMaxNode(retType, op1, minCns, simdBaseType, simdSize, /* isMax */ true, - /* isMagnitude */ false, /* isNumber */ false); - op2 = gtNewSimdMinMaxNode(retType, op2, gtCloneExpr(minCns), simdBaseType, simdSize, - /* isMax */ true, /* isMagnitude */ false, /* isNumber */ false); - } - - op1 = gtNewSimdMinMaxNode(retType, op1, maxCns, simdBaseType, simdSize, /* isMax */ false, - /* isMagnitude */ false, /* isNumber */ false); - op2 = gtNewSimdMinMaxNode(retType, op2, gtCloneExpr(maxCns), simdBaseType, simdSize, - /* isMax */ false, /* isMagnitude */ false, /* isNumber */ false); - - retNode = gtNewSimdNarrowNode(retType, op1, op2, narrowSimdBaseType, simdSize); - } - } - break; - } - - case NI_Vector128_op_UnaryNegation: - case NI_Vector256_op_UnaryNegation: - case NI_Vector512_op_UnaryNegation: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_op_OnesComplement: - case NI_Vector256_op_OnesComplement: - case NI_Vector512_op_OnesComplement: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_op_Inequality: - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Inequality: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_op_UnaryPlus: - case NI_Vector256_op_UnaryPlus: - case NI_Vector512_op_UnaryPlus: - { - assert(sig->numArgs == 1); - retNode = impSIMDPopStack(); - break; - } 
- - case NI_Vector128_op_Subtraction: - case NI_Vector256_op_Subtraction: - case NI_Vector512_op_Subtraction: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_op_LeftShift: - case NI_Vector256_op_LeftShift: - case NI_Vector512_op_LeftShift: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_op_RightShift: - case NI_Vector256_op_RightShift: - case NI_Vector512_op_RightShift: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - genTreeOps op = varTypeIsUnsigned(simdBaseType) ? 
GT_RSZ : GT_RSH; - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_op_UnsignedRightShift: - case NI_Vector256_op_UnsignedRightShift: - case NI_Vector512_op_UnsignedRightShift: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_Round: - case NI_Vector256_Round: - case NI_Vector512_Round: - { - if (sig->numArgs != 1) - { - break; - } - - if (!varTypeIsFloating(simdBaseType)) - { - retNode = impSIMDPopStack(); - break; - } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdRoundNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_ShiftLeft: - case NI_Vector256_ShiftLeft: - case NI_Vector512_ShiftLeft: - { - assert(sig->numArgs == 2); - - if (!varTypeIsSIMD(impStackTop(0).val)) - { - // We just want the inlining profitability boost for the helper intrinsics/ - // that have operator alternatives like `simd << int` - break; - } - - if ((simdSize != 16) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (simdSize == 64) - { - intrinsic = NI_AVX512_ShiftLeftLogicalVariable; - } - else - { - assert((simdSize == 16) || (simdSize == 32)); - intrinsic = NI_AVX2_ShiftLeftLogicalVariable; - } - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - } - break; - } - - case NI_Vector128_Shuffle: - case NI_Vector256_Shuffle: - case NI_Vector512_Shuffle: - case NI_Vector128_ShuffleNative: - case NI_Vector256_ShuffleNative: - case NI_Vector512_ShuffleNative: - case NI_Vector128_ShuffleNativeFallback: - case NI_Vector256_ShuffleNativeFallback: - case NI_Vector512_ShuffleNativeFallback: - { 
- assert((sig->numArgs == 2) || (sig->numArgs == 3)); - - // The Native variants are non-deterministic on xarch - bool isShuffleNative = (intrinsic != NI_Vector128_Shuffle) && (intrinsic != NI_Vector256_Shuffle) && - (intrinsic != NI_Vector512_Shuffle); - if (isShuffleNative && BlockNonDeterministicIntrinsics(mustExpand)) - { - break; - } - - GenTree* indices = impStackTop(0).val; - - // Check if the required intrinsics are available to emit now (validForShuffle). If we have variable - // indices that might become possible to emit later (due to them becoming constant), this will be - // indicated in canBecomeValidForShuffle; otherwise, it's just the same as validForShuffle. - bool canBecomeValidForShuffle = false; - bool validForShuffle = - IsValidForShuffle(indices, simdSize, simdBaseType, &canBecomeValidForShuffle, isShuffleNative); - - // If it isn't valid for shuffle (and can't become valid later), then give up now. - if (!canBecomeValidForShuffle) - { - return nullptr; - } - - // If the indices might become constant later, then we don't emit for now, delay until later. - if ((!validForShuffle) || (!indices->IsCnsVec())) - { - assert(sig->numArgs == 2); - - if (opts.OptimizationEnabled()) - { - // Only enable late stage rewriting if optimizations are enabled - // as we won't otherwise encounter a constant at the later point - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; - } - - // If we're not doing late stage rewriting, just return null now as it won't become valid. 
- if (!validForShuffle) - { - return nullptr; - } - } - - if (sig->numArgs == 2) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseType, simdSize, isShuffleNative); - } - break; - } - - case NI_Vector128_Sqrt: - case NI_Vector256_Sqrt: - case NI_Vector512_Sqrt: - { - assert(sig->numArgs == 1); - - if (varTypeIsFloating(simdBaseType)) - { - op1 = impSIMDPopStack(); - retNode = gtNewSimdSqrtNode(retType, op1, simdBaseType, simdSize); - } - break; - } - - case NI_X86Base_Store: - case NI_AVX_Store: - case NI_AVX512_Store: - { - assert(retType == TYP_VOID); - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impPopStack().val; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op1 = op1->gtGetOp1(); - } - - retNode = gtNewSimdStoreNode(op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector128_StoreUnsafe: - case NI_Vector256_StoreUnsafe: - case NI_Vector512_StoreUnsafe: - { - assert(retType == TYP_VOID); - - if (sig->numArgs == 3) - { - impSpillSideEffect(true, - stackState.esStackDepth - 3 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - - op3 = impPopStack().val; - } - else - { - assert(sig->numArgs == 2); - - impSpillSideEffect(true, - stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - } - - op2 = impPopStack().val; - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op2 = op2->gtGetOp1(); - } - - if (sig->numArgs == 3) - { - op4 = gtNewIconNode(genTypeSize(simdBaseType), op3->TypeGet()); - op3 = gtNewOperNode(GT_MUL, op3->TypeGet(), op3, op4); - op2 = gtNewOperNode(GT_ADD, op2->TypeGet(), op2, op3); - } - - op1 = impSIMDPopStack(); - - retNode = gtNewSimdStoreNode(op2, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_StoreAligned: - case NI_Vector256_StoreAligned: - case NI_Vector512_StoreAligned: - { - assert(sig->numArgs == 2); - assert(retType == TYP_VOID); - - impSpillSideEffect(true, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - - op2 = impPopStack().val; - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op2 = op2->gtGetOp1(); - } - - op1 = impSIMDPopStack(); - - retNode = gtNewSimdStoreAlignedNode(op2, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_StoreAlignedNonTemporal: - case NI_Vector256_StoreAlignedNonTemporal: - case NI_Vector512_StoreAlignedNonTemporal: - { - assert(sig->numArgs == 2); - assert(retType == TYP_VOID); - - impSpillSideEffect(true, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - - op2 = impPopStack().val; + CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic); - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op2 = op2->gtGetOp1(); - } + if (isa == InstructionSet_Vector) + { + return impXplatIntrinsic(intrinsic, clsHnd, method, sig R2RARG(entryPoint), simdBaseType, retType, simdSize, + mustExpand); + } - op1 = impSIMDPopStack(); + GenTree* retNode = nullptr; + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + GenTree* op3 = nullptr; + GenTree* op4 = nullptr; - retNode = gtNewSimdStoreNonTemporalNode(op2, op1, simdBaseType, simdSize); - break; - } + if (simdSize != 0) + { + assert(varTypeIsArithmetic(simdBaseType)); + } - case NI_Vector128_SubtractSaturate: - case NI_Vector256_SubtractSaturate: - case NI_Vector512_SubtractSaturate: + switch (intrinsic) + { + case NI_AVX2_AndNot: { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (varTypeIsSIMD(retType)) { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (varTypeIsFloating(simdBaseType)) - { - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); - } - else if (varTypeIsSmall(simdBaseType)) - { - if (simdSize == 64) - { - intrinsic = NI_AVX512_SubtractSaturate; - } - else if (simdSize == 32) - { - intrinsic = NI_AVX2_SubtractSaturate; - } - else - { - assert(simdSize == 16); - intrinsic = NI_X86Base_SubtractSaturate; - } - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); - } - else if (varTypeIsUnsigned(simdBaseType)) - { - // For unsigned we simply have to detect `(x - y) > x` - // and in that scenario return MinValue (Zero) - - GenTree* cns = gtNewZeroConNode(retType); - GenTree* op1Dup1 = fgMakeMultiUse(&op1); - - GenTree* tmp = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); - GenTree* tmpDup1 = fgMakeMultiUse(&tmp); - GenTree* msk = gtNewSimdCmpOpNode(GT_GT, retType, tmp, op1Dup1, simdBaseType, simdSize); - - retNode = gtNewSimdCndSelNode(retType, msk, cns, tmpDup1, simdBaseType, simdSize); - } - else - { - 
// For signed the logic is a bit more complex, but is - // explained on the managed side as part of Scalar.SubtractSaturate - - GenTreeVecCon* minCns = gtNewVconNode(retType); - GenTreeVecCon* maxCns = gtNewVconNode(retType); - - switch (simdBaseType) - { - case TYP_SHORT: - { - minCns->EvaluateBroadcastInPlace(INT16_MIN); - maxCns->EvaluateBroadcastInPlace(INT16_MAX); - break; - } - - case TYP_INT: - { - minCns->EvaluateBroadcastInPlace(INT32_MIN); - maxCns->EvaluateBroadcastInPlace(INT32_MAX); - break; - } - - case TYP_LONG: - { - minCns->EvaluateBroadcastInPlace(INT64_MIN); - maxCns->EvaluateBroadcastInPlace(INT64_MAX); - break; - } - - default: - { - unreached(); - } - } - - GenTree* op1Dup1 = fgMakeMultiUse(&op1); - GenTree* op2Dup1 = fgMakeMultiUse(&op2); - - GenTree* tmp = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseType, simdSize); - - GenTree* tmpDup1 = fgMakeMultiUse(&tmp); - GenTree* tmpDup2 = gtCloneExpr(tmpDup1); - - GenTree* msk = gtNewSimdIsNegativeNode(retType, tmpDup1, simdBaseType, simdSize); - GenTree* ovf = gtNewSimdCndSelNode(retType, msk, maxCns, minCns, simdBaseType, simdSize); - - // The mask we need is ((a ^ b) & (b ^ c)) < 0 - - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - // tmpDup1 = a: 0xF0 - // op1Dup1 = b: 0xCC - // op2Dup2 = c: 0xAA - // - // 0x18 = B ? norAC : andAC - // b ? 
~(a | c) : (a & c) - msk = gtNewSimdTernaryLogicNode(retType, tmp, op1Dup1, op2Dup1, gtNewIconNode(0x24), - simdBaseType, simdSize); - } - else - { - GenTree* op1Dup2 = gtCloneExpr(op1Dup1); - - GenTree* msk2 = gtNewSimdBinOpNode(GT_XOR, retType, tmp, op1Dup1, simdBaseType, simdSize); - GenTree* msk3 = gtNewSimdBinOpNode(GT_XOR, retType, op1Dup2, op2Dup1, simdBaseType, simdSize); - - msk = gtNewSimdBinOpNode(GT_AND, retType, msk2, msk3, simdBaseType, simdSize); - } - - msk = gtNewSimdIsNegativeNode(retType, msk, simdBaseType, simdSize); - retNode = gtNewSimdCndSelNode(retType, msk, ovf, tmpDup2, simdBaseType, simdSize); - } + intrinsic = NI_AVX2_AndNotVector; + simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig); + compFloatingPointUsed = true; } - break; - } - - case NI_Vector128_Sum: - case NI_Vector256_Sum: - case NI_Vector512_Sum: - { - assert(sig->numArgs == 1); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdSumNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: - { - assert(sig->numArgs == 1); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdToScalarNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_Truncate: - case NI_Vector256_Truncate: - case NI_Vector512_Truncate: - { - assert(sig->numArgs == 1); - - if (!varTypeIsFloating(simdBaseType)) + else { - retNode = impSIMDPopStack(); - break; + intrinsic = NI_AVX2_AndNotScalar; } - - op1 = impSIMDPopStack(); - retNode = gtNewSimdTruncNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector256_GetUpper: - case NI_Vector512_GetUpper: - { - assert(sig->numArgs == 1); - - op1 = impSIMDPopStack(); - retNode = gtNewSimdGetUpperNode(retType, op1, simdBaseType, simdSize); - break; + FALLTHROUGH; } - case NI_Vector128_WidenLower: - case NI_Vector256_WidenLower: - case NI_Vector512_WidenLower: + case NI_X86Base_AndNot: + case NI_AVX_AndNot: + case 
NI_AVX2_X64_AndNot: + case NI_AVX512_AndNot: { - assert(sig->numArgs == 1); + assert(sig->numArgs == 2); - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (simdSize != 0) { - assert((simdSize != 64) || compIsaSupportedDebugOnly(InstructionSet_AVX512)); + // We don't want to support creating AND_NOT nodes prior to LIR + // as it can break important optimizations. We'll produces this + // in lowering instead so decompose into the individual operations + // on import, taking into account that despite the name, these APIs + // do (~op1 & op2), so we need to account for that + op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdWidenLowerNode(retType, op1, simdBaseType, simdSize); + op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseType, simdSize)); + retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseType, simdSize); } - break; - } - - case NI_Vector128_WidenUpper: - case NI_Vector256_WidenUpper: - case NI_Vector512_WidenUpper: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) + else { - op1 = impSIMDPopStack(); - - retNode = gtNewSimdWidenUpperNode(retType, op1, simdBaseType, simdSize); - } - break; - } + // The same general reasoning for the decomposition exists here as + // given above for the SIMD AndNot APIs. 
- case NI_Vector128_WithElement: - case NI_Vector256_WithElement: - case NI_Vector512_WithElement: - { - assert(sig->numArgs == 3); + op2 = impPopStack().val; + op1 = impPopStack().val; - if (varTypeIsLong(simdBaseType)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_X86Base_X64)) - { - return nullptr; - } + op1 = gtFoldExpr(gtNewOperNode(GT_NOT, retType, op1)); + retNode = gtNewOperNode(GT_AND, retType, op1, op2); } - - GenTree* valueOp = impPopStack().val; - GenTree* indexOp = impPopStack().val; - GenTree* vectorOp = impSIMDPopStack(); - - retNode = gtNewSimdWithElementNode(retType, vectorOp, indexOp, valueOp, simdBaseType, simdSize); - break; - } - - case NI_Vector256_WithLower: - case NI_Vector512_WithLower: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - retNode = gtNewSimdWithLowerNode(retType, op1, op2, simdBaseType, simdSize); - break; - } - - case NI_Vector256_WithUpper: - case NI_Vector512_WithUpper: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - retNode = gtNewSimdWithUpperNode(retType, op1, op2, simdBaseType, simdSize); break; } - case NI_Vector128_ConcatLowerLower: - case NI_Vector256_ConcatLowerLower: - case NI_Vector512_ConcatLowerLower: - case NI_Vector128_ConcatLowerUpper: - case NI_Vector256_ConcatLowerUpper: - case NI_Vector512_ConcatLowerUpper: - case NI_Vector128_ConcatUpperLower: - case NI_Vector256_ConcatUpperLower: - case NI_Vector512_ConcatUpperLower: - case NI_Vector128_ConcatUpperUpper: - case NI_Vector256_ConcatUpperUpper: - case NI_Vector512_ConcatUpperUpper: + case NI_AVX512_MoveMask: { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); + assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - bool leftUpper = - (intrinsic == NI_Vector128_ConcatUpperLower) || (intrinsic == NI_Vector256_ConcatUpperLower) || - (intrinsic == NI_Vector512_ConcatUpperLower) || (intrinsic == NI_Vector128_ConcatUpperUpper) || - (intrinsic == 
NI_Vector256_ConcatUpperUpper) || (intrinsic == NI_Vector512_ConcatUpperUpper); - bool rightUpper = - (intrinsic == NI_Vector128_ConcatLowerUpper) || (intrinsic == NI_Vector256_ConcatLowerUpper) || - (intrinsic == NI_Vector512_ConcatLowerUpper) || (intrinsic == NI_Vector128_ConcatUpperUpper) || - (intrinsic == NI_Vector256_ConcatUpperUpper) || (intrinsic == NI_Vector512_ConcatUpperUpper); - - retNode = gtNewSimdConcatNode(retType, op1, op2, simdBaseType, simdSize, leftUpper, rightUpper); + op1 = gtFoldExpr(gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseType, simdSize)); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); break; } - case NI_Vector128_ZipLower: - case NI_Vector256_ZipLower: - case NI_Vector512_ZipLower: - case NI_Vector128_ZipUpper: - case NI_Vector256_ZipUpper: - case NI_Vector512_ZipUpper: + case NI_X86Base_LoadVector128: + case NI_AVX_LoadVector256: + case NI_AVX512_LoadVector512: { - assert(sig->numArgs == 2); + assert(sig->numArgs == 1); + op1 = impPopStack().val; - if ((simdSize == 32) && varTypeIsIntegral(simdBaseType) && - !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) { - break; + // If what we have is a BYREF, that's what we really want, so throw away the cast. 
+ op1 = op1->gtGetOp1(); } - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - bool upper = (intrinsic == NI_Vector128_ZipUpper) || (intrinsic == NI_Vector256_ZipUpper) || - (intrinsic == NI_Vector512_ZipUpper); - retNode = gtNewSimdZipNode(retType, op1, op2, simdBaseType, simdSize, upper); + retNode = gtNewSimdLoadNode(retType, op1, simdBaseType, simdSize); break; } - case NI_Vector128_UnzipEven: - case NI_Vector256_UnzipEven: - case NI_Vector512_UnzipEven: - case NI_Vector128_UnzipOdd: - case NI_Vector256_UnzipOdd: - case NI_Vector512_UnzipOdd: + case NI_X86Base_Store: + case NI_AVX_Store: + case NI_AVX512_Store: { + assert(retType == TYP_VOID); assert(sig->numArgs == 2); - if (simdSize == 16) - { - bool supportsX86BaseShuffle = - (simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT) || (simdBaseType == TYP_FLOAT); - - if (!supportsX86BaseShuffle && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - break; - } - } - else if (simdSize > 16) - { - if (!compOpportunisticallyDependsOn(varTypeIsFloating(simdBaseType) ? InstructionSet_AVX - : InstructionSet_AVX2)) - { - break; - } - } - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - bool odd = (intrinsic == NI_Vector128_UnzipOdd) || (intrinsic == NI_Vector256_UnzipOdd) || - (intrinsic == NI_Vector512_UnzipOdd); - retNode = gtNewSimdUnzipNode(retType, op1, op2, simdBaseType, simdSize, odd); - break; - } - - case NI_Vector128_Reverse: - case NI_Vector256_Reverse: - case NI_Vector512_Reverse: - { - assert(sig->numArgs == 1); - - if (simdSize == 32) - { - if (!compOpportunisticallyDependsOn(varTypeIsFloating(simdBaseType) ? InstructionSet_AVX - : InstructionSet_AVX2)) - { - break; - } - } + op1 = impPopStack().val; - if ((simdSize == 64) && varTypeIsByte(simdBaseType) && - !compOpportunisticallyDependsOn(InstructionSet_AVX512v2)) + if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) { - break; + // If what we have is a BYREF, that's what we really want, so throw away the cast. 
+ op1 = op1->gtGetOp1(); } - op1 = impSIMDPopStack(); - retNode = gtNewSimdReverseNode(retType, op1, simdBaseType, simdSize); - break; - } - - case NI_Vector128_op_ExclusiveOr: - case NI_Vector256_op_ExclusiveOr: - case NI_Vector512_op_ExclusiveOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseType, simdSize); + retNode = gtNewSimdStoreNode(op1, op2, simdBaseType, simdSize); break; } @@ -5599,31 +2736,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(retType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); retNode = gtNewSimdCvtMaskToVectorNode(retType, gtFoldExpr(retNode), simdBaseType, simdSize); } - else if (isMinMaxIntrinsic) - { - assert(sig->numArgs == 2); - assert(retNode == nullptr); - - if (isNative && BlockNonDeterministicIntrinsics(mustExpand)) - { - return nullptr; - } - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - if (isNative) - { - assert(!isMagnitude && !isNumber); - retNode = gtNewSimdMinMaxNativeNode(retType, op1, op2, simdBaseType, simdSize, isMax); - } - else if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - retNode = gtNewSimdMinMaxNode(retType, op1, op2, simdBaseType, simdSize, isMax, isMagnitude, isNumber); - } - } - return retNode; } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index a0340343c58c44..a74db385af1695 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3376,23 +3376,23 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, #if defined(TARGET_XARCH) // We can't guarantee that all overloads for the xplat intrinsics can be // handled by the AltJit, so limit only the platform specific intrinsics - assert((LAST_NI_Vector512 + 1) == FIRST_NI_X86Base); + assert((LAST_NI_Vector + 1) == FIRST_NI_X86Base); - if (ni < 
LAST_NI_Vector512) + if (ni < LAST_NI_Vector) #elif defined(TARGET_ARM64) // We can't guarantee that all overloads for the xplat intrinsics can be // handled by the AltJit, so limit only the platform specific intrinsics - assert((LAST_NI_Vector128 + 1) == FIRST_NI_AdvSimd); + assert((LAST_NI_Vector + 1) == FIRST_NI_AdvSimd); - if (ni < LAST_NI_Vector128) + if (ni < LAST_NI_Vector) #elif defined(TARGET_WASM) NYI_WASM_SIMD("impHWIntrinsic"); - if (ni < LAST_NI_Vector128) + if (ni < LAST_NI_Vector) #else #error Unsupported platform #endif { - // Several of the NI_Vector64/128/256 APIs do not have + // Several of the NI_Vector APIs do not have // all overloads as intrinsic today so they will assert return nullptr; } diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index e33d810f40fd70..080d78d95f0f3b 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1162,12 +1162,8 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(instruction ins, GenTree* op) break; } - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { // The hwintrinsic should be contained and its // op1 should be either contained or spilled. 
This diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index d8dc57c0f85205..426a284166868d 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1717,15 +1717,19 @@ class LocalAddressVisitor final : public GenTreeVisitor { // Handle the Vector3 field of case 2 assert(genTypeSize(varDsc) == 16); - hwiNode = m_compiler->gtNewSimdHWIntrinsicNode(elementType, lclNode, NI_Vector128_AsVector3, + hwiNode = m_compiler->gtNewSimdHWIntrinsicNode(elementType, lclNode, NI_Vector_AsVector3, TYP_FLOAT, 16); break; } - case TYP_SIMD8: -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) case TYP_SIMD16: case TYP_SIMD32: +#elif defined(TARGET_ARM64) + case TYP_SIMD8: +#else +#error Unsupported platform #endif { // Handle case 3 @@ -1744,6 +1748,8 @@ class LocalAddressVisitor final : public GenTreeVisitor break; } +#endif // !TARGET_XARCH && !TARGET_ARM64 + default: unreached(); } @@ -1780,9 +1786,8 @@ class LocalAddressVisitor final : public GenTreeVisitor // simdLclNode[3] as the new value. 
This gives us a new TYP_SIMD16 with all elements in the // right spots - elementNode = - m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, elementNode, - NI_Vector128_AsVector128Unsafe, TYP_FLOAT, 12); + elementNode = m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, elementNode, + NI_Vector_AsVector128Unsafe, TYP_FLOAT, 12); GenTree* indexNode1 = m_compiler->gtNewIconNode(3, TYP_INT); simdLclNode = @@ -1794,10 +1799,14 @@ class LocalAddressVisitor final : public GenTreeVisitor break; } - case TYP_SIMD8: -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) case TYP_SIMD16: case TYP_SIMD32: +#elif defined(TARGET_ARM64) + case TYP_SIMD8: +#else +#error Unsupported platform #endif { // Handle case 3 @@ -1813,9 +1822,9 @@ class LocalAddressVisitor final : public GenTreeVisitor hwiNode = m_compiler->gtNewSimdWithUpperNode(varDsc->TypeGet(), simdLclNode, elementNode, TYP_FLOAT, genTypeSize(varDsc)); } - break; } +#endif // !TARGET_XARCH && !TARGET_ARM64 default: unreached(); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0c04a216fb9c99..c938782230d2de 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2559,7 +2559,7 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) } else if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { - // We need AVX2 for NI_Vector256_op_Equality, fallback to Vector128 if only AVX is available + // We need AVX2 for TYP_SIMD32 based op_Equality, fallback to Vector128 if only AVX is available MaxUnrollSize = 64; } else diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 805c4d93aed4b9..cdf88c1c0a707b 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1387,18 +1387,23 @@ void Lowering::LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node) auto lowerOperand = [this](GenTree* op) { bool wasNegated = false; - 
if (op->OperIsHWIntrinsic() && - ((op->AsHWIntrinsic()->GetHWIntrinsicId() == NI_AdvSimd_Arm64_DuplicateToVector64) || - (op->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Vector64_CreateScalarUnsafe))) + if (op->OperIsHWIntrinsic()) { - GenTreeHWIntrinsic* createVector64 = op->AsHWIntrinsic(); - GenTree* valueOp = createVector64->Op(1); + GenTreeHWIntrinsic* opIntrinsic = op->AsHWIntrinsic(); + NamedIntrinsic opIntrinsicId = opIntrinsic->GetHWIntrinsicId(); + unsigned opSimdSize = opIntrinsic->GetSimdSize(); - if (valueOp->OperIs(GT_NEG)) + if ((opIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) || + ((opIntrinsicId == NI_Vector_CreateScalarUnsafe) && (opSimdSize == 8))) { - createVector64->Op(1) = valueOp->gtGetOp1(); - BlockRange().Remove(valueOp); - wasNegated = true; + GenTree* valueOp = opIntrinsic->Op(1); + + if (valueOp->OperIs(GT_NEG)) + { + opIntrinsic->Op(1) = valueOp->gtGetOp1(); + BlockRange().Remove(valueOp); + wasNegated = true; + } } } @@ -1529,10 +1534,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_Vector64_Create: - case NI_Vector128_Create: - case NI_Vector64_CreateScalar: - case NI_Vector128_CreateScalar: + case NI_Vector_Create: + case NI_Vector_CreateScalar: { // We don't directly support the Vector64.Create or Vector128.Create methods in codegen // and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect @@ -1542,14 +1545,12 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return LowerHWIntrinsicCreate(node); } - case NI_Vector64_Dot: - case NI_Vector128_Dot: + case NI_Vector_Dot: { return LowerHWIntrinsicDot(node); } - case NI_Vector64_GetElement: - case NI_Vector128_GetElement: + case NI_Vector_GetElement: { GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); @@ -1673,14 +1674,12 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector64_op_Equality: - case NI_Vector128_op_Equality: + case 
NI_Vector_op_Equality: { return LowerHWIntrinsicCmpOp(node, GT_EQ); } - case NI_Vector64_op_Inequality: - case NI_Vector128_op_Inequality: + case NI_Vector_op_Inequality: { return LowerHWIntrinsicCmpOp(node, GT_NE); } @@ -1706,15 +1705,15 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return node->gtNext; } - case NI_Vector128_WithLower: - case NI_Vector128_WithUpper: + case NI_Vector_WithLower: + case NI_Vector_WithUpper: { // Converts to equivalent managed code: // AdvSimd.InsertScalar(vector.AsUInt64(), 0, value.AsUInt64()).As(); // -or- // AdvSimd.InsertScalar(vector.AsUInt64(), 1, value.AsUInt64()).As(); - int index = (intrinsicId == NI_Vector128_WithUpper) ? 1 : 0; + int index = (intrinsicId == NI_Vector_WithUpper) ? 1 : 0; GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); @@ -1970,8 +1969,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm unsigned simdSize = node->GetSimdSize(); var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert((intrinsicId == NI_Vector64_op_Equality) || (intrinsicId == NI_Vector64_op_Inequality) || - (intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality)); + assert((intrinsicId == NI_Vector_op_Equality) || (intrinsicId == NI_Vector_op_Inequality)); assert(varTypeIsSIMD(simdType)); assert(varTypeIsArithmetic(simdBaseType)); @@ -2326,7 +2324,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) unsigned simdSize = node->GetSimdSize(); var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert((intrinsicId == NI_Vector64_Dot) || (intrinsicId == NI_Vector128_Dot)); + assert(intrinsicId == NI_Vector_Dot); assert(varTypeIsSIMD(simdType)); assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); @@ -3861,8 +3859,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector64_CreateScalarUnsafe: - case NI_Vector128_CreateScalarUnsafe: + case 
NI_Vector_CreateScalarUnsafe: case NI_AdvSimd_DuplicateToVector64: case NI_AdvSimd_DuplicateToVector128: case NI_AdvSimd_Arm64_DuplicateToVector64: @@ -3873,8 +3870,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; - case NI_Vector64_GetElement: - case NI_Vector128_GetElement: + case NI_Vector_GetElement: { assert(!IsContainableMemoryOp(intrin.op1) || !IsSafeToContainMem(node, intrin.op1)); assert(intrin.op2->OperIsConst()); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index eeea2751b537b0..40213512b03649 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1290,9 +1290,7 @@ void Lowering::LowerFusedMultiplyOp(GenTreeHWIntrinsic* node) switch (hwArg->GetHWIntrinsicId()) { - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_CreateScalarUnsafe: { GenTree*& argOp = hwArg->Op(1); @@ -1740,19 +1738,13 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_Vector128_ConditionalSelect: - case NI_Vector256_ConditionalSelect: - case NI_Vector512_ConditionalSelect: + case NI_Vector_ConditionalSelect: { return LowerHWIntrinsicCndSel(node); } - case NI_Vector128_Create: - case NI_Vector256_Create: - case NI_Vector512_Create: - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: + case NI_Vector_Create: + case NI_Vector_CreateScalar: { // We don't directly support the Vector128.Create or Vector256.Create methods in codegen // and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect @@ -1764,31 +1756,36 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return LowerHWIntrinsicCreate(node); } - case NI_Vector128_Dot: - case NI_Vector256_Dot: + case NI_Vector_Dot: { + assert((node->GetSimdSize() == 16) || (node->GetSimdSize() == 32)); return LowerHWIntrinsicDot(node); } - 
case NI_Vector128_GetElement: - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: + case NI_Vector_GetElement: { return LowerHWIntrinsicGetElement(node); } - case NI_Vector256_GetUpper: + case NI_Vector_GetUpper: { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX)); var_types simdBaseType = node->GetSimdBaseType(); - if (varTypeIsFloating(simdBaseType) || !m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (node->GetSimdSize() == 32) { - intrinsicId = NI_AVX_ExtractVector128; + if (varTypeIsFloating(simdBaseType) || !m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + intrinsicId = NI_AVX_ExtractVector128; + } + else + { + intrinsicId = NI_AVX2_ExtractVector128; + } } else { - intrinsicId = NI_AVX2_ExtractVector128; + assert(node->GetSimdSize() == 64); + intrinsicId = NI_AVX512_ExtractVector256; } GenTree* op1 = node->Op(1); @@ -1801,44 +1798,32 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector512_GetUpper: - { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512)); - var_types simdBaseType = node->GetSimdBaseType(); - - intrinsicId = NI_AVX512_ExtractVector256; - - GenTree* op1 = node->Op(1); - - GenTree* op2 = m_compiler->gtNewIconNode(1); - BlockRange().InsertBefore(node, op2); - LowerNode(op2); - - node->ResetHWIntrinsicId(intrinsicId, m_compiler, op1, op2); - break; - } - - case NI_Vector128_WithElement: - case NI_Vector256_WithElement: - case NI_Vector512_WithElement: + case NI_Vector_WithElement: { return LowerHWIntrinsicWithElement(node); } - case NI_Vector256_WithLower: - case NI_Vector256_WithUpper: + case NI_Vector_WithLower: + case NI_Vector_WithUpper: { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX)); var_types simdBaseType = node->GetSimdBaseType(); - int index = (intrinsicId == NI_Vector256_WithUpper) ? 1 : 0; + int index = (intrinsicId == NI_Vector_WithUpper) ? 
1 : 0; - if (varTypeIsFloating(simdBaseType) || !m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (node->GetSimdSize() == 32) { - intrinsicId = NI_AVX_InsertVector128; + if (varTypeIsFloating(simdBaseType) || !m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + intrinsicId = NI_AVX_InsertVector128; + } + else + { + intrinsicId = NI_AVX2_InsertVector128; + } } else { - intrinsicId = NI_AVX2_InsertVector128; + assert(node->GetSimdSize() == 64); + intrinsicId = NI_AVX512_InsertVector256; } GenTree* op1 = node->Op(1); @@ -1852,36 +1837,12 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector512_WithLower: - case NI_Vector512_WithUpper: - { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512)); - var_types simdBaseType = node->GetSimdBaseType(); - int index = (intrinsicId == NI_Vector512_WithUpper) ? 1 : 0; - - intrinsicId = NI_AVX512_InsertVector256; - - GenTree* op1 = node->Op(1); - GenTree* op2 = node->Op(2); - - GenTree* op3 = m_compiler->gtNewIconNode(index); - BlockRange().InsertBefore(node, op3); - LowerNode(op3); - - node->ResetHWIntrinsicId(intrinsicId, m_compiler, op1, op2, op3); - break; - } - - case NI_Vector128_op_Equality: - case NI_Vector256_op_Equality: - case NI_Vector512_op_Equality: + case NI_Vector_op_Equality: { return LowerHWIntrinsicCmpOp(node, GT_EQ); } - case NI_Vector128_op_Inequality: - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Inequality: + case NI_Vector_op_Inequality: { return LowerHWIntrinsicCmpOp(node, GT_NE); } @@ -2030,9 +1991,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_ToScalar: { return LowerHWIntrinsicToScalar(node); } @@ -2061,7 +2020,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) node->Op(2) = tmp; } - node->ChangeHWIntrinsicId(NI_Vector128_GetElement); + 
node->ChangeHWIntrinsicId(NI_Vector_GetElement); return LowerNode(node); } break; @@ -2591,9 +2550,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm unsigned simdSize = node->GetSimdSize(); var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert((intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality) || - (intrinsicId == NI_Vector256_op_Equality) || (intrinsicId == NI_Vector256_op_Inequality) || - (intrinsicId == NI_Vector512_op_Equality) || (intrinsicId == NI_Vector512_op_Inequality)); + assert((intrinsicId == NI_Vector_op_Equality) || (intrinsicId == NI_Vector_op_Inequality)); assert(varTypeIsSIMD(simdType)); assert(varTypeIsArithmetic(simdBaseType)); @@ -3001,7 +2958,8 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm GenTree* broadcastOp = nestedIntrin->Op(1); - if (broadcastOp->OperIsHWIntrinsic(NI_Vector128_CreateScalarUnsafe)) + if (broadcastOp->OperIsHWIntrinsic(NI_Vector_CreateScalarUnsafe) && + broadcastOp->TypeIs(TYP_SIMD16)) { BlockRange().Remove(broadcastOp); broadcastOp = broadcastOp->AsHWIntrinsic()->Op(1); @@ -4024,11 +3982,13 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) return node->gtNext; } + assert(intrinsicId == NI_Vector_Create); + // We have the following (where simd is simd16, simd32 or simd64): // /--* op1 T // node = * HWINTRINSIC simd T Create - if (intrinsicId == NI_Vector512_Create) + if (simdType == TYP_SIMD64) { assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512)); // We will be constructing the following parts: @@ -4051,7 +4011,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // /--* op1 T // node = * HWINTRINSIC simd T Create - if (intrinsicId == NI_Vector256_Create) + if (simdType == TYP_SIMD32) { if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { @@ -4107,17 +4067,17 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* 
node) tmp2 = m_compiler->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - tmp3 = m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD32, tmp2, NI_Vector128_ToVector256Unsafe, simdBaseType, - 16); + tmp3 = + m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD32, tmp2, NI_Vector_ToVector256Unsafe, simdBaseType, 16); BlockRange().InsertAfter(tmp2, tmp3); - node->ResetHWIntrinsicId(NI_Vector256_WithUpper, m_compiler, tmp3, tmp1); + node->ResetHWIntrinsicId(NI_Vector_WithUpper, m_compiler, tmp3, tmp1); LowerNode(tmp3); return LowerNode(node); } - assert(intrinsicId == NI_Vector128_Create); + assert(simdType == TYP_SIMD16); // We will be constructing the following parts: // /--* op1 T @@ -4339,7 +4299,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) return LowerNode(node); } - if (intrinsicId == NI_Vector512_Create || intrinsicId == NI_Vector256_Create) + assert(intrinsicId == NI_Vector_Create); + + if ((simdType == TYP_SIMD32) || (simdType == TYP_SIMD64)) { assert(argCnt >= (simdSize / genTypeSize(TYP_LONG))); assert(((simdSize == 64) && m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512)) || @@ -4378,9 +4340,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // lo = Vector256.Create(op1, ..., op16); // hi = Vector256.Create(op17, ..., op32); - var_types halfType = m_compiler->getSIMDTypeForSize(simdSize / 2); - NamedIntrinsic halfCreate = (simdSize == 64) ? NI_Vector256_Create : NI_Vector128_Create; - NamedIntrinsic withUpper = (simdSize == 64) ? 
NI_Vector512_WithUpper : NI_Vector256_WithUpper; + var_types halfType = m_compiler->getSIMDTypeForSize(simdSize / 2); size_t halfArgCnt = argCnt / 2; assert((halfArgCnt * 2) == argCnt); @@ -4388,13 +4348,13 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt); GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt); - GenTree* lo = m_compiler->gtNewSimdHWIntrinsicNode(halfType, node->GetOperandArray(), halfArgCnt, halfCreate, - simdBaseType, simdSize / 2); + GenTree* lo = m_compiler->gtNewSimdHWIntrinsicNode(halfType, node->GetOperandArray(), halfArgCnt, + NI_Vector_Create, simdBaseType, simdSize / 2); GenTree* hi = m_compiler->gtNewSimdHWIntrinsicNode(halfType, node->GetOperandArray(halfArgCnt), halfArgCnt, - halfCreate, simdBaseType, simdSize / 2); + NI_Vector_Create, simdBaseType, simdSize / 2); - node->ResetHWIntrinsicId(withUpper, m_compiler, lo, hi); + node->ResetHWIntrinsicId(NI_Vector_WithUpper, m_compiler, lo, hi); BlockRange().InsertAfter(loInsertionPoint, lo); BlockRange().InsertAfter(hiInsertionPoint, hi); @@ -4405,7 +4365,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) return LowerNode(node); } - assert(intrinsicId == NI_Vector128_Create); + assert(simdType == TYP_SIMD16); // We will be constructing the following parts: // /--* op1 T @@ -4647,20 +4607,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) // Specially handle as ToScalar BlockRange().Remove(op2); - if (simdSize == 64) - { - intrinsicId = NI_Vector512_ToScalar; - } - else if (simdSize == 32) - { - intrinsicId = NI_Vector256_ToScalar; - } - else - { - intrinsicId = NI_Vector128_ToScalar; - } - - node->ResetHWIntrinsicId(intrinsicId, op1); + node->ResetHWIntrinsicId(NI_Vector_ToScalar, op1); return LowerNode(node); } @@ -4907,7 +4854,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) 
GenTree* tmp1 = nullptr; GenTree* tmp2 = nullptr; - if (intrinsicId == NI_Vector512_GetElement) + if (simdSize == 64) { assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512)); @@ -4922,8 +4869,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) // ... // op1 = op1.GetLower().GetLower(); - tmp1 = - m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector512_GetLower128, simdBaseType, simdSize); + tmp1 = m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector_GetLower128, simdBaseType, simdSize); BlockRange().InsertBefore(node, tmp1); LowerNode(tmp1); } @@ -4958,7 +4904,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) op1 = tmp1; } - else if (intrinsicId == NI_Vector256_GetElement) + else if (simdSize == 32) { assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -5008,7 +4954,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) // Specially handle as ToScalar node->SetSimdSize(16); - node->ResetHWIntrinsicId(NI_Vector128_ToScalar, op1); + node->ResetHWIntrinsicId(NI_Vector_ToScalar, op1); return LowerNode(node); } @@ -5030,7 +4976,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_DOUBLE: { // We specially handle float and double for more efficient codegen - resIntrinsic = NI_Vector128_GetElement; + resIntrinsic = NI_Vector_GetElement; // GetElement takes a native sized index after lowering, so change // the type of the constant we inserted above. 
// (This is generally only for the non constant index case, @@ -5150,7 +5096,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) GenTree* tmp2 = nullptr; GenTreeHWIntrinsic* result = node; - if (intrinsicId == NI_Vector512_WithElement) + if (simdType == TYP_SIMD64) { // If we have a simd64 WithElement, we will spill the original // simd64 source into a local, extract the relevant simd16 from @@ -5189,8 +5135,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // ... // op1 = op1.GetLower().GetLower(); - tmp1 = - m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector512_GetLower128, simdBaseType, simdSize); + tmp1 = m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector_GetLower128, simdBaseType, simdSize); BlockRange().InsertAfter(op1, tmp1); LowerNode(tmp1); } @@ -5235,7 +5180,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) node->ResetHWIntrinsicId(insertIntrinsicId, m_compiler, tmp64, result, idx); } - else if (intrinsicId == NI_Vector256_WithElement) + else if (simdType == TYP_SIMD32) { // If we have a simd32 WithElement, we will spill the original // simd32 source into a local, extract the lower/upper half from @@ -5304,11 +5249,11 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // Now we will insert our "result" into our simd32 temporary. 
if (simd16Idx == 0) { - node->ResetHWIntrinsicId(NI_Vector256_WithLower, m_compiler, tmp32, result); + node->ResetHWIntrinsicId(NI_Vector_WithLower, m_compiler, tmp32, result); } else { - node->ResetHWIntrinsicId(NI_Vector256_WithUpper, m_compiler, tmp32, result); + node->ResetHWIntrinsicId(NI_Vector_WithUpper, m_compiler, tmp32, result); } } else @@ -5386,7 +5331,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) assert(result->GetHWIntrinsicId() != intrinsicId); GenTree* nextNode = LowerNode(result); - if (intrinsicId == NI_Vector512_WithElement) + if (simdType == TYP_SIMD64) { // Now that we have finalized the shape of the tree, lower the insertion node as well. @@ -5395,12 +5340,11 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) nextNode = LowerNode(node); } - else if (intrinsicId == NI_Vector256_WithElement) + else if (simdType == TYP_SIMD32) { // Now that we have finalized the shape of the tree, lower the insertion node as well. - assert((node->GetHWIntrinsicId() == NI_Vector256_WithLower) || - (node->GetHWIntrinsicId() == NI_Vector256_WithUpper)); + assert((node->GetHWIntrinsicId() == NI_Vector_WithLower) || (node->GetHWIntrinsicId() == NI_Vector_WithUpper)); assert(node != result); nextNode = LowerNode(node); @@ -5429,7 +5373,7 @@ GenTree* Lowering::LowerHWIntrinsicDotInnerMulSum(GenTreeHWIntrinsic* node) unsigned simdSize = node->GetSimdSize(); var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert((intrinsicId == NI_Vector128_Dot) || (intrinsicId == NI_Vector256_Dot)); + assert(intrinsicId == NI_Vector_Dot); assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX)); assert(varTypeIsSIMD(simdType)); assert(varTypeIsFloating(simdBaseType)); @@ -5596,9 +5540,7 @@ GenTree* Lowering::LowerHWIntrinsicDotInnerMulSum(GenTreeHWIntrinsic* node) LowerNode(tmp1); - tmp2 = m_compiler->gtNewSimdHWIntrinsicNode(node->gtType, tmp1, - simdSize == 16 ? 
NI_Vector128_ToScalar : NI_Vector256_ToScalar, - simdBaseType, simdSize); + tmp2 = m_compiler->gtNewSimdHWIntrinsicNode(node->gtType, tmp1, NI_Vector_ToScalar, simdBaseType, simdSize); BlockRange().InsertAfter(tmp1, tmp2); tmp1 = tmp2; } @@ -5626,7 +5568,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) var_types simdType = Compiler::getSIMDTypeForSize(simdSize); unsigned simd16Count = m_compiler->getSIMDVectorLength(16, simdBaseType); - assert((intrinsicId == NI_Vector128_Dot) || (intrinsicId == NI_Vector256_Dot)); + assert(intrinsicId == NI_Vector_Dot); assert(varTypeIsSIMD(simdType)); assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); @@ -5727,7 +5669,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(idx, tmp3); LowerNode(tmp3); - node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); + node->ResetHWIntrinsicId(NI_Vector_ToScalar, tmp3); } return LowerNode(node); @@ -5768,7 +5710,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(idx, tmp3); LowerNode(tmp3); - node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); + node->ResetHWIntrinsicId(NI_Vector_ToScalar, tmp3); } return LowerNode(node); @@ -7240,14 +7182,13 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) GenTreeHWIntrinsic* hwintrinsic = src->AsHWIntrinsic(); NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); var_types simdBaseType = hwintrinsic->GetSimdBaseType(); + unsigned simdSize = hwintrinsic->GetSimdSize(); bool isContainable = false; GenTree* clearContainedNode = nullptr; switch (intrinsicId) { - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_ToScalar: { // These intrinsics are "ins reg/mem, xmm" or "ins xmm, reg/mem" // @@ -7272,14 +7213,14 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) { var_types baseType = varTypeIsByte(node) ? 
TYP_UBYTE : TYP_USHORT; - if (intrinsicId == NI_Vector512_ToScalar) + if (simdSize == 64) { - op1 = m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector512_GetLower128, + op1 = m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector_GetLower128, baseType, 64); BlockRange().InsertBefore(hwintrinsic, op1); LowerNode(op1); } - else if (intrinsicId == NI_Vector256_ToScalar) + else if (simdSize == 32) { op1 = m_compiler->gtNewSimdGetLowerNode(TYP_SIMD16, op1, baseType, 32); BlockRange().InsertBefore(hwintrinsic, op1); @@ -7312,8 +7253,13 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) break; } - case NI_Vector128_GetElement: + case NI_Vector_GetElement: { + if (simdSize != 16) + { + break; + } + // GetElement for floating-point is specially handled since double // doesn't have a direct "extract" instruction and float cannot extract // to a SIMD register. @@ -8556,12 +8502,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre switch (parentIntrinsicId) { - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { // Integral scalar loads to vector use movd/movq, so small types must be sized up. // They may also use a GR reg, so disable SIMD operand containment. 
@@ -8662,12 +8604,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre switch (intrinsicId) { - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { if (!supportsSIMDScalarLoad) { @@ -9277,12 +9215,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } #ifdef TARGET_X86 - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { if (op1->OperIsLong()) { @@ -9310,9 +9244,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_ToScalar: { // These will be contained by a STOREIND if (varTypeIsLong(simdBaseType)) @@ -9434,12 +9366,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) GenTreeHWIntrinsic* userHw = use.User()->AsHWIntrinsic(); NamedIntrinsic userId = userHw->GetHWIntrinsicId(); - bool isEQ = (userId == NI_Vector128_op_Equality) || - (userId == NI_Vector256_op_Equality) || - (userId == NI_Vector512_op_Equality); - bool isNE = (userId == NI_Vector128_op_Inequality) || - (userId == NI_Vector256_op_Inequality) || - (userId == NI_Vector512_op_Inequality); + bool isEQ = (userId == NI_Vector_op_Equality); + bool isNE = (userId == NI_Vector_op_Inequality); + if ((isEQ || isNE) && (userHw->Op(1)->IsVectorZero() || userHw->Op(2)->IsVectorZero())) { isEmbeddedBroadcastCompatible = false; @@ -9652,9 +9581,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case 
NI_Vector128_GetElement: - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: + case NI_Vector_GetElement: { if (op2->OperIsConst()) { @@ -9668,8 +9595,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: + case NI_Vector_op_Division: { break; } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 3e24519a5566d0..7c3385ebbe6303 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -2348,8 +2348,7 @@ GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, GenT switch (intrinsicId) { - case NI_Vector64_CreateScalarUnsafe: - case NI_Vector128_CreateScalarUnsafe: + case NI_Vector_CreateScalarUnsafe: if (varTypeIsFloating(intrinsicTree->Op(1))) { delayFreeOp = intrinsicTree->Op(1); @@ -2365,8 +2364,7 @@ GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, GenT } break; - case NI_Vector64_ToScalar: - case NI_Vector128_ToScalar: + case NI_Vector_ToScalar: if (varTypeIsFloating(intrinsicTree)) { delayFreeOp = intrinsicTree->Op(1); @@ -2374,10 +2372,9 @@ GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, GenT } break; - case NI_Vector64_ToVector128Unsafe: - case NI_Vector128_AsVector128Unsafe: - case NI_Vector128_AsVector3: - case NI_Vector128_GetLower: + case NI_Vector_ToVector128Unsafe: + case NI_Vector_AsVector3: + case NI_Vector_GetLower: delayFreeOp = intrinsicTree->Op(1); assert(delayFreeOp != nullptr); break; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 05781578c7c608..58a9e2cbef9cc6 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2036,12 +2036,8 @@ static GenTree* SkipContainedUnaryOp(GenTree* node) switch (intrinsicId) { - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case 
NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: { return hwintrinsic->Op(1); } @@ -2200,15 +2196,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // must be handled within the case. switch (intrinsicId) { - case NI_Vector128_CreateScalar: - case NI_Vector256_CreateScalar: - case NI_Vector512_CreateScalar: - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - case NI_Vector512_CreateScalarUnsafe: - case NI_Vector128_ToScalar: - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: + case NI_Vector_CreateScalar: + case NI_Vector_CreateScalarUnsafe: + case NI_Vector_ToScalar: { assert(numArgs == 1); @@ -2242,9 +2232,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_Vector128_GetElement: - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: + case NI_Vector_GetElement: { assert(numArgs == 2); @@ -2264,9 +2252,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_Vector128_WithElement: - case NI_Vector256_WithElement: - case NI_Vector512_WithElement: + case NI_Vector_WithElement: { assert(numArgs == 3); @@ -2291,17 +2277,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_Vector128_AsVector128Unsafe: - case NI_Vector128_AsVector2: - case NI_Vector128_AsVector3: - case NI_Vector128_ToVector256: - case NI_Vector128_ToVector512: - case NI_Vector256_ToVector512: - case NI_Vector128_ToVector256Unsafe: - case NI_Vector256_ToVector512Unsafe: - case NI_Vector256_GetLower: - case NI_Vector512_GetLower: - case NI_Vector512_GetLower128: + case NI_Vector_AsVector128Unsafe: + case NI_Vector_AsVector2: + case NI_Vector_AsVector3: + case NI_Vector_ToVector256: + case NI_Vector_ToVector256Unsafe: + case NI_Vector_ToVector512: + case 
NI_Vector_ToVector512Unsafe: + case NI_Vector_GetLower: + case NI_Vector_GetLower128: { assert(numArgs == 1); SingleTypeRegSet apxAwareRegCandidates = @@ -2830,8 +2814,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: + case NI_Vector_op_Division: { srcCount = BuildOperandUses(op1, lowSIMDRegs()); srcCount += BuildOperandUses(op2, lowSIMDRegs()); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 7c99a66f80bc9f..b63eafcf8cc4e6 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9329,11 +9329,13 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { -#if defined(TARGET_ARM64) - case NI_Vector64_Create: -#endif // TARGET_ARM64 - case NI_Vector128_Create: + case NI_Vector_Create: { + if ((simdSize != 8) && (simdSize != 16)) + { + break; + } + // The managed `Dot` API returns a scalar. However, many common usages require // it to be then immediately broadcast back to a vector so that it can be used // in a subsequent operation. 
One of the most common is normalizing a vector @@ -9389,12 +9391,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) GenTreeHWIntrinsic* hwop1 = op1->AsHWIntrinsic(); -#if defined(TARGET_ARM64) - if ((hwop1->GetHWIntrinsicId() == NI_Vector64_ToScalar) || - (hwop1->GetHWIntrinsicId() == NI_Vector128_ToScalar)) -#else - if (hwop1->GetHWIntrinsicId() == NI_Vector128_ToScalar) -#endif + if (hwop1->GetHWIntrinsicId() == NI_Vector_ToScalar) { op1 = hwop1->Op(1); @@ -9407,11 +9404,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) hwop1 = op1->AsHWIntrinsic(); } -#if defined(TARGET_ARM64) - if ((hwop1->GetHWIntrinsicId() != NI_Vector64_Dot) && (hwop1->GetHWIntrinsicId() != NI_Vector128_Dot)) -#else - if (hwop1->GetHWIntrinsicId() != NI_Vector128_Dot) -#endif + if (hwop1->GetHWIntrinsicId() != NI_Vector_Dot) { break; } @@ -11835,14 +11828,7 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) switch (intrinsic) { #if !defined(TARGET_WASM) -#if defined(TARGET_ARM64) - case NI_Vector64_CreateGeometricSequence: -#endif // TARGET_ARM64 - case NI_Vector128_CreateGeometricSequence: -#if defined(TARGET_XARCH) - case NI_Vector256_CreateGeometricSequence: - case NI_Vector512_CreateGeometricSequence: -#endif // TARGET_XARCH + case NI_Vector_CreateGeometricSequence: { assert(tree->GetOperandCount() == 2); diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index 5f96a30903d1d2..3ce8c9e8f13250 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -179,22 +179,15 @@ enum NamedIntrinsic : unsigned short NI_System_Numerics_Intrinsic, NI_System_Runtime_Intrinsics_Intrinsic, -#ifdef FEATURE_HW_INTRINSICS +#if defined(FEATURE_HW_INTRINSICS) NI_HW_INTRINSIC_START, -#if defined(TARGET_XARCH) -#define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, intCost, fltCost, \ - category, flag) \ - NI_##isa##_##name, 
-#include "hwintrinsiclistxarch.h" -#elif defined(TARGET_ARM64) -#define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ - NI_##isa##_##name, -#include "hwintrinsiclistarm64.h" -#elif defined(TARGET_WASM) -#define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ + + // clang-format off +#define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, intCost, fltCost, category, flag) \ NI_##isa##_##name, -#include "hwintrinsiclistwasm.h" -#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_WASM) +#include "hwintrinsiclist.h" + // clang-format on + NI_HW_INTRINSIC_END, #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/rangecheck.cpp b/src/coreclr/jit/rangecheck.cpp index 4fd6e4a96b65ad..9e7eb209db9aae 100644 --- a/src/coreclr/jit/rangecheck.cpp +++ b/src/coreclr/jit/rangecheck.cpp @@ -893,17 +893,13 @@ Range RangeCheck::GetRangeFromAssertionsWorker( } #if defined(FEATURE_HW_INTRINSICS) + case VNF_HWI_Vector_ExtractMostSignificantBits: #if defined(TARGET_XARCH) - case VNF_HWI_Vector256_ExtractMostSignificantBits: - case VNF_HWI_Vector512_ExtractMostSignificantBits: case VNF_HWI_X86Base_MoveMask: case VNF_HWI_AVX_MoveMask: case VNF_HWI_AVX2_MoveMask: case VNF_HWI_AVX512_MoveMask: -#elif defined(TARGET_ARM64) - case VNF_HWI_Vector64_ExtractMostSignificantBits: #endif - case VNF_HWI_Vector128_ExtractMostSignificantBits: { // We have 1 bit per element, remaining upper bits are 0 diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index fc4545a2cb46e8..87460c47c622a9 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -373,14 +373,7 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStackgtFlags & GTF_REVERSE_OPS) == 0); // gtNewSimdShuffleNode with reverse ops is not supported GenTree* op1 = operands[0]; 
GenTree* op2 = operands[1]; - bool isShuffleNative = intrinsicId != NI_Vector128_Shuffle; -#if defined(TARGET_XARCH) - isShuffleNative = - isShuffleNative && (intrinsicId != NI_Vector256_Shuffle) && (intrinsicId != NI_Vector512_Shuffle); -#elif defined(TARGET_ARM64) - isShuffleNative = isShuffleNative && (intrinsicId != NI_Vector64_Shuffle); -#endif + bool isShuffleNative = intrinsicId != NI_Vector_Shuffle; // Check if the required intrinsics to emit are available. if (!m_compiler->IsValidForShuffle(op2, simdSize, simdBaseType, nullptr, isShuffleNative)) @@ -463,11 +433,15 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStackgtType = genActualType(simdBaseType); - node->ChangeHWIntrinsicId(intrinsic); + node->ChangeHWIntrinsicId(NI_Vector_ToScalar); node->SetSimdSize(8); node->SetSimdBaseType(simdBaseType); node->Op(1) = op1; diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 43999e89edcebf..69ac16d48713c6 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -305,7 +305,6 @@ struct simd64_t static_assert(sizeof(simd64_t) == 64); #endif // TARGET_XARCH -#if defined(FEATURE_MASKED_HW_INTRINSICS) struct simdmask_t { union @@ -376,7 +375,6 @@ struct simdmask_t } }; static_assert(sizeof(simdmask_t) == 8); -#endif // FEATURE_MASKED_HW_INTRINSICS #if defined(TARGET_XARCH) typedef simd64_t simd_t; @@ -633,6 +631,7 @@ inline void EvaluateUnaryMask( } } } +#endif // FEATURE_MASKED_HW_INTRINSICS template inline void EvaluateExtractMSB(simdmask_t* result, const TSimd& arg0) @@ -695,7 +694,6 @@ inline void EvaluateExtractMSB(var_types baseType, simdmask_t* result, const TSi } } } -#endif // FEATURE_MASKED_HW_INTRINSICS template void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, const TSimd& arg0) diff --git a/src/coreclr/jit/stacklevelsetter.cpp b/src/coreclr/jit/stacklevelsetter.cpp index 91ae68a7e35b0a..c33de2c033d374 100644 --- a/src/coreclr/jit/stacklevelsetter.cpp +++ 
b/src/coreclr/jit/stacklevelsetter.cpp @@ -266,7 +266,7 @@ void StackLevelSetter::SetThrowHelperBlocks(GenTree* node, BasicBlock* block) { NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->GetHWIntrinsicId(); - if (intrinsicId == NI_Vector128_op_Division || intrinsicId == NI_Vector256_op_Division) + if (intrinsicId == NI_Vector_op_Division) { SetThrowHelperBlock(SCK_DIV_BY_ZERO, block); SetThrowHelperBlock(SCK_OVERFLOW, block); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index a8ee6dd94e0a8b..534310ab7f9cdf 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -7139,17 +7139,13 @@ bool ValueNumStore::IsVNNeverNegative(ValueNum vn) } #if defined(FEATURE_HW_INTRINSICS) + case VNF_HWI_Vector_ExtractMostSignificantBits: #if defined(TARGET_XARCH) - case VNF_HWI_Vector256_ExtractMostSignificantBits: - case VNF_HWI_Vector512_ExtractMostSignificantBits: case VNF_HWI_X86Base_MoveMask: case VNF_HWI_AVX_MoveMask: case VNF_HWI_AVX2_MoveMask: case VNF_HWI_AVX512_MoveMask: -#elif defined(TARGET_ARM64) - case VNF_HWI_Vector64_ExtractMostSignificantBits: #endif - case VNF_HWI_Vector128_ExtractMostSignificantBits: { // We have 1 bit per element, remaining upper bits are 0 @@ -8290,15 +8286,12 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, switch (ni) { -#if defined(TARGET_ARM64) - case NI_Vector64_ExtractMostSignificantBits: -#elif defined(TARGET_XARCH) - case NI_Vector256_ExtractMostSignificantBits: + case NI_Vector_ExtractMostSignificantBits: +#if defined(TARGET_XARCH) case NI_X86Base_MoveMask: case NI_AVX_MoveMask: case NI_AVX2_MoveMask: #endif - case NI_Vector128_ExtractMostSignificantBits: { #ifdef FEATURE_MASKED_HW_INTRINSICS @@ -8428,21 +8421,21 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, return VNForLongCon(static_cast(result)); } - case NI_Vector64_ToVector128: - case NI_Vector64_ToVector128Unsafe: + case NI_Vector_ToVector128: + case 
NI_Vector_ToVector128Unsafe: { simd16_t result = {}; result.v64[0] = GetConstantSimd8(arg0VN); return VNForSimd16Con(result); } - case NI_Vector128_GetLower: + case NI_Vector_GetLower: { simd8_t result = GetConstantSimd16(arg0VN).v64[0]; return VNForSimd8Con(result); } - case NI_Vector128_GetUpper: + case NI_Vector_GetUpper: { simd8_t result = GetConstantSimd16(arg0VN).v64[1]; return VNForSimd8Con(result); @@ -8555,67 +8548,75 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, return VNForLongCon(static_cast(result)); } - case NI_Vector128_ToVector256: - case NI_Vector128_ToVector256Unsafe: + case NI_Vector_ToVector256: + case NI_Vector_ToVector256Unsafe: { simd32_t result = {}; result.v128[0] = GetConstantSimd16(arg0VN); return VNForSimd32Con(result); } - case NI_Vector128_ToVector512: + case NI_Vector_ToVector512: + case NI_Vector_ToVector512Unsafe: { simd64_t result = {}; - result.v128[0] = GetConstantSimd16(arg0VN); - return VNForSimd64Con(result); - } - case NI_Vector256_GetLower: - { - simd16_t result = GetConstantSimd32(arg0VN).v128[0]; - return VNForSimd16Con(result); - } - - case NI_Vector256_GetUpper: - { - simd16_t result = GetConstantSimd32(arg0VN).v128[1]; - return VNForSimd16Con(result); - } - - case NI_Vector256_ToVector512: - case NI_Vector256_ToVector512Unsafe: - { - simd64_t result = {}; - result.v256[0] = GetConstantSimd32(arg0VN); + if (simdSize == 16) + { + result.v128[0] = GetConstantSimd16(arg0VN); + } + else + { + assert(simdSize == 32); + result.v256[0] = GetConstantSimd32(arg0VN); + } return VNForSimd64Con(result); } - case NI_Vector512_GetLower: + case NI_Vector_GetLower: { - simd32_t result = GetConstantSimd64(arg0VN).v256[0]; - return VNForSimd32Con(result); + if (simdSize == 64) + { + simd32_t result = GetConstantSimd64(arg0VN).v256[0]; + return VNForSimd32Con(result); + } + else + { + assert(simdSize == 32); + simd16_t result = GetConstantSimd32(arg0VN).v128[0]; + return VNForSimd16Con(result); + } } - case 
NI_Vector512_GetUpper: + case NI_Vector_GetUpper: { - simd32_t result = GetConstantSimd64(arg0VN).v256[1]; - return VNForSimd32Con(result); + if (simdSize == 64) + { + simd32_t result = GetConstantSimd64(arg0VN).v256[1]; + return VNForSimd32Con(result); + } + else + { + assert(simdSize == 32); + simd16_t result = GetConstantSimd32(arg0VN).v128[1]; + return VNForSimd16Con(result); + } } - case NI_Vector512_GetLower128: + case NI_Vector_GetLower128: { simd16_t result = GetConstantSimd64(arg0VN).v128[0]; return VNForSimd16Con(result); } #endif // TARGET_XARCH - case NI_Vector128_AsVector2: + case NI_Vector_AsVector2: { simd8_t result = GetConstantSimd16(arg0VN).v64[0]; return VNForSimd8Con(result); } - case NI_Vector128_AsVector3: + case NI_Vector_AsVector3: { simd12_t result = {}; simd16_t vector = GetConstantSimd16(arg0VN); @@ -8627,7 +8628,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, return VNForSimd12Con(result); } - case NI_Vector128_AsVector128Unsafe: + case NI_Vector_AsVector128Unsafe: { if (TypeOfVN(arg0VN) == TYP_SIMD8) { @@ -8650,13 +8651,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, } } - case NI_Vector128_ToScalar: -#ifdef TARGET_ARM64 - case NI_Vector64_ToScalar: -#elif defined(TARGET_XARCH) - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: -#endif + case NI_Vector_ToScalar: { return EvaluateSimdGetElement(this, TypeOfVN(arg0VN), baseType, arg0VN, 0); } @@ -8784,13 +8779,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( switch (ni) { - case NI_Vector128_GetElement: -#ifdef TARGET_ARM64 - case NI_Vector64_GetElement: -#elif defined(TARGET_XARCH) - case NI_Vector256_GetElement: - case NI_Vector512_GetElement: -#endif + case NI_Vector_GetElement: { var_types simdType = TypeOfVN(arg0VN); int32_t index = GetConstantInt32(arg1VN); @@ -8815,14 +8804,14 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( return EvaluateBinarySimd(this, GT_MUL, /* scalar */ false, type, baseType, 
arg0VN, arg1VN); } - case NI_Vector128_WithLower: + case NI_Vector_WithLower: { simd16_t result = GetConstantSimd16(arg0VN); result.v64[0] = GetConstantSimd8(arg1VN); return VNForSimd16Con(result); } - case NI_Vector128_WithUpper: + case NI_Vector_WithUpper: { simd16_t result = GetConstantSimd16(arg0VN); result.v64[1] = GetConstantSimd8(arg1VN); @@ -8831,32 +8820,38 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( #endif // TARGET_ARM64 #if defined(TARGET_XARCH) - case NI_Vector256_WithLower: - { - simd32_t result = GetConstantSimd32(arg0VN); - result.v128[0] = GetConstantSimd16(arg1VN); - return VNForSimd32Con(result); - } - - case NI_Vector256_WithUpper: + case NI_Vector_WithLower: { - simd32_t result = GetConstantSimd32(arg0VN); - result.v128[1] = GetConstantSimd16(arg1VN); - return VNForSimd32Con(result); - } - - case NI_Vector512_WithLower: - { - simd64_t result = GetConstantSimd64(arg0VN); - result.v256[0] = GetConstantSimd32(arg1VN); - return VNForSimd64Con(result); + if (simdSize == 64) + { + simd64_t result = GetConstantSimd64(arg0VN); + result.v256[0] = GetConstantSimd32(arg1VN); + return VNForSimd64Con(result); + } + else + { + assert(simdSize == 32); + simd32_t result = GetConstantSimd32(arg0VN); + result.v128[0] = GetConstantSimd16(arg1VN); + return VNForSimd32Con(result); + } } - case NI_Vector512_WithUpper: + case NI_Vector_WithUpper: { - simd64_t result = GetConstantSimd64(arg0VN); - result.v256[1] = GetConstantSimd32(arg1VN); - return VNForSimd64Con(result); + if (simdSize == 64) + { + simd64_t result = GetConstantSimd64(arg0VN); + result.v256[1] = GetConstantSimd32(arg1VN); + return VNForSimd64Con(result); + } + else + { + assert(simdSize == 32); + simd32_t result = GetConstantSimd32(arg0VN); + result.v128[1] = GetConstantSimd16(arg1VN); + return VNForSimd32Con(result); + } } #endif // TARGET_XARCH @@ -9348,13 +9343,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( } #endif - case NI_Vector128_op_Equality: -#if defined(TARGET_ARM64) - 
case NI_Vector64_op_Equality: -#elif defined(TARGET_XARCH) - case NI_Vector256_op_Equality: - case NI_Vector512_op_Equality: -#endif // !TARGET_ARM64 && !TARGET_XARCH + case NI_Vector_op_Equality: { if (varTypeIsFloating(baseType)) { @@ -9369,13 +9358,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( break; } - case NI_Vector128_op_Inequality: -#if defined(TARGET_ARM64) - case NI_Vector64_op_Inequality: -#elif defined(TARGET_XARCH) - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Inequality: -#endif // !TARGET_ARM64 && !TARGET_XARCH + case NI_Vector_op_Inequality: { if (varTypeIsFloating(baseType)) { @@ -9475,13 +9458,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( switch (ni) { - case NI_Vector128_op_Equality: -#if defined(TARGET_ARM64) - case NI_Vector64_op_Equality: -#elif defined(TARGET_XARCH) - case NI_Vector256_op_Equality: - case NI_Vector512_op_Equality: -#endif // !TARGET_ARM64 && !TARGET_XARCH + case NI_Vector_op_Equality: { // We can't handle floating-point due to NaN @@ -9492,13 +9469,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( break; } - case NI_Vector128_op_Inequality: -#if defined(TARGET_ARM64) - case NI_Vector64_op_Inequality: -#elif defined(TARGET_XARCH) - case NI_Vector256_op_Inequality: - case NI_Vector512_op_Inequality: -#endif // !TARGET_ARM64 && !TARGET_XARCH + case NI_Vector_op_Inequality: { // We can't handle floating-point due to NaN @@ -9640,9 +9611,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary( #ifndef TARGET_WASM // TODO-WASM: Implement bitwise select case #if defined(TARGET_XARCH) - case NI_Vector128_ConditionalSelect: - case NI_Vector256_ConditionalSelect: - case NI_Vector512_ConditionalSelect: + case NI_Vector_ConditionalSelect: #elif defined(TARGET_ARM64) case NI_AdvSimd_BitwiseSelect: case NI_Sve_ConditionalSelect: @@ -9709,13 +9678,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary( } #endif // !defined(TARGET_WASM) - case NI_Vector128_WithElement: -#ifdef TARGET_ARM64 - case 
NI_Vector64_WithElement: -#elif defined(TARGET_XARCH) - case NI_Vector256_WithElement: - case NI_Vector512_WithElement: -#endif + case NI_Vector_WithElement: { if (!IsVNConstant(arg0VN) || !IsVNConstant(arg1VN) || !IsVNConstant(arg2VN)) { @@ -13883,7 +13846,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) } }; - // There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector128_Zero + // There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector_Zero if (opCount == 0) { // There are zero arg HWINTRINSICS operations that encode the result type, i.e. Vector128_AllBitSet diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index 7026e444388c72..a0b03da75c60f1 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -191,43 +191,26 @@ ValueNumFuncDef(SimdType, 2, false, false) // A value number function to compos // In VN all HW intrinsics encode an extra arg for the base type (except when // they are variadic), hence the +1 to the arg count below here. -#if defined(TARGET_XARCH) +#if defined(FEATURE_HW_INTRINSICS) +ValueNumFuncDef(HWI_INTRINSIC_START, -1, false, false) + #define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, intCost, fltCost, category, flag) \ ValueNumFuncDef(HWI_##isa##_##name, ((numArgs == -1) ? -1 : (numArgs + 1)), ((flag) & HW_Flag_Commutative) >> 0, false) // All of the HARDWARE_INTRINSICS for x86/x64 -#include "hwintrinsiclistxarch.h" -#define VNF_HWI_FIRST VNF_HWI_Vector128_Abs -#define VNF_HWI_LAST VNF_HWI_AVX512_XnorMask - -#elif defined(TARGET_ARM64) -#define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ -ValueNumFuncDef(HWI_##isa##_##name, ((numArgs == -1) ? 
-1 : (numArgs + 1)), ((flag) & HW_Flag_Commutative) >> 0, false) // All of the HARDWARE_INTRINSICS for arm64 -#include "hwintrinsiclistarm64.h" -#define VNF_HWI_FIRST VNF_HWI_Vector64_Abs -#define VNF_HWI_LAST VNF_HWI_Sve_ReverseElement_Predicates +#include "hwintrinsiclist.h" -#elif defined(TARGET_ARM) -// No Hardware Intrinsics on ARM32 +ValueNumFuncDef(HWI_INTRINSIC_END, -1, false, false) -#elif defined(TARGET_LOONGARCH64) - //TODO-LOONGARCH64-CQ: add LoongArch64's Hardware Intrinsics Instructions if supported. +#define VNF_HWI_FIRST (VNF_HWI_INTRINSIC_START + 1) +#define VNF_HWI_LAST (VNF_HWI_INTRINSIC_END - 1) +#endif // FEATURE_HW_INTRINSICS -#elif defined (TARGET_RISCV64) +#if defined(TARGET_RISCV64) // Signed/Unsigned integer min/max intrinsics ValueNumFuncDef(MinInt, 2, true, false) ValueNumFuncDef(MaxInt, 2, true, false) ValueNumFuncDef(MinInt_UN, 2, true, false) ValueNumFuncDef(MaxInt_UN, 2, true, false) - -#elif defined(TARGET_WASM) -#define HARDWARE_INTRINSIC(isa, name, simdSize, numArgs, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ -ValueNumFuncDef(HWI_##isa##_##name, ((numArgs == -1) ? 
-1 : (numArgs + 1)), ((flag) & HW_Flag_Commutative) >> 0, false) // All of the HARDWARE_INTRINSICS for wasm -#include "hwintrinsiclistwasm.h" -#define VNF_HWI_FIRST VNF_HWI_Vector128_As -#define VNF_HWI_LAST VNF_HWI_Vector128_op_UnaryPlus - -#else -#error Unsupported platform -#endif +#endif // TARGET_RISCV64 // clang-format on diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs index 3acb2c3de66e43..d15da9ddbff34c 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs @@ -1056,8 +1056,9 @@ enum CORINFO_InstructionSet InstructionSet_ILLEGAL = 0, "); - int lastAvailableBit = (FlagsFieldCount * 64) - 1; - tr.WriteLine($" InstructionSet_NONE = {lastAvailableBit},"); + int lastAvailableBit = (FlagsFieldCount * 64) - 2; + tr.WriteLine($" InstructionSet_Vector = {lastAvailableBit},"); + tr.WriteLine($" InstructionSet_NONE = {lastAvailableBit + 1},"); foreach (string architecture in _architectures) {