[X86] Handle VPMADD52L for smaller min-legal-vector-width (llvm#183250)

JaydeepChauhan14 · web-flow · commit 5dd160058f68 · 2026-02-26T14:05:11.000+05:30
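Fixes a crash introduced by llvm#171760 (reproducer: https://godbolt.org/z/qdE1EE4Y9): ``` ReplaceNodeResults: t32: v8i64 = X86ISD::VPMADD52L t10, t22, t2 Do not know how to custom type legalize this operation! ``` With ```"min-legal-vector-width"="512"``` this works fine, but with a smaller value it crashes. This patch handles X86ISD::VPMADD52L in ReplaceNodeResults by splitting all three operands in half, emitting one VPMADD52L node per half, and concatenating the two results.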
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34277,6 +34277,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(Res);
     return;
   }
+  case X86ISD::VPMADD52L: {
+    SDLoc dl(N);
+    EVT VT = N->getValueType(0);
+
+    SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi, Op2Lo, Op2Hi;
+    std::tie(Op0Lo, Op0Hi) = DAG.SplitVectorOperand(N, 0);
+    std::tie(Op1Lo, Op1Hi) = DAG.SplitVectorOperand(N, 1);
+    std::tie(Op2Lo, Op2Hi) = DAG.SplitVectorOperand(N, 2);
+
+    EVT HalfVT = Op0Lo.getValueType();
+    SDValue ResLo =
+        DAG.getNode(N->getOpcode(), dl, HalfVT, Op0Lo, Op1Lo, Op2Lo);
+    SDValue ResHi =
+        DAG.getNode(N->getOpcode(), dl, HalfVT, Op0Hi, Op1Hi, Op2Hi);
+
+    Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ResLo, ResHi));
+    return;
+  }
   case X86ISD::STRICT_CVTPH2PS: {
     EVT VT = N->getValueType(0);
     SDValue Lo, Hi;
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -455,3 +455,40 @@ define <2 x i64> @test_vpmadd52h_mul_one(<2 x i64> %x0, <2 x i64> %x1) {
   %ifma = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat(i64 1), <2 x i64> %x1)
   ret <2 x i64> %ifma
 }
+
+define <8 x i64> @test_vpmadd52luq_small_vector_width(<8 x i64> %a, <8 x i64> %b) #0 {
+; AVX512-LABEL: test_vpmadd52luq_small_vector_width:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [8589934591,8589934591,8589934591,8589934591]
+; AVX512-NEXT:    vpand %ymm4, %ymm1, %ymm5
+; AVX512-NEXT:    vpand %ymm4, %ymm0, %ymm4
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [524287,524287,524287,524287]
+; AVX512-NEXT:    vpand %ymm0, %ymm3, %ymm3
+; AVX512-NEXT:    vpand %ymm0, %ymm2, %ymm2
+; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vpmadd52luq %ymm2, %ymm4, %ymm0
+; AVX512-NEXT:    vpmadd52luq %ymm3, %ymm5, %ymm1
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test_vpmadd52luq_small_vector_width:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [8589934591,8589934591,8589934591,8589934591]
+; AVX-NEXT:    vpand %ymm4, %ymm1, %ymm5
+; AVX-NEXT:    vpand %ymm4, %ymm0, %ymm4
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [524287,524287,524287,524287]
+; AVX-NEXT:    vpand %ymm0, %ymm3, %ymm3
+; AVX-NEXT:    vpand %ymm0, %ymm2, %ymm2
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    {vex} vpmadd52luq %ymm2, %ymm4, %ymm0
+; AVX-NEXT:    {vex} vpmadd52luq %ymm3, %ymm5, %ymm1
+; AVX-NEXT:    retq
+  %a_masked = and <8 x i64> %a,  splat (i64 8589934591)
+  %b_masked = and <8 x i64> %b,  splat (i64 524287)
+
+  %res = mul <8 x i64> %a_masked, %b_masked
+  ret <8 x i64> %res
+}
+
+attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="tigerlake" }