Skip to content

Commit 5dd1600

Browse files
[X86] Handle VPMADD52L for smaller min-legal-vector-width (llvm#183250)
After llvm#171760, there is a crash, as shown at https://godbolt.org/z/qdE1EE4Y9: ``` ReplaceNodeResults: t32: v8i64 = X86ISD::VPMADD52L t10, t22, t2 Do not know how to custom type legalize this operation! ``` With ```"min-legal-vector-width"="512"``` it works fine, but with a smaller value it crashes.
1 parent 1ccb026 commit 5dd1600

2 files changed

Lines changed: 55 additions & 0 deletions

File tree

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34277,6 +34277,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3427734277
Results.push_back(Res);
3427834278
return;
3427934279
}
34280+
case X86ISD::VPMADD52L: {
34281+
SDLoc dl(N);
34282+
EVT VT = N->getValueType(0);
34283+
34284+
SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi, Op2Lo, Op2Hi;
34285+
std::tie(Op0Lo, Op0Hi) = DAG.SplitVectorOperand(N, 0);
34286+
std::tie(Op1Lo, Op1Hi) = DAG.SplitVectorOperand(N, 1);
34287+
std::tie(Op2Lo, Op2Hi) = DAG.SplitVectorOperand(N, 2);
34288+
34289+
EVT HalfVT = Op0Lo.getValueType();
34290+
SDValue ResLo =
34291+
DAG.getNode(N->getOpcode(), dl, HalfVT, Op0Lo, Op1Lo, Op2Lo);
34292+
SDValue ResHi =
34293+
DAG.getNode(N->getOpcode(), dl, HalfVT, Op0Hi, Op1Hi, Op2Hi);
34294+
34295+
Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ResLo, ResHi));
34296+
return;
34297+
}
3428034298
case X86ISD::STRICT_CVTPH2PS: {
3428134299
EVT VT = N->getValueType(0);
3428234300
SDValue Lo, Hi;

llvm/test/CodeGen/X86/combine-vpmadd52.ll

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,3 +455,40 @@ define <2 x i64> @test_vpmadd52h_mul_one(<2 x i64> %x0, <2 x i64> %x1) {
455455
%ifma = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat(i64 1), <2 x i64> %x1)
456456
ret <2 x i64> %ifma
457457
}
458+
459+
define <8 x i64> @test_vpmadd52luq_small_vector_width(<8 x i64> %a, <8 x i64> %b) #0 {
460+
; AVX512-LABEL: test_vpmadd52luq_small_vector_width:
461+
; AVX512: # %bb.0:
462+
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934591,8589934591,8589934591,8589934591]
463+
; AVX512-NEXT: vpand %ymm4, %ymm1, %ymm5
464+
; AVX512-NEXT: vpand %ymm4, %ymm0, %ymm4
465+
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm0 = [524287,524287,524287,524287]
466+
; AVX512-NEXT: vpand %ymm0, %ymm3, %ymm3
467+
; AVX512-NEXT: vpand %ymm0, %ymm2, %ymm2
468+
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
469+
; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0
470+
; AVX512-NEXT: vpmadd52luq %ymm2, %ymm4, %ymm0
471+
; AVX512-NEXT: vpmadd52luq %ymm3, %ymm5, %ymm1
472+
; AVX512-NEXT: retq
473+
;
474+
; AVX-LABEL: test_vpmadd52luq_small_vector_width:
475+
; AVX: # %bb.0:
476+
; AVX-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934591,8589934591,8589934591,8589934591]
477+
; AVX-NEXT: vpand %ymm4, %ymm1, %ymm5
478+
; AVX-NEXT: vpand %ymm4, %ymm0, %ymm4
479+
; AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [524287,524287,524287,524287]
480+
; AVX-NEXT: vpand %ymm0, %ymm3, %ymm3
481+
; AVX-NEXT: vpand %ymm0, %ymm2, %ymm2
482+
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
483+
; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
484+
; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm4, %ymm0
485+
; AVX-NEXT: {vex} vpmadd52luq %ymm3, %ymm5, %ymm1
486+
; AVX-NEXT: retq
487+
%a_masked = and <8 x i64> %a, splat (i64 8589934591)
488+
%b_masked = and <8 x i64> %b, splat (i64 524287)
489+
490+
%res = mul <8 x i64> %a_masked, %b_masked
491+
ret <8 x i64> %res
492+
}
493+
494+
attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="tigerlake" }

0 commit comments

Comments
 (0)