Skip to content

Commit d43213f

Browse files
authored
Revert "[VPlan] Don't drop NUW flag on tail folded canonical IVs (llvm#183301)" (llvm#183698)
This reverts commit b0b3e3e. After thinking about this for a bit, I don't think this is correct. vscale being a power-of-2 only guarantees the canonical IV increment overflows to zero, but not overflows in general.
1 parent 16aa190 commit d43213f

60 files changed

Lines changed: 261 additions & 243 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8186,6 +8186,31 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
81868186

81878187
VPlanTransforms::createLoopRegions(*Plan);
81888188

8189+
// Don't use getDecisionAndClampRange here, because we don't know the UF
8190+
// so this function is better to be conservative, rather than to split
8191+
// it up into different VPlans.
8192+
// TODO: Consider using getDecisionAndClampRange here to split up VPlans.
8193+
bool IVUpdateMayOverflow = false;
8194+
for (ElementCount VF : Range)
8195+
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);
8196+
8197+
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
8198+
// Use NUW for the induction increment if we proved that it won't overflow in
8199+
// the vector loop or when not folding the tail. In the later case, we know
8200+
// that the canonical induction increment will not overflow as the vector trip
8201+
// count is >= increment and a multiple of the increment.
8202+
VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
8203+
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
8204+
if (!HasNUW) {
8205+
auto *IVInc =
8206+
LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0);
8207+
assert(match(IVInc,
8208+
m_VPInstruction<Instruction::Add>(
8209+
m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) &&
8210+
"Did not find the canonical IV increment");
8211+
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
8212+
}
8213+
81898214
// ---------------------------------------------------------------------------
81908215
// Pre-construction: record ingredients whose recipes we'll need to further
81918216
// process after constructing the initial VPlan.
@@ -8225,7 +8250,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
82258250

82268251
// Scan the body of the loop in a topological order to visit each basic block
82278252
// after having visited its predecessor basic blocks.
8228-
VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
82298253
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
82308254
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
82318255
HeaderVPBB);
@@ -8378,12 +8402,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83788402
Builder))
83798403
return nullptr;
83808404

8381-
// TODO: Remove as IV can no longer overflow.
8382-
bool IVUpdateMayOverflow = false;
8383-
for (ElementCount VF : Range)
8384-
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);
8385-
8386-
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
83878405
if (useActiveLaneMask(Style)) {
83888406
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
83898407
// TailFoldingStyle is visible there.

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
618618
; PRED-NEXT: store i32 0, ptr [[TMP12]], align 8
619619
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]]
620620
; PRED: [[PRED_STORE_CONTINUE5]]:
621-
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
621+
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
622622
; PRED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
623623
; PRED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
624624
; PRED: [[MIDDLE_BLOCK]]:

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
416416
; CHECK-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
417417
; CHECK-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
418418
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
419-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
419+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
420420
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
421421
; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
422422
; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,7 +1227,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
12271227
; CHECK-INTERLEAVE1-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
12281228
; CHECK-INTERLEAVE1-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
12291229
; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
1230-
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1230+
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
12311231
; CHECK-INTERLEAVE1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
12321232
; CHECK-INTERLEAVE1-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
12331233
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -1483,7 +1483,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
14831483
; CHECK-INTERLEAVED-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
14841484
; CHECK-INTERLEAVED-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
14851485
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
1486-
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1486+
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
14871487
; CHECK-INTERLEAVED-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
14881488
; CHECK-INTERLEAVED-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
14891489
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -1739,7 +1739,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
17391739
; CHECK-MAXBW-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
17401740
; CHECK-MAXBW-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
17411741
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
1742-
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1742+
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
17431743
; CHECK-MAXBW-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
17441744
; CHECK-MAXBW-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
17451745
; CHECK-MAXBW-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
6767
; DATA-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[UMAX]])
6868
; DATA-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]]
6969
; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP10]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
70-
; DATA-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]]
70+
; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP5]]
7171
; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
7272
; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
7373
; DATA: middle.block:
@@ -102,7 +102,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
102102
; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
103103
; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]]
104104
; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT5]], ptr align 4 [[TMP13]], <vscale x 4 x i1> [[TMP12]])
105-
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX1]], [[TMP5]]
105+
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP5]]
106106
; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
107107
; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
108108
; DATA_NO_LANEMASK: middle.block:

llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define void @f0(ptr noalias %dst, ptr readonly %src, i64 %n) #0 {
3131
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16
3232
; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP4]], ptr align 1 [[TMP6]], <16 x i1> [[ACTIVE_LANE_MASK]])
3333
; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP5]], ptr align 1 [[TMP8]], <16 x i1> [[ACTIVE_LANE_MASK1]])
34-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
34+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
3535
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3636
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3737
; CHECK: [[MIDDLE_BLOCK]]:

llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
2121
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]]
2222
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
2323
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[ACTIVE_LANE_MASK]])
24-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
24+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
2525
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2626
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2727
; CHECK: middle.block:
@@ -192,7 +192,7 @@ define void @test_stride3_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
192192
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]]
193193
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
194194
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[ACTIVE_LANE_MASK]])
195-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
195+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
196196
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
197197
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
198198
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -239,7 +239,7 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
239239
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]]
240240
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
241241
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[ACTIVE_LANE_MASK]])
242-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
242+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
243243
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
244244
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
245245
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -287,7 +287,7 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur
287287
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]]
288288
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
289289
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[ACTIVE_LANE_MASK]])
290-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
290+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
291291
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
292292
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
293293
; CHECK: middle.block:

llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ define void @diff_checks(ptr nocapture noundef writeonly %dst, ptr nocapture nou
7373
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP11]], <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison), !alias.scope [[META0:![0-9]+]]
7474
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP8]], i32 [[INDEX]]
7575
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[WIDE_MASKED_LOAD]], ptr align 4 [[TMP12]], <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
76-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
76+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
7777
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
7878
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
7979
; CHECK: middle.block:

llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,7 +1128,7 @@ define i32 @red_mla_ext_s8_s16_s32(ptr noalias nocapture readonly %A, ptr noalia
11281128
; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i32> [[TMP4]], <8 x i32> zeroinitializer
11291129
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
11301130
; CHECK-NEXT: [[TMP7]] = add i32 [[VEC_PHI]], [[TMP6]]
1131-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
1131+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
11321132
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
11331133
; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
11341134
; CHECK: for.cond.cleanup:
@@ -1276,7 +1276,7 @@ define i32 @red_mla_u8_s8_u32(ptr noalias nocapture readonly %A, ptr noalias noc
12761276
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
12771277
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
12781278
; CHECK-NEXT: [[TMP7]] = add i32 [[VEC_PHI]], [[TMP6]]
1279-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1279+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
12801280
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
12811281
; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
12821282
; CHECK: for.cond.cleanup:

llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 {
7171
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 2
7272
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP21]], <4 x ptr> align 1 [[TMP30]], <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]]
7373
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP29]], <4 x ptr> align 1 [[TMP31]], <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]]
74-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
74+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
7575
; CHECK-NEXT: [[PTR_IND3]] = getelementptr i8, ptr [[POINTER_PHI2]], i32 12
7676
; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12
7777
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]

0 commit comments

Comments
 (0)