Skip to content

Commit 4912004

Browse files
authored
Revert "[X86] Allow EVEX compression for mask registers" (llvm#174872)
Reverts llvm#171980 while investigating regressions reported on llvm#174871
1 parent 77d6dd4 commit 4912004

13 files changed

Lines changed: 70 additions & 178 deletions

llvm/lib/Target/X86/X86CompressEVEX.cpp

Lines changed: 4 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
// d. NF_ND (EVEX) -> NF (EVEX)
1717
// e. NonNF (EVEX) -> NF (EVEX)
1818
// f. SETZUCCm (EVEX) -> SETCCm (legacy)
19-
// g. VPMOV*2M (EVEX) + KMOV -> VMOVMSK/VPMOVMSKB (VEX)
2019
//
2120
// Compression a, b and c can always reduce code size, with some exceptions
2221
// such as promoted 16-bit CRC32 which is as long as the legacy version.
@@ -42,7 +41,6 @@
4241
#include "X86.h"
4342
#include "X86InstrInfo.h"
4443
#include "X86Subtarget.h"
45-
#include "llvm/ADT/SmallVector.h"
4644
#include "llvm/ADT/StringRef.h"
4745
#include "llvm/CodeGen/MachineFunction.h"
4846
#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
@@ -180,137 +178,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
180178
return true;
181179
}
182180

183-
static bool isKMovNarrowing(unsigned VPMOVOpc, unsigned KMOVOpc) {
184-
unsigned VPMOVBits = 0;
185-
switch (VPMOVOpc) {
186-
case X86::VPMOVQ2MZ128kr:
187-
VPMOVBits = 2;
188-
break;
189-
case X86::VPMOVQ2MZ256kr:
190-
case X86::VPMOVD2MZ128kr:
191-
VPMOVBits = 4;
192-
break;
193-
case X86::VPMOVD2MZ256kr:
194-
VPMOVBits = 8;
195-
break;
196-
case X86::VPMOVB2MZ128kr:
197-
VPMOVBits = 16;
198-
break;
199-
case X86::VPMOVB2MZ256kr:
200-
VPMOVBits = 32;
201-
break;
202-
default:
203-
llvm_unreachable("Unknown VPMOV opcode");
204-
}
205-
206-
unsigned KMOVSize = 0;
207-
switch (KMOVOpc) {
208-
case X86::KMOVBrk:
209-
KMOVSize = 8;
210-
break;
211-
case X86::KMOVWrk:
212-
KMOVSize = 16;
213-
break;
214-
case X86::KMOVDrk:
215-
KMOVSize = 32;
216-
break;
217-
default:
218-
llvm_unreachable("Unknown KMOV opcode");
219-
}
220-
221-
return KMOVSize < VPMOVBits;
222-
}
223-
224-
// Try to compress VPMOV*2M + KMOV chain patterns:
225-
// vpmov*2m %xmm0, %k0 -> (erase this)
226-
// kmov* %k0, %eax -> vmovmskp* %xmm0, %eax
227-
static bool tryCompressVPMOVPattern(MachineInstr &MI, MachineBasicBlock &MBB,
228-
const X86Subtarget &ST,
229-
SmallVectorImpl<MachineInstr *> &ToErase) {
230-
const X86InstrInfo *TII = ST.getInstrInfo();
231-
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
232-
233-
unsigned Opc = MI.getOpcode();
234-
if (Opc != X86::VPMOVD2MZ128kr && Opc != X86::VPMOVD2MZ256kr &&
235-
Opc != X86::VPMOVQ2MZ128kr && Opc != X86::VPMOVQ2MZ256kr &&
236-
Opc != X86::VPMOVB2MZ128kr && Opc != X86::VPMOVB2MZ256kr)
237-
return false;
238-
239-
Register MaskReg = MI.getOperand(0).getReg();
240-
Register SrcVecReg = MI.getOperand(1).getReg();
241-
242-
unsigned MovMskOpc = 0;
243-
switch (Opc) {
244-
case X86::VPMOVD2MZ128kr:
245-
MovMskOpc = X86::VMOVMSKPSrr;
246-
break;
247-
case X86::VPMOVD2MZ256kr:
248-
MovMskOpc = X86::VMOVMSKPSYrr;
249-
break;
250-
case X86::VPMOVQ2MZ128kr:
251-
MovMskOpc = X86::VMOVMSKPDrr;
252-
break;
253-
case X86::VPMOVQ2MZ256kr:
254-
MovMskOpc = X86::VMOVMSKPDYrr;
255-
break;
256-
case X86::VPMOVB2MZ128kr:
257-
MovMskOpc = X86::VPMOVMSKBrr;
258-
break;
259-
case X86::VPMOVB2MZ256kr:
260-
MovMskOpc = X86::VPMOVMSKBYrr;
261-
break;
262-
default:
263-
llvm_unreachable("Unknown VPMOV opcode");
264-
}
265-
266-
MachineInstr *KMovMI = nullptr;
267-
268-
for (MachineInstr &CurMI : llvm::make_range(
269-
std::next(MachineBasicBlock::iterator(MI)), MBB.end())) {
270-
if (CurMI.modifiesRegister(MaskReg, TRI)) {
271-
if (!KMovMI)
272-
return false; // Mask clobbered before use
273-
break;
274-
}
275-
276-
if (CurMI.readsRegister(MaskReg, TRI)) {
277-
if (KMovMI)
278-
return false; // Fail: Mask has MULTIPLE uses
279-
280-
unsigned UseOpc = CurMI.getOpcode();
281-
bool IsKMOV = UseOpc == X86::KMOVBrk || UseOpc == X86::KMOVWrk ||
282-
UseOpc == X86::KMOVDrk;
283-
// Only allow non-narrowing KMOV uses of the mask.
284-
if (IsKMOV && CurMI.getOperand(1).getReg() == MaskReg &&
285-
!isKMovNarrowing(Opc, UseOpc)) {
286-
KMovMI = &CurMI;
287-
// continue scanning to ensure
288-
// there are no *other* uses of the mask later in the block.
289-
} else {
290-
return false;
291-
}
292-
}
293-
294-
if (!KMovMI && CurMI.modifiesRegister(SrcVecReg, TRI)) {
295-
return false; // SrcVecReg modified before it could be used by MOVMSK
296-
}
297-
}
298-
299-
if (!KMovMI)
300-
return false;
301-
302-
// Apply the transformation
303-
KMovMI->setDesc(TII->get(MovMskOpc));
304-
KMovMI->getOperand(1).setReg(SrcVecReg);
305-
KMovMI->setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
306-
307-
ToErase.push_back(&MI);
308-
return true;
309-
}
310-
311181
static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
312-
const X86Subtarget &ST,
313-
SmallVectorImpl<MachineInstr *> &ToErase) {
182+
const X86Subtarget &ST) {
314183
uint64_t TSFlags = MI.getDesc().TSFlags;
315184

316185
// Check for EVEX instructions only.
@@ -321,10 +190,6 @@ static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
321190
if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2))
322191
return false;
323192

324-
// Specialized VPMOVD2M + KMOV -> MOVMSK fold first.
325-
if (tryCompressVPMOVPattern(MI, MBB, ST, ToErase))
326-
return true;
327-
328193
auto IsRedundantNewDataDest = [&](unsigned &Opc) {
329194
// $rbx = ADD64rr_ND $rbx, $rax / $rbx = ADD64rr_ND $rax, $rbx
330195
// ->
@@ -485,15 +350,9 @@ static bool runOnMF(MachineFunction &MF) {
485350
bool Changed = false;
486351

487352
for (MachineBasicBlock &MBB : MF) {
488-
SmallVector<MachineInstr *, 4> ToErase;
489-
490-
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
491-
Changed |= CompressEVEXImpl(MI, MBB, ST, ToErase);
492-
}
493-
494-
for (MachineInstr *MI : ToErase) {
495-
MI->eraseFromParent();
496-
}
353+
// Traverse the basic block.
354+
for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
355+
Changed |= CompressEVEXImpl(MI, MBB, ST);
497356
}
498357
LLVM_DEBUG(dbgs() << "End X86CompressEVEXPass\n";);
499358
return Changed;

llvm/test/CodeGen/X86/avx512-ext.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1745,7 +1745,8 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
17451745
; AVX512DQNOBW: # %bb.0:
17461746
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
17471747
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1748-
; AVX512DQNOBW-NEXT: vmovmskps %ymm0, %eax
1748+
; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0
1749+
; AVX512DQNOBW-NEXT: kmovw %k0, %eax
17491750
; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax
17501751
; AVX512DQNOBW-NEXT: vzeroupper
17511752
; AVX512DQNOBW-NEXT: retq

llvm/test/CodeGen/X86/avx512-insert-extract.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1669,7 +1669,8 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) n
16691669
; SKX-NEXT: vpmovm2b %k0, %ymm0
16701670
; SKX-NEXT: vpbroadcastb %eax, %ymm0 {%k1}
16711671
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
1672-
; SKX-NEXT: vpmovmskb %ymm0, %eax
1672+
; SKX-NEXT: vpmovb2m %ymm0, %k0
1673+
; SKX-NEXT: kmovd %k0, %eax
16731674
; SKX-NEXT: vzeroupper
16741675
; SKX-NEXT: retq
16751676
%t1 = icmp ugt <32 x i8> %a, zeroinitializer

llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2751,7 +2751,8 @@ declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
27512751
define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
27522752
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
27532753
; CHECK: # %bb.0:
2754-
; CHECK-NEXT: vmovmskps %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x50,0xc0]
2754+
; CHECK-NEXT: vpmovd2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
2755+
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
27552756
; CHECK-NEXT: # kill: def $al killed $al killed $eax
27562757
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
27572758
%res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
@@ -2776,7 +2777,8 @@ declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
27762777
define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
27772778
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
27782779
; CHECK: # %bb.0:
2779-
; CHECK-NEXT: vmovmskpd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x50,0xc0]
2780+
; CHECK-NEXT: vpmovq2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
2781+
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
27802782
; CHECK-NEXT: # kill: def $al killed $al killed $eax
27812783
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
27822784
%res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
@@ -2788,7 +2790,8 @@ declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
27882790
define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
27892791
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
27902792
; CHECK: # %bb.0:
2791-
; CHECK-NEXT: vmovmskpd %ymm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x50,0xc0]
2793+
; CHECK-NEXT: vpmovq2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
2794+
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
27922795
; CHECK-NEXT: # kill: def $al killed $al killed $eax
27932796
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
27942797
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]

llvm/test/CodeGen/X86/masked_compressstore.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3444,7 +3444,8 @@ define void @compressstore_v8i16_v8i16(ptr %base, <8 x i16> %V, <8 x i16> %trigg
34443444
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
34453445
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
34463446
; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
3447-
; AVX512VLDQ-NEXT: vmovmskps %ymm1, %eax
3447+
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0
3448+
; AVX512VLDQ-NEXT: kmovw %k0, %eax
34483449
; AVX512VLDQ-NEXT: testb $1, %al
34493450
; AVX512VLDQ-NEXT: jne LBB11_1
34503451
; AVX512VLDQ-NEXT: ## %bb.2: ## %else

llvm/test/CodeGen/X86/masked_expandload.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3047,7 +3047,8 @@ define <8 x i16> @expandload_v8i16_v8i16(ptr %base, <8 x i16> %src0, <8 x i16> %
30473047
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
30483048
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
30493049
; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
3050-
; AVX512VLDQ-NEXT: vmovmskps %ymm1, %eax
3050+
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0
3051+
; AVX512VLDQ-NEXT: kmovw %k0, %eax
30513052
; AVX512VLDQ-NEXT: testb $1, %al
30523053
; AVX512VLDQ-NEXT: jne LBB11_1
30533054
; AVX512VLDQ-NEXT: ## %bb.2: ## %else

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -966,9 +966,10 @@ define <2 x double> @test17(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x doub
966966
; X86-SKX-LABEL: test17:
967967
; X86-SKX: # %bb.0:
968968
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
969+
; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
969970
; X86-SKX-NEXT: vpslld $3, %xmm0, %xmm0
970971
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
971-
; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
972+
; X86-SKX-NEXT: kmovw %k0, %eax
972973
; X86-SKX-NEXT: testb $1, %al
973974
; X86-SKX-NEXT: jne .LBB16_1
974975
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1254,7 +1255,8 @@ define void @test20(<2 x float>%a1, <2 x ptr> %ptr, <2 x i1> %mask) {
12541255
; X64-SKX-LABEL: test20:
12551256
; X64-SKX: # %bb.0:
12561257
; X64-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
1257-
; X64-SKX-NEXT: vmovmskpd %xmm2, %eax
1258+
; X64-SKX-NEXT: vpmovq2m %xmm2, %k0
1259+
; X64-SKX-NEXT: kmovw %k0, %eax
12581260
; X64-SKX-NEXT: testb $1, %al
12591261
; X64-SKX-NEXT: jne .LBB19_1
12601262
; X64-SKX-NEXT: # %bb.2: # %else
@@ -1275,7 +1277,8 @@ define void @test20(<2 x float>%a1, <2 x ptr> %ptr, <2 x i1> %mask) {
12751277
; X86-SKX-LABEL: test20:
12761278
; X86-SKX: # %bb.0:
12771279
; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
1278-
; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
1280+
; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
1281+
; X86-SKX-NEXT: kmovw %k0, %eax
12791282
; X86-SKX-NEXT: testb $1, %al
12801283
; X86-SKX-NEXT: jne .LBB19_1
12811284
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1349,7 +1352,8 @@ define void @test21(<2 x i32>%a1, <2 x ptr> %ptr, <2 x i1>%mask) {
13491352
; X64-SKX-LABEL: test21:
13501353
; X64-SKX: # %bb.0:
13511354
; X64-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
1352-
; X64-SKX-NEXT: vmovmskpd %xmm2, %eax
1355+
; X64-SKX-NEXT: vpmovq2m %xmm2, %k0
1356+
; X64-SKX-NEXT: kmovw %k0, %eax
13531357
; X64-SKX-NEXT: testb $1, %al
13541358
; X64-SKX-NEXT: jne .LBB20_1
13551359
; X64-SKX-NEXT: # %bb.2: # %else
@@ -1370,7 +1374,8 @@ define void @test21(<2 x i32>%a1, <2 x ptr> %ptr, <2 x i1>%mask) {
13701374
; X86-SKX-LABEL: test21:
13711375
; X86-SKX: # %bb.0:
13721376
; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
1373-
; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
1377+
; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
1378+
; X86-SKX-NEXT: kmovw %k0, %eax
13741379
; X86-SKX-NEXT: testb $1, %al
13751380
; X86-SKX-NEXT: jne .LBB20_1
13761381
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1489,9 +1494,10 @@ define <2 x float> @test22(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float
14891494
; X86-SKX-LABEL: test22:
14901495
; X86-SKX: # %bb.0:
14911496
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
1497+
; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
14921498
; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
14931499
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
1494-
; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
1500+
; X86-SKX-NEXT: kmovw %k0, %eax
14951501
; X86-SKX-NEXT: testb $1, %al
14961502
; X86-SKX-NEXT: jne .LBB21_1
14971503
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1611,10 +1617,11 @@ define <2 x float> @test22a(ptr %base, <2 x i64> %ind, <2 x i1> %mask, <2 x floa
16111617
; X86-SKX-LABEL: test22a:
16121618
; X86-SKX: # %bb.0:
16131619
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
1620+
; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
16141621
; X86-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
16151622
; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
16161623
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
1617-
; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
1624+
; X86-SKX-NEXT: kmovw %k0, %eax
16181625
; X86-SKX-NEXT: testb $1, %al
16191626
; X86-SKX-NEXT: jne .LBB22_1
16201627
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1734,9 +1741,10 @@ define <2 x i32> @test23(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %s
17341741
; X86-SKX-LABEL: test23:
17351742
; X86-SKX: # %bb.0:
17361743
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
1744+
; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
17371745
; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
17381746
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
1739-
; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
1747+
; X86-SKX-NEXT: kmovw %k0, %eax
17401748
; X86-SKX-NEXT: testb $1, %al
17411749
; X86-SKX-NEXT: jne .LBB23_1
17421750
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1852,10 +1860,11 @@ define <2 x i32> @test23b(ptr %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> %
18521860
; X86-SKX-LABEL: test23b:
18531861
; X86-SKX: # %bb.0:
18541862
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
1863+
; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
18551864
; X86-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
18561865
; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
18571866
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
1858-
; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
1867+
; X86-SKX-NEXT: kmovw %k0, %eax
18591868
; X86-SKX-NEXT: testb $1, %al
18601869
; X86-SKX-NEXT: jne .LBB24_1
18611870
; X86-SKX-NEXT: # %bb.2: # %else
@@ -2025,9 +2034,10 @@ define <2 x i64> @test25(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %s
20252034
; X86-SKX-LABEL: test25:
20262035
; X86-SKX: # %bb.0:
20272036
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
2037+
; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
20282038
; X86-SKX-NEXT: vpslld $3, %xmm0, %xmm0
20292039
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
2030-
; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
2040+
; X86-SKX-NEXT: kmovw %k0, %eax
20312041
; X86-SKX-NEXT: testb $1, %al
20322042
; X86-SKX-NEXT: jne .LBB26_1
20332043
; X86-SKX-NEXT: # %bb.2: # %else
@@ -3752,9 +3762,10 @@ define void @test_scatter_2i32_index(<2 x double> %a1, ptr %base, <2 x i32> %ind
37523762
; X86-SKX-LABEL: test_scatter_2i32_index:
37533763
; X86-SKX: # %bb.0:
37543764
; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
3765+
; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
37553766
; X86-SKX-NEXT: vpslld $3, %xmm1, %xmm1
37563767
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm1, %xmm1
3757-
; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
3768+
; X86-SKX-NEXT: kmovw %k0, %eax
37583769
; X86-SKX-NEXT: testb $1, %al
37593770
; X86-SKX-NEXT: jne .LBB52_1
37603771
; X86-SKX-NEXT: # %bb.2: # %else

0 commit comments

Comments
 (0)