Skip to content

Commit 4147cd2

Browse files
authored
[WebAssembly][FastISel] Emit signed loads for sext of i8/i16/i32 (llvm#182767)
FastISel currently defaults to unsigned loads for i8/i16/i32 types, leaving any sign-extension to be handled by a separate instruction. This patch improves on that by folding the sign-extension into the load, directly emitting a signed load (e.g., i32.load8_s). When a load's single use is a sign-extension, the new tryToFoldLoadIntoMI hook emits a signed load in place of the already-selected extend instruction and removes that now-redundant instruction. Fixes llvm#180783
1 parent f71bd1c commit 4147cd2

3 files changed

Lines changed: 55 additions & 14 deletions

File tree

llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ class WebAssemblyFastISel final : public FastISel {
204204
}
205205

206206
bool fastSelectInstruction(const Instruction *I) override;
207+
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
208+
const LoadInst *LI) override;
207209

208210
#include "WebAssemblyGenFastISel.inc"
209211
};
@@ -1267,6 +1269,52 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) {
12671269
return true;
12681270
}
12691271

1272+
static unsigned getSExtLoadOpcode(unsigned Opc, bool A64) {
1273+
switch (Opc) {
1274+
default:
1275+
return WebAssembly::INSTRUCTION_LIST_END;
1276+
case WebAssembly::I32_EXTEND8_S_I32:
1277+
Opc = A64 ? WebAssembly::LOAD8_S_I32_A64 : WebAssembly::LOAD8_S_I32_A32;
1278+
break;
1279+
case WebAssembly::I32_EXTEND16_S_I32:
1280+
Opc = A64 ? WebAssembly::LOAD16_S_I32_A64 : WebAssembly::LOAD16_S_I32_A32;
1281+
break;
1282+
case WebAssembly::I64_EXTEND8_S_I64:
1283+
Opc = A64 ? WebAssembly::LOAD8_S_I64_A64 : WebAssembly::LOAD8_S_I64_A32;
1284+
break;
1285+
case WebAssembly::I64_EXTEND16_S_I64:
1286+
Opc = A64 ? WebAssembly::LOAD16_S_I64_A64 : WebAssembly::LOAD16_S_I64_A32;
1287+
break;
1288+
case WebAssembly::I64_EXTEND32_S_I64:
1289+
case WebAssembly::I64_EXTEND_S_I32:
1290+
Opc = A64 ? WebAssembly::LOAD32_S_I64_A64 : WebAssembly::LOAD32_S_I64_A32;
1291+
break;
1292+
}
1293+
1294+
return Opc;
1295+
}
1296+
1297+
/// Attempts to replace the sign-extension instruction \p MI with a single
/// signed load of the memory read by \p LI.
///
/// \param MI   Candidate instruction. Only opcodes recognised by
///             getSExtLoadOpcode are folded; its operand 0 register becomes
///             the result register of the new load.
/// \param OpNo Not used by this implementation.
/// \param LI   IR load whose pointer operand supplies the address and whose
///             memory operand is attached to the new instruction.
/// \returns true if a signed load was emitted and \p MI was removed, false if
///          no fold was performed.
bool WebAssemblyFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                              const LoadInst *LI) {
  // The opcodes chosen below are the ordinary LOAD*_S_* loads, so never
  // substitute one for an atomic load.
  // NOTE(review): selectLoad also tests Load->isAtomic(); confirm both paths
  // are meant to treat atomic loads the same way.
  if (LI->isAtomic())
    return false;

  const bool A64 = Subtarget->hasAddr64();
  const unsigned NewOpc = getSExtLoadOpcode(MI->getOpcode(), A64);
  if (NewOpc == WebAssembly::INSTRUCTION_LIST_END)
    return false;

  Address Addr;
  if (!computeAddress(LI->getPointerOperand(), Addr))
    return false;

  // NOTE(review): confirm that Addr always carries a usable base register at
  // this point (e.g. for global or constant addresses) before the operands are
  // added below.

  // Reuse the extend's destination register so existing users of the extended
  // value keep referring to the same register.
  Register ResultReg = MI->getOperand(0).getReg();
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(NewOpc), ResultReg);
  addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(LI));

  // The extend has been superseded by the signed load; erase just that one
  // instruction.
  MachineBasicBlock::iterator Iter(MI);
  removeDeadCode(Iter, std::next(Iter));
  return true;
}
1317+
12701318
bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
12711319
const auto *Load = cast<LoadInst>(I);
12721320
if (Load->isAtomic())

llvm/test/CodeGen/WebAssembly/load-ext.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ define i32 @sext_i8_i32(ptr %p) {
2626
; WASM32-FAST-LABEL: sext_i8_i32:
2727
; WASM32-FAST: .functype sext_i8_i32 (i32) -> (i32)
2828
; WASM32-FAST-NEXT: # %bb.0:
29-
; WASM32-FAST-NEXT: i32.load8_u $push1=, 0($0)
30-
; WASM32-FAST-NEXT: i32.extend8_s $push0=, $pop1
29+
; WASM32-FAST-NEXT: i32.load8_s $push0=, 0($0)
3130
; WASM32-FAST-NEXT: return $pop0
3231
;
3332
; WASM32-FAST-MVP-LABEL: sext_i8_i32:
@@ -55,8 +54,7 @@ define i32 @sext_i8_i32(ptr %p) {
5554
; WASM64-FAST-LABEL: sext_i8_i32:
5655
; WASM64-FAST: .functype sext_i8_i32 (i64) -> (i32)
5756
; WASM64-FAST-NEXT: # %bb.0:
58-
; WASM64-FAST-NEXT: i32.load8_u $push1=, 0($0)
59-
; WASM64-FAST-NEXT: i32.extend8_s $push0=, $pop1
57+
; WASM64-FAST-NEXT: i32.load8_s $push0=, 0($0)
6058
; WASM64-FAST-NEXT: return $pop0
6159
;
6260
; WASM64-FAST-MVP-LABEL: sext_i8_i32:
@@ -150,8 +148,7 @@ define i32 @sext_i16_i32(ptr %p) {
150148
; WASM32-FAST-LABEL: sext_i16_i32:
151149
; WASM32-FAST: .functype sext_i16_i32 (i32) -> (i32)
152150
; WASM32-FAST-NEXT: # %bb.0:
153-
; WASM32-FAST-NEXT: i32.load16_u $push1=, 0($0)
154-
; WASM32-FAST-NEXT: i32.extend16_s $push0=, $pop1
151+
; WASM32-FAST-NEXT: i32.load16_s $push0=, 0($0)
155152
; WASM32-FAST-NEXT: return $pop0
156153
;
157154
; WASM32-FAST-MVP-LABEL: sext_i16_i32:
@@ -179,8 +176,7 @@ define i32 @sext_i16_i32(ptr %p) {
179176
; WASM64-FAST-LABEL: sext_i16_i32:
180177
; WASM64-FAST: .functype sext_i16_i32 (i64) -> (i32)
181178
; WASM64-FAST-NEXT: # %bb.0:
182-
; WASM64-FAST-NEXT: i32.load16_u $push1=, 0($0)
183-
; WASM64-FAST-NEXT: i32.extend16_s $push0=, $pop1
179+
; WASM64-FAST-NEXT: i32.load16_s $push0=, 0($0)
184180
; WASM64-FAST-NEXT: return $pop0
185181
;
186182
; WASM64-FAST-MVP-LABEL: sext_i16_i32:
@@ -538,8 +534,7 @@ define i64 @sext_i32_i64(ptr %p) {
538534
; WASM32-FAST-LABEL: sext_i32_i64:
539535
; WASM32-FAST: .functype sext_i32_i64 (i32) -> (i64)
540536
; WASM32-FAST-NEXT: # %bb.0:
541-
; WASM32-FAST-NEXT: i32.load $push1=, 0($0)
542-
; WASM32-FAST-NEXT: i64.extend_i32_s $push0=, $pop1
537+
; WASM32-FAST-NEXT: i64.load32_s $push0=, 0($0)
543538
; WASM32-FAST-NEXT: return $pop0
544539
;
545540
; WASM32-FAST-MVP-LABEL: sext_i32_i64:
@@ -564,8 +559,7 @@ define i64 @sext_i32_i64(ptr %p) {
564559
; WASM64-FAST-LABEL: sext_i32_i64:
565560
; WASM64-FAST: .functype sext_i32_i64 (i64) -> (i64)
566561
; WASM64-FAST-NEXT: # %bb.0:
567-
; WASM64-FAST-NEXT: i32.load $push1=, 0($0)
568-
; WASM64-FAST-NEXT: i64.extend_i32_s $push0=, $pop1
562+
; WASM64-FAST-NEXT: i64.load32_s $push0=, 0($0)
569563
; WASM64-FAST-NEXT: return $pop0
570564
;
571565
; WASM64-FAST-MVP-LABEL: sext_i32_i64:

llvm/test/CodeGen/WebAssembly/offset-fastisel.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,7 @@ define i32 @load_i8_s_with_folded_offset(ptr %p) {
121121
; DEFAULT-LABEL: load_i8_s_with_folded_offset:
122122
; DEFAULT: .functype load_i8_s_with_folded_offset (i32) -> (i32)
123123
; DEFAULT-NEXT: # %bb.0:
124-
; DEFAULT-NEXT: i32.load8_u $push1=, 24($0)
125-
; DEFAULT-NEXT: i32.extend8_s $push0=, $pop1
124+
; DEFAULT-NEXT: i32.load8_s $push0=, 24($0)
126125
; DEFAULT-NEXT: # fallthrough-return
127126
;
128127
; MVP-LABEL: load_i8_s_with_folded_offset:

0 commit comments

Comments
 (0)