Skip to content

Commit 7297374

Browse files
authored
YJIT: Reduce paddings if --yjit-exec-mem-size <= 128 on arm64 (ruby#7671)
* YJIT: Reduce paddings if --yjit-exec-mem-size <= 128 on arm64
* YJIT: Define jmp_ptr_bytes on CodeBlock
1 parent 8c360ce commit 7297374

3 files changed

Lines changed: 52 additions & 16 deletions

File tree

yjit/src/asm/mod.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@ use std::cell::RefCell;
22
use std::fmt;
33
use std::mem;
44
use std::rc::Rc;
5-
#[cfg(target_arch = "x86_64")]
6-
use crate::backend::x86_64::JMP_PTR_BYTES;
7-
#[cfg(target_arch = "aarch64")]
8-
use crate::backend::arm64::JMP_PTR_BYTES;
95
use crate::core::IseqPayload;
106
use crate::core::for_each_off_stack_iseq_payload;
117
use crate::core::for_each_on_stack_iseq_payload;
@@ -123,7 +119,7 @@ impl CodeBlock {
123119
page_size,
124120
write_pos: 0,
125121
past_page_bytes: 0,
126-
page_end_reserve: JMP_PTR_BYTES,
122+
page_end_reserve: 0,
127123
label_addrs: Vec::new(),
128124
label_names: Vec::new(),
129125
label_refs: Vec::new(),
@@ -133,6 +129,7 @@ impl CodeBlock {
133129
dropped_bytes: false,
134130
freed_pages,
135131
};
132+
cb.page_end_reserve = cb.jmp_ptr_bytes();
136133
cb.write_pos = cb.page_start();
137134
cb
138135
}
@@ -196,7 +193,7 @@ impl CodeBlock {
196193
self.write_pos = dst_pos;
197194
let dst_ptr = self.get_write_ptr();
198195
self.write_pos = src_pos;
199-
self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
196+
self.without_page_end_reserve(|cb| assert!(cb.has_capacity(cb.jmp_ptr_bytes())));
200197

201198
// Generate jmp_ptr from src_pos to dst_pos
202199
self.without_page_end_reserve(|cb| {
@@ -242,6 +239,11 @@ impl CodeBlock {
242239
self.mem_block.borrow().mapped_region_size()
243240
}
244241

242+
/// Size of the region in bytes where writes could be attempted.
243+
pub fn virtual_region_size(&self) -> usize {
244+
self.mem_block.borrow().virtual_region_size()
245+
}
246+
245247
/// Return the number of code pages that have been mapped by the VirtualMemory.
246248
pub fn num_mapped_pages(&self) -> usize {
247249
// CodeBlock's page size != VirtualMem's page size on Linux,
@@ -287,7 +289,7 @@ impl CodeBlock {
287289
if cfg!(debug_assertions) && !cfg!(test) {
288290
// Leave illegal instructions at the beginning of each page to assert
289291
// we're not accidentally crossing page boundaries.
290-
start += JMP_PTR_BYTES;
292+
start += self.jmp_ptr_bytes();
291293
}
292294
start
293295
}

yjit/src/backend/arm64/mod.rs

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#![allow(unused_variables)]
33
#![allow(unused_imports)]
44

5+
use crate::asm::x86_64::jmp_ptr;
56
use crate::asm::{CodeBlock};
67
use crate::asm::arm64::*;
78
use crate::codegen::{JITState, CodegenGlobals};
@@ -38,8 +39,25 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
3839
pub const C_SP_REG: A64Opnd = X31;
3940
pub const C_SP_STEP: i32 = 16;
4041

41-
// The number of bytes that are generated by emit_jmp_ptr
42-
pub const JMP_PTR_BYTES: usize = 20;
42+
impl CodeBlock {
43+
// The maximum number of bytes that can be generated by emit_jmp_ptr.
44+
pub fn jmp_ptr_bytes(&self) -> usize {
45+
// b instruction's offset is encoded as imm26 times 4. It can jump to
46+
// +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128.
47+
let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) {
48+
1 // b instruction
49+
} else {
50+
5 // 4 instructions to load a 64-bit absolute address + br instruction
51+
};
52+
num_insns * 4
53+
}
54+
55+
// The maximum number of instructions that can be generated by emit_conditional_jump.
56+
fn conditional_jump_insns(&self) -> i32 {
57+
// The worst case is instructions for a jump + bcond.
58+
self.jmp_ptr_bytes() as i32 / 4 + 1
59+
}
60+
}
4361

4462
/// Map Opnd to A64Opnd
4563
impl From<Opnd> for A64Opnd {
@@ -110,7 +128,8 @@ fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
110128
// Make sure it's always a consistent number of
111129
// instructions in case it gets patched and has to
112130
// use the other branch.
113-
for _ in num_insns..(JMP_PTR_BYTES / 4) {
131+
assert!(num_insns * 4 <= cb.jmp_ptr_bytes());
132+
for _ in num_insns..(cb.jmp_ptr_bytes() / 4) {
114133
nop(cb);
115134
}
116135
}
@@ -697,6 +716,18 @@ impl Assembler
697716
// Here we're going to return 1 because we've only
698717
// written out 1 instruction.
699718
1
719+
} else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond
720+
// If the jump offset fits into the unconditional jump as
721+
// an immediate value, we can use inverse b.cond + b.
722+
//
723+
// We're going to write out the inverse condition so
724+
// that if it doesn't match it will skip over the
725+
// instruction used for branching.
726+
bcond(cb, Condition::inverse(CONDITION), 2.into());
727+
b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond
728+
729+
// We've only written out 2 instructions.
730+
2
700731
} else {
701732
// Otherwise, we need to load the address into a
702733
// register and use the branch register instruction.
@@ -720,7 +751,8 @@ impl Assembler
720751
// We need to make sure every kind of jump occupies the same number
721752
// of instructions (conditional_jump_insns) for invalidation purposes,
722753
// so we're going to write out padding nop instructions here.
723-
for _ in num_insns..6 { nop(cb); }
754+
assert!(num_insns <= cb.conditional_jump_insns());
755+
for _ in num_insns..cb.conditional_jump_insns() { nop(cb); }
724756
}
725757
},
726758
Target::Label(label_idx) => {
@@ -1063,7 +1095,7 @@ impl Assembler
10631095
Insn::RegTemps(_) |
10641096
Insn::SpillTemp(_) => (), // just a reg alloc signal, no code
10651097
Insn::PadInvalPatch => {
1066-
while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES && !cb.has_dropped_bytes() {
1098+
while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() {
10671099
nop(cb);
10681100
}
10691101
}

yjit/src/backend/x86_64/mod.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
3434
pub const C_RET_REG: Reg = RAX_REG;
3535
pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
3636

37-
// The number of bytes that are generated by jmp_ptr
38-
pub const JMP_PTR_BYTES: usize = 6;
37+
impl CodeBlock {
38+
// The number of bytes that are generated by jmp_ptr
39+
pub fn jmp_ptr_bytes(&self) -> usize { 6 }
40+
}
3941

4042
/// Map Opnd to X86Opnd
4143
impl From<Opnd> for X86Opnd {
@@ -718,8 +720,8 @@ impl Assembler
718720
Insn::SpillTemp(_) => (), // just a reg alloc signal, no code
719721
Insn::PadInvalPatch => {
720722
let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
721-
if code_size < JMP_PTR_BYTES {
722-
nop(cb, (JMP_PTR_BYTES - code_size) as u32);
723+
if code_size < cb.jmp_ptr_bytes() {
724+
nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32);
723725
}
724726
}
725727

0 commit comments

Comments
 (0)