summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <[email protected]>2022-09-09 11:37:41 -0400
committer<[email protected]>2022-09-09 11:37:41 -0400
commit848037cadd63091d5c39d06cd5d49aeee2258b4e ()
treeafd060fa7289b5d0851382e03e76f47a9d22cef6
parentb350053ae47dc4653ecb85f64360cf6eafba6323 (diff)
Better offsets (#6315)
* Introduce InstructionOffset for AArch64 There are a lot of instructions on AArch64 where we take an offset from PC in terms of the number of instructions. This is for loading a value relative to the PC or for jumping. We were usually accepting an A64Opnd or an i32. It can get confusing and inconsistent though because sometimes you would divide by 4 to get the number of instructions or multiply by 4 to get the number of bytes. This commit adds a struct that wraps an i32 in order to keep all of that logic in one place. It makes it much easier to read and reason about how these offsets are getting used. * Use b instruction when the offset fits on AArch64
Notes: Merged-By: maximecb <[email protected]>
-rw-r--r--yjit/src/asm/arm64/arg/inst_offset.rs47
-rw-r--r--yjit/src/asm/arm64/arg/mod.rs2
-rw-r--r--yjit/src/asm/arm64/inst/branch_cond.rs18
-rw-r--r--yjit/src/asm/arm64/inst/call.rs28
-rw-r--r--yjit/src/asm/arm64/inst/load_literal.rs14
-rw-r--r--yjit/src/asm/arm64/mod.rs63
-rw-r--r--yjit/src/backend/arm64/mod.rs52
7 files changed, 133 insertions, 91 deletions
@@ -0,0 +1,47 @@
@@ -3,6 +3,7 @@
mod bitmask_imm;
mod condition;
mod sf;
mod shifted_imm;
mod sys_reg;
@@ -10,6 +11,7 @@ mod truncate;
pub use bitmask_imm::BitmaskImmediate;
pub use condition::Condition;
pub use sf::Sf;
pub use shifted_imm::ShiftedImmediate;
pub use sys_reg::SystemRegister;
@@ -1,4 +1,4 @@
-use super::super::arg::{Condition, truncate_imm};
/// The struct that represents an A64 conditional branch instruction that can be
/// encoded.
@@ -14,14 +14,14 @@ pub struct BranchCond {
cond: u8,
/// The instruction offset from this instruction to branch to.
- imm19: i32
}
impl BranchCond {
/// B.cond
/// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
- pub fn bcond(cond: u8, imm19: i32) -> Self {
- Self { cond, imm19 }
}
}
@@ -34,7 +34,7 @@ impl From<BranchCond> for u32 {
0
| (1 << 30)
| (FAMILY << 26)
- | (truncate_imm::<_, 19>(inst.imm19) << 5)
| (inst.cond as u32)
}
}
@@ -53,25 +53,25 @@ mod tests {
#[test]
fn test_b_eq() {
- let result: u32 = BranchCond::bcond(Condition::EQ, 32).into();
assert_eq!(0x54000400, result);
}
#[test]
fn test_b_vs() {
- let result: u32 = BranchCond::bcond(Condition::VS, 32).into();
assert_eq!(0x54000406, result);
}
#[test]
fn test_b_eq_max() {
- let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 18) - 1).into();
assert_eq!(0x547fffe0, result);
}
#[test]
fn test_b_eq_min() {
- let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 18)).into();
assert_eq!(0x54800000, result);
}
}
@@ -1,4 +1,4 @@
-use super::super::arg::truncate_imm;
/// The operation to perform for this instruction.
enum Op {
@@ -20,8 +20,8 @@ enum Op {
/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
///
pub struct Call {
- /// The PC-relative offset to jump to (which will be multiplied by 4).
- imm26: i32,
/// The operation to perform for this instruction.
op: Op
@@ -30,14 +30,14 @@ pub struct Call {
impl Call {
/// B
/// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-
- pub fn b(imm26: i32) -> Self {
- Self { imm26, op: Op::Branch }
}
/// BL
/// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en
- pub fn bl(imm26: i32) -> Self {
- Self { imm26, op: Op::BranchWithLink }
}
}
@@ -50,7 +50,7 @@ impl From<Call> for u32 {
0
| ((inst.op as u32) << 31)
| (FAMILY << 26)
- | truncate_imm::<_, 26>(inst.imm26)
}
}
@@ -68,37 +68,37 @@ mod tests {
#[test]
fn test_bl() {
- let result: u32 = Call::bl(0).into();
assert_eq!(0x94000000, result);
}
#[test]
fn test_bl_positive() {
- let result: u32 = Call::bl(256).into();
assert_eq!(0x94000100, result);
}
#[test]
fn test_bl_negative() {
- let result: u32 = Call::bl(-256).into();
assert_eq!(0x97ffff00, result);
}
#[test]
fn test_b() {
- let result: u32 = Call::b(0).into();
assert_eq!(0x14000000, result);
}
#[test]
fn test_b_positive() {
- let result: u32 = Call::b((1 << 25) - 1).into();
assert_eq!(0x15ffffff, result);
}
#[test]
fn test_b_negative() {
- let result: u32 = Call::b(-(1 << 25)).into();
assert_eq!(0x16000000, result);
}
}
@@ -1,4 +1,4 @@
-use super::super::arg::truncate_imm;
/// The size of the operands being operated on.
enum Opc {
@@ -32,7 +32,7 @@ pub struct LoadLiteral {
rt: u8,
/// The PC-relative number of instructions to load the value from.
- imm19: i32,
/// The size of the operands being operated on.
opc: Opc
@@ -41,8 +41,8 @@ pub struct LoadLiteral {
impl LoadLiteral {
/// LDR (load literal)
/// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en
- pub fn ldr_literal(rt: u8, imm19: i32, num_bits: u8) -> Self {
- Self { rt, imm19, opc: num_bits.into() }
}
}
@@ -56,7 +56,7 @@ impl From<LoadLiteral> for u32 {
| ((inst.opc as u32) << 30)
| (1 << 28)
| (FAMILY << 25)
- | (truncate_imm::<_, 19>(inst.imm19) << 5)
| (inst.rt as u32)
}
}
@@ -75,14 +75,14 @@ mod tests {
#[test]
fn test_ldr_positive() {
- let inst = LoadLiteral::ldr_literal(0, 5, 64);
let result: u32 = inst.into();
assert_eq!(0x580000a0, result);
}
#[test]
fn test_ldr_negative() {
- let inst = LoadLiteral::ldr_literal(0, -5, 64);
let result: u32 = inst.into();
assert_eq!(0x58ffff60, result);
}
@@ -190,15 +190,9 @@ pub const fn b_offset_fits_bits(offset: i64) -> bool {
}
/// B - branch without link (offset is number of instructions to jump)
-pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) {
- let bytes: [u8; 4] = match imm26 {
- A64Opnd::Imm(imm26) => {
- assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less.");
-
- Call::b(imm26 as i32).into()
- },
- _ => panic!("Invalid operand combination to b instruction.")
- };
cb.write_bytes(&bytes);
}
@@ -208,33 +202,21 @@ pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) {
/// value into a register first, then use the b.cond instruction to skip past a
/// direct jump.
pub const fn bcond_offset_fits_bits(offset: i64) -> bool {
- imm_fits_bits(offset, 21) && (offset & 0b11 == 0)
}
/// B.cond - branch to target if condition is true
-pub fn bcond(cb: &mut CodeBlock, cond: u8, byte_offset: A64Opnd) {
- let bytes: [u8; 4] = match byte_offset {
- A64Opnd::Imm(imm) => {
- assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary.");
-
- BranchCond::bcond(cond, (imm / 4) as i32).into()
- },
- _ => panic!("Invalid operand combination to bcond instruction."),
- };
cb.write_bytes(&bytes);
}
/// BL - branch with link (offset is number of instructions to jump)
-pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) {
- let bytes: [u8; 4] = match imm26 {
- A64Opnd::Imm(imm26) => {
- assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less.");
-
- Call::bl(imm26 as i32).into()
- },
- _ => panic!("Invalid operand combination to bl instruction.")
- };
cb.write_bytes(&bytes);
}
@@ -413,7 +395,7 @@ pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
}
/// LDR - load a PC-relative memory address into a register
-pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) {
let bytes: [u8; 4] = match rt {
A64Opnd::Reg(rt) => {
LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into()
@@ -1087,45 +1069,52 @@ mod tests {
#[test]
fn test_bcond() {
- check_bytes("01200054", |cb| bcond(cb, Condition::NE, A64Opnd::new_imm(0x400)));
}
#[test]
fn test_b() {
- check_bytes("ffffff15", |cb| b(cb, A64Opnd::new_imm((1 << 25) - 1)));
}
#[test]
#[should_panic]
fn test_b_too_big() {
// There are 26 bits available
- check_bytes("", |cb| b(cb, A64Opnd::new_imm(1 << 25)));
}
#[test]
#[should_panic]
fn test_b_too_small() {
// There are 26 bits available
- check_bytes("", |cb| b(cb, A64Opnd::new_imm(-(1 << 25) - 1)));
}
#[test]
fn test_bl() {
- check_bytes("00000096", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25))));
}
#[test]
#[should_panic]
fn test_bl_too_big() {
// There are 26 bits available
- check_bytes("", |cb| bl(cb, A64Opnd::new_imm(1 << 25)));
}
#[test]
#[should_panic]
fn test_bl_too_small() {
// There are 26 bits available
- check_bytes("", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25) - 1)));
}
#[test]
@@ -1200,7 +1189,7 @@ mod tests {
#[test]
fn test_ldr_literal() {
- check_bytes("40010058", |cb| ldr_literal(cb, X0, 10));
}
#[test]
@@ -589,13 +589,15 @@ impl Assembler
Target::CodePtr(dst_ptr) => {
let dst_addr = dst_ptr.into_i64();
let src_addr = cb.get_write_ptr().into_i64();
- let offset = dst_addr - src_addr;
- let num_insns = if bcond_offset_fits_bits(offset) {
// If the jump offset fits into the conditional jump as
// an immediate value and it's properly aligned, then we
- // can use the b.cond instruction directly.
- bcond(cb, CONDITION, A64Opnd::new_imm(offset));
// Here we're going to return 1 because we've only
// written out 1 instruction.
@@ -604,12 +606,12 @@ impl Assembler
// Otherwise, we need to load the address into a
// register and use the branch register instruction.
let dst_addr = dst_ptr.into_u64();
- let load_insns: i64 = emit_load_size(dst_addr).into();
// We're going to write out the inverse condition so
// that if it doesn't match it will skip over the
// instructions used for branching.
- bcond(cb, Condition::inverse(CONDITION), A64Opnd::new_imm((load_insns + 2) * 4));
emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
br(cb, Assembler::SCRATCH0);
@@ -630,7 +632,8 @@ impl Assembler
// offset. We're going to assume we can fit into a single
// b.cond instruction. It will panic otherwise.
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
- bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - (src_addr - 4)));
});
},
Target::FunPtr(_) => unreachable!()
@@ -756,8 +759,8 @@ impl Assembler
// references to GC'd Value operands. If the value
// being loaded is a heap object, we'll report that
// back out to the gc_offsets list.
- ldr_literal(cb, out.into(), 2);
- b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4));
cb.write_bytes(&value.as_u64().to_le_bytes());
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
@@ -844,14 +847,11 @@ impl Assembler
// The offset to the call target in bytes
let src_addr = cb.get_write_ptr().into_i64();
let dst_addr = target.unwrap_fun_ptr() as i64;
- let offset = dst_addr - src_addr;
- // The offset in instruction count for BL's immediate
- let offset = offset / 4;
// Use BL if the offset is short enough to encode as an immediate.
// Otherwise, use BLR with a register.
- if b_offset_fits_bits(offset) {
- bl(cb, A64Opnd::new_imm(offset));
} else {
emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
blr(cb, Self::SCRATCH0);
@@ -875,19 +875,22 @@ impl Assembler
let src_addr = cb.get_write_ptr().into_i64();
let dst_addr = dst_ptr.into_i64();
- // The offset between the two instructions in bytes.
- // Note that when we encode this into a b
- // instruction, we'll divide by 4 because it accepts
- // the number of instructions to jump over.
- let offset = dst_addr - src_addr;
- let offset = offset / 4;
-
// If the offset is short enough, then we'll use the
// branch instruction. Otherwise, we'll move the
// destination into a register and use the branch
// register instruction.
- let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
- br(cb, Self::SCRATCH0);
for _ in num_insns..4 {
nop(cb);
}
@@ -899,7 +902,8 @@ impl Assembler
// to assume we can fit into a single b instruction.
// It will panic otherwise.
cb.label_ref(*label_idx, 4, |cb, src_addr, dst_addr| {
- b(cb, A64Opnd::new_imm((dst_addr - (src_addr - 4)) / 4));
});
},
_ => unreachable!()