summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Takashi Kokubun <[email protected]> 2024-04-25 07:04:53 -0700
committer: <[email protected]> 2024-04-25 10:04:53 -0400
commit: 7ab1a608e7413cdb0f93243eb3e6e20a32cec44e
tree: 29836bd248b0e5bce497ae6969a6b6e7c2055152
parent: f248e1008a8f79cca801b27d512a587f65a0dd36 (diff)
YJIT: Optimize local variables when EP == BP (take 2) (#10607)
* Revert "Revert "YJIT: Optimize local variables when EP == BP" (#10584)"

  This reverts commit c8783441952217c18e523749c821f82cd7e5d222.

* YJIT: Take care of GC references in ISEQ invariants

  Co-authored-by: Alan Wu <[email protected]>

---------

Co-authored-by: Alan Wu <[email protected]>
-rw-r--r-- bootstraptest/test_yjit.rb 13
-rw-r--r-- iseq.c 4
-rw-r--r-- vm.c 5
-rw-r--r-- yjit.c 6
-rw-r--r-- yjit.h 10
-rw-r--r-- yjit/bindgen/src/main.rs 2
-rw-r--r-- yjit/src/codegen.rs 96
-rw-r--r-- yjit/src/core.rs 31
-rw-r--r-- yjit/src/cruby.rs 1
-rw-r--r-- yjit/src/cruby_bindings.inc.rs 11
-rw-r--r-- yjit/src/invariants.rs 75
11 files changed, 222 insertions, 32 deletions
@@ -2317,6 +2317,19 @@ assert_equal '123', %q{
foo(Foo)
}
# invokesuper edge case
assert_equal '[:A, [:A, :B]]', %q{
class B
@@ -167,7 +167,7 @@ rb_iseq_free(const rb_iseq_t *iseq)
struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
rb_rjit_free_iseq(iseq); /* Notify RJIT */
#if USE_YJIT
- rb_yjit_iseq_free(body->yjit_payload);
if (FL_TEST_RAW((VALUE)iseq, ISEQ_TRANSLATED)) {
RUBY_ASSERT(rb_yjit_live_iseq_count > 0);
rb_yjit_live_iseq_count--;
@@ -377,7 +377,7 @@ rb_iseq_mark_and_move(rb_iseq_t *iseq, bool reference_updating)
rb_rjit_iseq_update_references(body);
#endif
#if USE_YJIT
- rb_yjit_iseq_update_references(body->yjit_payload);
#endif
}
else {
@@ -1007,6 +1007,11 @@ vm_make_env_each(const rb_execution_context_t * const ec, rb_control_frame_t *co
}
#endif
return (VALUE)env;
}
@@ -629,6 +629,12 @@ rb_get_iseq_body_stack_max(const rb_iseq_t *iseq)
return iseq->body->stack_max;
}
bool
rb_get_iseq_flags_has_lead(const rb_iseq_t *iseq)
{
@@ -40,14 +40,15 @@ void rb_yjit_init(bool yjit_enabled);
void rb_yjit_bop_redefined(int redefined_flag, enum ruby_basic_operators bop);
void rb_yjit_constant_state_changed(ID id);
void rb_yjit_iseq_mark(void *payload);
-void rb_yjit_iseq_update_references(void *payload);
-void rb_yjit_iseq_free(void *payload);
void rb_yjit_before_ractor_spawn(void);
void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic, unsigned insn_idx);
void rb_yjit_tracing_invalidate_all(void);
void rb_yjit_show_usage(int help, int highlight, unsigned int width, int columns);
void rb_yjit_lazy_push_frame(const VALUE *pc);
void rb_yjit_invalidate_no_singleton_class(VALUE klass);
#else
// !USE_YJIT
@@ -64,13 +65,14 @@ static inline void rb_yjit_init(bool yjit_enabled) {}
static inline void rb_yjit_bop_redefined(int redefined_flag, enum ruby_basic_operators bop) {}
static inline void rb_yjit_constant_state_changed(ID id) {}
static inline void rb_yjit_iseq_mark(void *payload) {}
-static inline void rb_yjit_iseq_update_references(void *payload) {}
-static inline void rb_yjit_iseq_free(void *payload) {}
static inline void rb_yjit_before_ractor_spawn(void) {}
static inline void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic, unsigned insn_idx) {}
static inline void rb_yjit_tracing_invalidate_all(void) {}
static inline void rb_yjit_lazy_push_frame(const VALUE *pc) {}
static inline void rb_yjit_invalidate_no_singleton_class(VALUE klass) {}
#endif // #if USE_YJIT
@@ -299,6 +299,7 @@ fn main() {
.allowlist_type("ruby_tag_type")
.allowlist_type("ruby_vm_throw_flags")
.allowlist_type("vm_check_match_type")
// From yjit.c
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
@@ -416,6 +417,7 @@ fn main() {
.allowlist_function("rb_get_iseq_body_parent_iseq")
.allowlist_function("rb_get_iseq_body_iseq_encoded")
.allowlist_function("rb_get_iseq_body_stack_max")
.allowlist_function("rb_get_iseq_flags_has_lead")
.allowlist_function("rb_get_iseq_flags_has_opt")
.allowlist_function("rb_get_iseq_flags_has_kw")
@@ -46,7 +46,7 @@ type InsnGenFn = fn(
/// Represents a [core::Block] while we build it.
pub struct JITState {
/// Instruction sequence for the compiling block
- iseq: IseqPtr,
/// The iseq index of the first instruction in the block
starting_insn_idx: IseqIdx,
@@ -101,6 +101,9 @@ pub struct JITState {
/// A list of classes that are not supposed to have a singleton class.
pub no_singleton_class_assumptions: Vec<VALUE>,
/// When true, the block is valid only when there is a total of one ractor running
pub block_assumes_single_ractor: bool,
@@ -130,6 +133,7 @@ impl JITState {
bop_assumptions: vec![],
stable_constant_names_assumption: None,
no_singleton_class_assumptions: vec![],
block_assumes_single_ractor: false,
perf_map: Rc::default(),
perf_stack: vec![],
@@ -171,6 +175,23 @@ impl JITState {
unsafe { *(self.pc.offset(arg_idx + 1)) }
}
// Get the index of the next instruction
fn next_insn_idx(&self) -> u16 {
self.insn_idx + insn_len(self.get_opcode()) as u16
@@ -250,6 +271,19 @@ impl JITState {
true
}
fn get_cfp(&self) -> *mut rb_control_frame_struct {
unsafe { get_ec_cfp(self.ec) }
}
@@ -2203,16 +2237,22 @@ fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
fn gen_getlocal_generic(
jit: &mut JITState,
asm: &mut Assembler,
ep_offset: u32,
level: u32,
) -> Option<CodegenStatus> {
- // Load environment pointer EP (level 0) from CFP
- let ep_opnd = gen_get_ep(asm, level);
- // Load the local from the block
- // val = *(vm_get_ep(GET_EP(), level) - idx);
- let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
- let local_opnd = Opnd::mem(64, ep_opnd, offs);
// Write the local at SP
let stack_top = if level == 0 {
@@ -2230,29 +2270,29 @@ fn gen_getlocal_generic(
fn gen_getlocal(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
let level = jit.get_arg(1).as_u32();
- gen_getlocal_generic(jit, asm, idx, level)
}
fn gen_getlocal_wc0(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
- gen_getlocal_generic(jit, asm, idx, 0)
}
fn gen_getlocal_wc1(
jit: &mut JITState,
asm: &mut Assembler,
- _ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
- gen_getlocal_generic(jit, asm, idx, 1)
}
fn gen_setlocal_generic(
@@ -2264,11 +2304,11 @@ fn gen_setlocal_generic(
) -> Option<CodegenStatus> {
let value_type = asm.ctx.get_opnd_type(StackOpnd(0));
- // Load environment pointer EP at level
- let ep_opnd = gen_get_ep(asm, level);
-
// Fallback because of write barrier
if asm.ctx.get_chain_depth() > 0 {
// This function should not yield to the GC.
// void rb_vm_env_write(const VALUE *ep, int index, VALUE v)
let index = -(ep_offset as i64);
@@ -2286,16 +2326,27 @@ fn gen_setlocal_generic(
return Some(KeepCompiling);
}
- // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers
- // only affect heap objects being written. If we know an immediate value is being written we
- // can skip this check.
- if !value_type.is_imm() {
- // flags & VM_ENV_FLAG_WB_REQUIRED
let flags_opnd = Opnd::mem(
64,
ep_opnd,
SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32,
);
asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into());
// if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
@@ -2319,8 +2370,7 @@ fn gen_setlocal_generic(
let stack_top = asm.stack_pop(1);
// Write the value at the environment pointer
- let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
- asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top);
Some(KeepCompiling)
}
@@ -1138,8 +1138,12 @@ pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback:
/// Free the per-iseq payload
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
let payload = {
if payload.is_null() {
// Nothing to free.
return;
@@ -1266,7 +1270,11 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
/// GC callback for updating GC objects in the per-iseq payload.
/// This is a mirror of [rb_yjit_iseq_mark].
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
let payload = if payload.is_null() {
// Nothing to update.
return;
@@ -1657,6 +1665,9 @@ impl JITState {
for klass in self.no_singleton_class_assumptions {
track_no_singleton_class_assumption(blockref, klass);
}
blockref
}
@@ -1798,6 +1809,13 @@ impl Context {
return Opnd::mem(64, SP, offset);
}
/// Stop using a register for a given stack temp.
/// This allows us to reuse the register for a value that we know is dead
/// and will no longer be used (e.g. popped stack temp).
@@ -3130,6 +3148,12 @@ pub fn defer_compilation(
// Likely a stub due to the increased chain depth
let target0_address = branch.set_target(0, blockid, &next_ctx, ocb);
// Call the branch generation function
asm_comment!(asm, "defer_compilation");
asm.mark_branch_start(&branch);
@@ -3313,9 +3337,10 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
assert!(
cb.get_write_ptr() <= block_end,
- "invalidation wrote past end of block (code_size: {:?}, new_size: {})",
block.code_size(),
cb.get_write_ptr().as_offset() - block_start.as_offset(),
);
cb.set_write_ptr(cur_pos);
cb.set_dropped_bytes(cur_dropped_bytes);
@@ -170,6 +170,7 @@ pub use rb_iseq_encoded_size as get_iseq_encoded_size;
pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq;
pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded;
pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max;
pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead;
pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt;
pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw;
@@ -478,6 +478,16 @@ pub struct iseq_inline_iv_cache_entry {
pub struct iseq_inline_cvar_cache_entry {
pub entry: *mut rb_cvar_class_tbl_entry,
}
pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
@@ -1154,6 +1164,7 @@ extern "C" {
pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE;
pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_iseq_flags_has_lead(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool;
@@ -59,6 +59,11 @@ pub struct Invariants {
/// there has been a singleton class for the class after boot, so you cannot
/// assume no singleton class going forward.
no_singleton_classes: HashMap<VALUE, HashSet<BlockRef>>,
}
/// Private singleton instance of the invariants global struct.
@@ -76,6 +81,7 @@ impl Invariants {
constant_state_blocks: HashMap::new(),
block_constant_states: HashMap::new(),
no_singleton_classes: HashMap::new(),
});
}
}
@@ -154,6 +160,43 @@ pub fn has_singleton_class_of(klass: VALUE) -> bool {
.map_or(false, |blocks| blocks.is_empty())
}
// Checks rb_method_basic_definition_p and registers the current block for invalidation if method
// lookup changes.
// A "basic method" is one defined during VM boot, so we can use this to check assumptions based on
@@ -420,6 +463,10 @@ pub fn block_assumptions_free(blockref: BlockRef) {
for (_, blocks) in invariants.no_singleton_classes.iter_mut() {
blocks.remove(&blockref);
}
}
/// Callback from the opt_setinlinecache instruction in the interpreter.
@@ -515,6 +562,34 @@ pub extern "C" fn rb_yjit_invalidate_no_singleton_class(klass: VALUE) {
}
}
// Invalidate all generated code and C method return code to contain
// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which