author | Takashi Kokubun &lt;[email protected]&gt; | 2024-04-17 12:00:03 -0700
---|---|---
committer | &lt;[email protected]&gt; | 2024-04-17 15:00:03 -0400
commit | 4cc58ea0b865f2fd20f1e881ddbd4c4fab0b072c |
tree | 1587d79e55cc448e08ae30f8c4a3175f8195b1cc |
parent | ca764062b06f1bb587048bcf374b25a0903ca9e7 (diff)
YJIT: Optimize local variables when EP == BP (#10487)
-rw-r--r-- | vm.c | 5
-rw-r--r-- | yjit.c | 6
-rw-r--r-- | yjit.h | 2
-rw-r--r-- | yjit/bindgen/src/main.rs | 2
-rw-r--r-- | yjit/src/codegen.rs | 96
-rw-r--r-- | yjit/src/core.rs | 19
-rw-r--r-- | yjit/src/cruby.rs | 1
-rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 11
-rw-r--r-- | yjit/src/invariants.rs | 55
9 files changed, 173 insertions, 24 deletions
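The idea behind the change, in brief: as long as a frame's environment pointer (EP) still equals its base pointer (BP), the frame's environment has not escaped to the heap (which happens when, e.g., a proc or `binding` captures it), so YJIT can address locals relative to the SP register it already keeps in a machine register instead of first reloading the EP out of the control frame. `vm_make_env_each()` in vm.c is the point where an environment escapes, which is why it appears in the diff below. Here is a minimal sketch of the two addressing modes, using made-up names (`LocalOpnd`, `local_opnd`, `sp_to_ep`) rather than YJIT's actual API:

```rust
/// Word size of a Ruby VALUE on a 64-bit build.
const SIZEOF_VALUE: i32 = 8;

/// A symbolic memory operand: a displacement off one of two base registers.
#[derive(Debug, PartialEq)]
enum LocalOpnd {
    /// Load through the EP reloaded from the CFP: always correct, even
    /// after the environment has escaped, but costs an extra dependent load.
    EpRelative(i32),
    /// Load off the native SP register the JIT already tracks: only valid
    /// while EP == BP, i.e. while the environment has not escaped.
    SpRelative(i32),
}

/// Pick an addressing mode for the local sitting `ep_offset` slots below
/// the EP. `sp_to_ep` says where the EP sits relative to SP under the
/// canonical frame layout (a made-up parameter for this sketch).
fn local_opnd(ep_offset: i32, sp_to_ep: i32, assume_ep_is_bp: bool) -> LocalOpnd {
    if assume_ep_is_bp {
        LocalOpnd::SpRelative(SIZEOF_VALUE * (sp_to_ep - ep_offset))
    } else {
        LocalOpnd::EpRelative(-SIZEOF_VALUE * ep_offset)
    }
}

fn main() {
    // Same slot, two addressing modes: while the assumption holds we can
    // skip loading the EP out of the control frame entirely.
    assert_eq!(local_opnd(3, 0, false), LocalOpnd::EpRelative(-24));
    assert_eq!(local_opnd(3, 0, true), LocalOpnd::SpRelative(-24));
    println!("EP-relative and SP-relative modes address the same local");
}
```

When the assumption is broken at run time, every block compiled with the SP-relative form has to be invalidated, which is presumably what the invariants.rs changes in this commit wire up.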
diff --git a/vm.c b/vm.c
@@ -1007,6 +1007,11 @@ vm_make_env_each(const rb_execution_context_t * const ec, rb_control_frame_t *co
     }
 #endif

     return (VALUE)env;
 }
diff --git a/yjit.c b/yjit.c
@@ -629,6 +629,12 @@ rb_get_iseq_body_stack_max(const rb_iseq_t *iseq)
     return iseq->body->stack_max;
 }

 bool
 rb_get_iseq_flags_has_lead(const rb_iseq_t *iseq)
 {
diff --git a/yjit.h b/yjit.h
@@ -48,6 +48,7 @@
 void rb_yjit_tracing_invalidate_all(void);
 void rb_yjit_show_usage(int help, int highlight, unsigned int width, int columns);
 void rb_yjit_lazy_push_frame(const VALUE *pc);
 void rb_yjit_invalidate_no_singleton_class(VALUE klass);
 #else // !USE_YJIT
@@ -71,6 +72,7 @@
 static inline void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic
 static inline void rb_yjit_tracing_invalidate_all(void) {}
 static inline void rb_yjit_lazy_push_frame(const VALUE *pc) {}
 static inline void rb_yjit_invalidate_no_singleton_class(VALUE klass) {}
 #endif // #if USE_YJIT
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
@@ -298,6 +298,7 @@ fn main() {
         .allowlist_type("ruby_tag_type")
         .allowlist_type("ruby_vm_throw_flags")
         .allowlist_type("vm_check_match_type")

         // From yjit.c
         .allowlist_function("rb_iseq_(get|set)_yjit_payload")
@@ -415,6 +416,7 @@ fn main() {
         .allowlist_function("rb_get_iseq_body_parent_iseq")
         .allowlist_function("rb_get_iseq_body_iseq_encoded")
         .allowlist_function("rb_get_iseq_body_stack_max")
         .allowlist_function("rb_get_iseq_flags_has_lead")
         .allowlist_function("rb_get_iseq_flags_has_opt")
         .allowlist_function("rb_get_iseq_flags_has_kw")
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
@@ -46,7 +46,7 @@ type InsnGenFn = fn(
 /// Represents a [core::Block] while we build it.
 pub struct JITState {
     /// Instruction sequence for the compiling block
-    iseq: IseqPtr,

     /// The iseq index of the first instruction in the block
     starting_insn_idx: IseqIdx,
@@ -101,6 +101,9 @@ pub struct JITState {
     /// A list of classes that are not supposed to have a singleton class.
     pub no_singleton_class_assumptions: Vec<VALUE>,

     /// When true, the block is valid only when there is a total of one ractor running
     pub block_assumes_single_ractor: bool,
@@ -130,6 +133,7 @@ impl JITState {
             bop_assumptions: vec![],
             stable_constant_names_assumption: None,
             no_singleton_class_assumptions: vec![],
             block_assumes_single_ractor: false,
             perf_map: Rc::default(),
             perf_stack: vec![],
@@ -171,6 +175,23 @@ impl JITState {
         unsafe { *(self.pc.offset(arg_idx + 1)) }
     }

     // Get the index of the next instruction
     fn next_insn_idx(&self) -> u16 {
         self.insn_idx + insn_len(self.get_opcode()) as u16
@@ -250,6 +271,19 @@ impl JITState {
         true
     }

     fn get_cfp(&self) -> *mut rb_control_frame_struct {
         unsafe { get_ec_cfp(self.ec) }
     }
@@ -2203,16 +2237,22 @@ fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
 fn gen_getlocal_generic(
     jit: &mut JITState,
     asm: &mut Assembler,
     ep_offset: u32,
     level: u32,
 ) -> Option<CodegenStatus> {
-    // Load environment pointer EP (level 0) from CFP
-    let ep_opnd = gen_get_ep(asm, level);
-
-    // Load the local from the block
-    // val = *(vm_get_ep(GET_EP(), level) - idx);
-    let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
-    let local_opnd = Opnd::mem(64, ep_opnd, offs);

     // Write the local at SP
     let stack_top = if level == 0 {
@@ -2230,29 +2270,29 @@ fn gen_getlocal_generic(
 fn gen_getlocal(
     jit: &mut JITState,
     asm: &mut Assembler,
-    _ocb: &mut OutlinedCb,
 ) -> Option<CodegenStatus> {
     let idx = jit.get_arg(0).as_u32();
     let level = jit.get_arg(1).as_u32();
-    gen_getlocal_generic(jit, asm, idx, level)
 }

 fn gen_getlocal_wc0(
     jit: &mut JITState,
     asm: &mut Assembler,
-    _ocb: &mut OutlinedCb,
 ) -> Option<CodegenStatus> {
     let idx = jit.get_arg(0).as_u32();
-    gen_getlocal_generic(jit, asm, idx, 0)
 }

 fn gen_getlocal_wc1(
     jit: &mut JITState,
     asm: &mut Assembler,
-    _ocb: &mut OutlinedCb,
 ) -> Option<CodegenStatus> {
     let idx = jit.get_arg(0).as_u32();
-    gen_getlocal_generic(jit, asm, idx, 1)
 }

 fn gen_setlocal_generic(
@@ -2264,11 +2304,11 @@ fn gen_setlocal_generic(
 ) -> Option<CodegenStatus> {
     let value_type = asm.ctx.get_opnd_type(StackOpnd(0));

-    // Load environment pointer EP at level
-    let ep_opnd = gen_get_ep(asm, level);
-
     // Fallback because of write barrier
     if asm.ctx.get_chain_depth() > 0 {
         // This function should not yield to the GC.
         // void rb_vm_env_write(const VALUE *ep, int index, VALUE v)
         let index = -(ep_offset as i64);
@@ -2286,16 +2326,27 @@ fn gen_setlocal_generic(
         return Some(KeepCompiling);
     }

-    // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers
-    // only affect heap objects being written. If we know an immediate value is being written we
-    // can skip this check.
-    if !value_type.is_imm() {
-        // flags & VM_ENV_FLAG_WB_REQUIRED
         let flags_opnd = Opnd::mem(
             64,
             ep_opnd,
             SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32,
         );
         asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into());

         // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
@@ -2319,8 +2370,7 @@ fn gen_setlocal_generic(
     let stack_top = asm.stack_pop(1);

     // Write the value at the environment pointer
-    let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
-    asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top);

     Some(KeepCompiling)
 }
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
@@ -1657,6 +1657,9 @@ impl JITState {
         for klass in self.no_singleton_class_assumptions {
             track_no_singleton_class_assumption(blockref, klass);
         }

         blockref
     }
@@ -1798,6 +1801,13 @@ impl Context {
             return Opnd::mem(64, SP, offset);
         }

     /// Stop using a register for a given stack temp.
     /// This allows us to reuse the register for a value that we know is dead
     /// and will no longer be used (e.g. popped stack temp).
@@ -3124,6 +3134,12 @@ pub fn defer_compilation(
     // Likely a stub due to the increased chain depth
     let target0_address = branch.set_target(0, blockid, &next_ctx, ocb);

     // Call the branch generation function
     asm_comment!(asm, "defer_compilation");
     asm.mark_branch_start(&branch);
@@ -3307,9 +3323,10 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
     assert!(
         cb.get_write_ptr() <= block_end,
-        "invalidation wrote past end of block (code_size: {:?}, new_size: {})",
         block.code_size(),
         cb.get_write_ptr().as_offset() - block_start.as_offset(),
     );

     cb.set_write_ptr(cur_pos);
     cb.set_dropped_bytes(cur_dropped_bytes);
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
@@ -170,6 +170,7 @@
 pub use rb_iseq_encoded_size as get_iseq_encoded_size;
 pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq;
 pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded;
 pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max;
 pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead;
 pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt;
 pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw;
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
@@ -478,6 +478,16 @@ pub struct iseq_inline_iv_cache_entry {
 pub struct iseq_inline_cvar_cache_entry {
     pub entry: *mut rb_cvar_class_tbl_entry,
 }
 pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
 pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
 pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
@@ -1153,6 +1163,7 @@ extern "C" {
     pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
     pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE;
     pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
     pub fn rb_get_iseq_flags_has_lead(iseq: *const rb_iseq_t) -> bool;
     pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool;
     pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool;
diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs
@@ -59,6 +59,11 @@ pub struct Invariants {
     /// there has been a singleton class for the class after boot, so you cannot
     /// assume no singleton class going forward.
     no_singleton_classes: HashMap<VALUE, HashSet<BlockRef>>,
 }

 /// Private singleton instance of the invariants global struct.
@@ -76,6 +81,7 @@ impl Invariants {
             constant_state_blocks: HashMap::new(),
             block_constant_states: HashMap::new(),
             no_singleton_classes: HashMap::new(),
         });
     }
 }
@@ -154,6 +160,23 @@ pub fn has_singleton_class_of(klass: VALUE) -> bool {
         .map_or(false, |blocks| blocks.is_empty())
 }

 // Checks rb_method_basic_definition_p and registers the current block for invalidation if method
 // lookup changes.
 // A "basic method" is one defined during VM boot, so we can use this to check assumptions based on
@@ -420,6 +443,10 @@ pub fn block_assumptions_free(blockref: BlockRef) {
     for (_, blocks) in invariants.no_singleton_classes.iter_mut() {
         blocks.remove(&blockref);
     }
 }

 /// Callback from the opt_setinlinecache instruction in the interpreter.
@@ -515,6 +542,34 @@ pub extern "C" fn rb_yjit_invalidate_no_singleton_class(klass: VALUE) {
     }
 }

 // Invalidate all generated code and C method return code to contain
 // logic for firing the c_return TracePoint event. Once rb_vm_barrier()
 // returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
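For reference, this is the assumption-tracking shape that invariants.rs uses (visible above for `no_singleton_classes`, and presumably extended here for the EP == BP assumption): each invariant keeps a map from a runtime key to the set of dependent blocks, a runtime event drains and invalidates a whole bucket, and `block_assumptions_free()` scrubs freed blocks out of every bucket. A self-contained sketch of that bookkeeping, with hypothetical names (`no_ep_escape_iseqs`, `IseqId`, `BlockId` are not taken from the diff):

```rust
use std::collections::{HashMap, HashSet};

// Hypothetical stand-ins for YJIT's IseqPtr and BlockRef.
type IseqId = usize;
type BlockId = usize;

#[derive(Default)]
struct Invariants {
    /// "Frames of this iseq have not had their EP escape to the heap."
    /// Hypothetical field; mirrors the no_singleton_classes map above.
    no_ep_escape_iseqs: HashMap<IseqId, HashSet<BlockId>>,
}

impl Invariants {
    /// Compile time: `block` is only valid while the assumption holds.
    fn track(&mut self, iseq: IseqId, block: BlockId) {
        self.no_ep_escape_iseqs.entry(iseq).or_default().insert(block);
    }

    /// Run time, from the escape point (vm_make_env_each in the diff):
    /// drain the bucket and return the blocks to invalidate.
    fn invalidate(&mut self, iseq: IseqId) -> HashSet<BlockId> {
        self.no_ep_escape_iseqs.remove(&iseq).unwrap_or_default()
    }

    /// Mirrors block_assumptions_free(): scrub a freed block from every
    /// bucket so invalidation never touches a dangling reference.
    fn forget_block(&mut self, block: BlockId) {
        for blocks in self.no_ep_escape_iseqs.values_mut() {
            blocks.remove(&block);
        }
    }
}

fn main() {
    let mut inv = Invariants::default();
    inv.track(1, 100);
    inv.track(1, 101);
    inv.forget_block(101); // block 101 was freed before any escape
    assert_eq!(inv.invalidate(1), HashSet::from([100]));
    println!("only the still-live block is invalidated");
}
```

The per-bucket map is what keeps invalidation cheap: breaking the assumption for one iseq only touches the blocks that actually compiled against it, not every block in the system.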