author | Alan Wu &lt;[email protected]&gt; | 2021-08-25 17:00:45 -0400
---|---|---
committer | Alan Wu &lt;[email protected]&gt; | 2021-10-20 18:19:39 -0400
commit | bd876c243aeace00ea312d0a5bbff091ccc84ba2 |
tree | e30156ab87c3345a995dcd47c11dc9a535643694 |
parent | 0562459473f44c270784074a09a33ea30d68e457 |
TracePoint support
This change fixes some cases where YJIT fails to fire tracing events. Most of the situations YJIT did not handle correctly involve enabling tracing while running inside generated code.

A new operation to invalidate all generated code is added, which uses patching to make generated code exit at the next VM instruction boundary. A new routine called `jit_prepare_routine_call()` is introduced to facilitate this and should be used when generating code that could allocate, or could otherwise use `RB_VM_LOCK_ENTER()`.

The `c_return` event is fired in the middle of an instruction, as opposed to at an instruction boundary, so it requires special handling. C method call return points are patched to go to a function which does everything the interpreter does, including firing the `c_return` event. The generated code for C method calls normally does not fire the event.

Invalidated code should not change after patching so the exits are not clobbered. A new variable is introduced to track the region of code that should not change.
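The message describes `jit_prepare_routine_call()` only at a high level, and the diff below shows just the `jit_save_pc`/`jit_save_sp` pairs it replaces (the added lines are not visible here). As a rough mental model, this is a minimal, self-contained C sketch of what such a helper accomplishes; the struct layout, field names (including `record_boundary_patch_point`), and signature are invented stand-ins for illustration, not YJIT's actual API.

```c
#include <assert.h>
#include <stdbool.h>

/* Toy stand-ins for the VM control frame and JIT state. Every name and
 * field here is hypothetical; this models the idea, not YJIT's real API. */
typedef struct { const int *pc; long *sp; } control_frame_t;

typedef struct {
    control_frame_t *cfp;  /* frame being compiled against */
    const int *insn_pc;    /* PC of the instruction being compiled */
    int insn_len;          /* length of that instruction */
    int sp_offset;         /* stack growth not yet written back to the frame */
    bool record_boundary_patch_point; /* a patchable exit belongs here */
} jitstate_t;

/* Sketch of what jit_prepare_routine_call() accomplishes: before emitting a
 * C call that might allocate, raise, or take RB_VM_LOCK_ENTER(), write the
 * PC of the *next* instruction and the true SP into the frame. If tracing
 * gets enabled during that call, generated code can then be patched to exit
 * at this now-consistent instruction boundary. */
static void
jit_prepare_routine_call(jitstate_t *jit)
{
    jit->cfp->pc = jit->insn_pc + jit->insn_len; /* next insn boundary */
    jit->cfp->sp += jit->sp_offset;              /* flush the stack pointer */
    jit->sp_offset = 0;
    jit->record_boundary_patch_point = true;     /* an exit can be patched in */
}

int
main(void)
{
    long stack[8];
    const int iseq[3] = {0};
    control_frame_t cfp = { iseq, stack };
    jitstate_t jit = { &cfp, iseq, 1, 2, false };

    jit_prepare_routine_call(&jit);
    assert(cfp.pc == iseq + 1);  /* PC points at the next instruction */
    assert(cfp.sp == stack + 2); /* SP covers operands pushed so far */
    assert(jit.record_boundary_patch_point);
    return 0;
}
```

Writing the PC of the next instruction rather than the current one is what makes the saved state a valid instruction boundary for patched-in exits, matching the removed comment "Write incremented pc to cfp->pc as the routine can raise and allocate" in the hunks below.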
-rw-r--r-- | README.md | 1
-rw-r--r-- | bootstraptest/test_yjit.rb | 214
-rw-r--r-- | common.mk | 6
-rw-r--r-- | iseq.c | 16
-rw-r--r-- | vm_trace.c | 5
-rw-r--r-- | yjit.h | 1
-rw-r--r-- | yjit_codegen.c | 318
-rw-r--r-- | yjit_codegen.h | 5
-rw-r--r-- | yjit_core.c | 53
-rw-r--r-- | yjit_iface.c | 11
-rw-r--r-- | yjit_iface.h | 4
11 files changed, 550 insertions, 84 deletions
```diff
diff --git a/README.md b/README.md
@@ -32,7 +32,6 @@ To cite this repository in your publications, please use this bibtex snippet:
 YJIT is a work in progress and as such may not yet be mature enough for mission-critical software. Below is a list of known limitations, all of which we plan to eventually address:
 
-- No support for the `TracePoint` API (see [#54](https://github.com/Shopify/yjit/issues/54)).
 - No garbage collection for generated code.
 
 Because there is no GC for generated code yet, your software could run out of executable memory if it is large enough. You can change how much executable memory is allocated using [YJIT's command-line options](https://github.com/Shopify/yjit#command-line-options).
```

```diff
diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb
@@ -1612,3 +1612,217 @@ end
 bar(123, 1.1)
 bar(123, 1.1)
 }
```

```diff
diff --git a/common.mk b/common.mk
@@ -7024,7 +7024,6 @@ iseq.$(OBJEXT): {$(VPATH)}vm_callinfo.h
 iseq.$(OBJEXT): {$(VPATH)}vm_core.h
 iseq.$(OBJEXT): {$(VPATH)}vm_opts.h
 iseq.$(OBJEXT): {$(VPATH)}yjit.h
-iseq.$(OBJEXT): {$(VPATH)}yjit_asm.h
 load.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h
 load.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h
 load.$(OBJEXT): $(CCAN_DIR)/list/list.h
@@ -16722,6 +16721,7 @@ yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/gc.h
 yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/imemo.h
 yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/object.h
 yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/re.h
 yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/serial.h
 yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
 yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/string.h
@@ -16746,6 +16746,7 @@ yjit_codegen.$(OBJEXT): {$(VPATH)}darray.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}debug_counter.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}defines.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}encoding.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}id.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}id_table.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}insns.def
@@ -16898,6 +16899,9 @@ yjit_codegen.$(OBJEXT): {$(VPATH)}missing.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}node.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}onigmo.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}oniguruma.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}ruby_assert.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}ruby_atomic.h
 yjit_codegen.$(OBJEXT): {$(VPATH)}st.h
```

```diff
diff --git a/iseq.c b/iseq.c
@@ -3181,14 +3181,6 @@ typedef struct insn_data_struct {
 } insn_data_t;
 
 static insn_data_t insn_data[VM_INSTRUCTION_SIZE/2];
-
-#include "yjit_asm.h"
-
 void
 rb_vm_encoded_insn_data_table_init(void)
 {
@@ -3305,10 +3297,6 @@ iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events,
     VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
 
-#if USE_MJIT
-    // Force write the jit function to NULL
-    *((jit_func_t *)(&body->jit_func)) = 0;
-#endif
     for (pc=0; pc<body->iseq_size;) {
         const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pc);
@@ -3445,10 +3433,6 @@ rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events)
         rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
         pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & enabled_events, true);
     }
-#if USE_MJIT
-    // Force write the jit function to NULL
-    *((jit_func_t *)(&body->jit_func)) = 0;
-#endif
 }
```

```diff
diff --git a/vm_trace.c b/vm_trace.c
@@ -30,6 +30,7 @@
 #include "ruby/debug.h"
 #include "vm_core.h"
 #include "ruby/ractor.h"
 #include "builtin.h"
@@ -97,6 +98,8 @@ update_global_event_hook(rb_event_flag_t vm_events)
         rb_clear_attr_ccs();
     }
 
     ruby_vm_event_flags = vm_events;
     ruby_vm_event_enabled_global_flags |= vm_events;
     rb_objspace_set_event_hook(vm_events);
@@ -1212,6 +1215,8 @@ rb_tracepoint_enable_for_target(VALUE tpval, VALUE target, VALUE target_line)
         rb_raise(rb_eArgError, "can not enable any hooks");
     }
 
     ruby_vm_event_local_num++;
 
     tp->tracing = 1;
```

```diff
diff --git a/yjit.h b/yjit.h
@@ -73,5 +73,6 @@ void rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body);
 void rb_yjit_iseq_free(const struct rb_iseq_constant_body *body);
 void rb_yjit_before_ractor_spawn(void);
 void yjit_constant_ic_update(const rb_iseq_t *iseq, IC ic);
 
 #endif // #ifndef YJIT_H
```

```diff
diff --git a/yjit_codegen.c b/yjit_codegen.c
@@ -1,17 +1,20 @@
-#include <assert.h>
-#include "insns.inc"
 #include "internal.h"
 #include "vm_core.h"
 #include "vm_sync.h"
 #include "vm_callinfo.h"
 #include "builtin.h"
 #include "internal/compile.h"
 #include "internal/class.h"
 #include "internal/object.h"
 #include "internal/string.h"
 #include "internal/variable.h"
 #include "internal/re.h"
 #include "insns_info.inc"
 #include "yjit.h"
 #include "yjit_iface.h"
 #include "yjit_core.h"
@@ -36,6 +39,25 @@ codeblock_t* ocb = NULL;
 // Code for exiting back to the interpreter from the leave insn
 static void *leave_exit_code;
 
 // Print the current source location for debugging purposes
 RBIMPL_ATTR_MAYBE_UNUSED()
 static void
@@ -156,6 +178,28 @@ jit_save_sp(jitstate_t* jit, ctx_t* ctx)
     }
 }
 
 static bool jit_guard_known_klass(jitstate_t *jit, ctx_t* ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
 
 #if RUBY_DEBUG
@@ -290,15 +334,13 @@ _counted_side_exit(uint8_t *existing_side_exit, int64_t *counter)
 // Generate an exit to return to the interpreter
-static uint8_t *
-yjit_gen_exit(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
 {
-    uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
     ADD_COMMENT(cb, "exit to interpreter");
 
-    VALUE *exit_pc = jit->pc;
-
     // Generate the code to exit to the interpreters
     // Write the adjusted SP back into the CFP
     if (ctx->sp_offset != 0) {
@@ -329,7 +371,7 @@ yjit_gen_exit(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
     mov(cb, RAX, imm_opnd(Qundef));
     ret(cb);
 
-    return code_ptr;
 }
 
 // Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
@@ -363,7 +405,8 @@ yjit_gen_leave_exit(codeblock_t *cb)
 static uint8_t *
 yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
 {
-    return yjit_gen_exit(jit, ctx, ocb);
 }
 
 // Generate a runtime guard that ensures the PC is at the start of the iseq,
@@ -399,6 +442,64 @@ yjit_pc_guard(const rb_iseq_t *iseq)
     cb_link_labels(cb);
 }
 
 /*
 Compile an interpreter entry block to be inserted into an iseq
 Returns `NULL` if compilation fails.
@@ -473,6 +574,13 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
     blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
 
     // Generate the jump instruction
     gen_direct_jump(
         jit->block,
@@ -536,6 +644,14 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
         jit.pc = pc;
         jit.opcode = opcode;
 
         // Verify our existing assumption (DEBUG)
         if (jit_at_current_insn(&jit)) {
             verify_ctx(&jit, ctx);
@@ -546,7 +662,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
         if (!gen_fn) {
             // If we reach an unknown instruction,
             // exit to the interpreter and stop compiling
-            yjit_gen_exit(&jit, ctx, cb);
             break;
         }
@@ -576,7 +692,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
             // TODO: if the codegen funcion makes changes to ctx and then return YJIT_CANT_COMPILE,
             // the exit this generates would be wrong. We could save a copy of the entry context
             // and assert that ctx is the same here.
-            yjit_gen_exit(&jit, ctx, cb);
             break;
         }
@@ -596,6 +712,10 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
     // Store the index of the last instruction in the block
     block->end_idx = insn_idx;
 
     if (YJIT_DUMP_MODE >= 2) {
         // Dump list of compiled instrutions
         fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
@@ -735,8 +855,7 @@ gen_newarray(jitstate_t* jit, ctx_t* ctx)
     rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
 
     // Save the PC and SP because we are allocating
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
@@ -760,8 +879,7 @@ gen_duparray(jitstate_t* jit, ctx_t* ctx)
     VALUE ary = jit_get_arg(jit, 0);
 
     // Save the PC and SP because we are allocating
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     // call rb_ary_resurrect(VALUE ary);
     jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
@@ -783,8 +901,7 @@ gen_splatarray(jitstate_t* jit, ctx_t* ctx)
     // Save the PC and SP because the callee may allocate
     // Note that this modifies REG_SP, which is why we do it first
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     // Get the operands from the stack
     x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
@@ -908,8 +1025,7 @@ gen_newhash(jitstate_t* jit, ctx_t* ctx)
     if (n == 0) {
         // Save the PC and SP because we are allocating
-        jit_save_pc(jit, REG0);
-        jit_save_sp(jit, ctx);
 
         // val = rb_hash_new();
         call_ptr(cb, REG0, (void *)rb_hash_new);
@@ -1559,8 +1675,7 @@ gen_setinstancevariable(jitstate_t* jit, ctx_t* ctx)
     // Save the PC and SP because the callee may allocate
     // Note that this modifies REG_SP, which is why we do it first
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     // Get the operands from the stack
     x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
@@ -1611,8 +1726,7 @@ gen_defined(jitstate_t* jit, ctx_t* ctx)
     // Save the PC and SP because the callee may allocate
     // Note that this modifies REG_SP, which is why we do it first
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     // Get the operands from the stack
     x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
@@ -1706,8 +1820,7 @@ gen_concatstrings(jitstate_t* jit, ctx_t* ctx)
     rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
 
     // Save the PC and SP because we are allocating
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
@@ -1975,15 +2088,13 @@ gen_opt_aref(jitstate_t *jit, ctx_t *ctx)
     // Call VALUE rb_hash_aref(VALUE hash, VALUE key).
     {
-        // Write incremented pc to cfp->pc as the routine can raise and allocate
-        jit_save_pc(jit, REG0);
-
         // About to change REG_SP which these operands depend on. Yikes.
         mov(cb, C_ARG_REGS[0], recv_opnd);
         mov(cb, C_ARG_REGS[1], idx_opnd);
 
         // Write sp to cfp->sp since rb_hash_aref might need to call #hash on the key
-        jit_save_sp(jit, ctx);
 
         call_ptr(cb, REG0, (void *)rb_hash_aref);
@@ -2009,8 +2120,7 @@ gen_opt_aset(jitstate_t *jit, ctx_t *ctx)
     {
         // Save the PC and SP because the callee may allocate
         // Note that this modifies REG_SP, which is why we do it first
-        jit_save_pc(jit, REG0);
-        jit_save_sp(jit, ctx);
 
         uint8_t* side_exit = yjit_side_exit(jit, ctx);
@@ -2177,8 +2287,7 @@ gen_opt_mod(jitstate_t* jit, ctx_t* ctx)
 {
     // Save the PC and SP because the callee may allocate bignums
     // Note that this modifies REG_SP, which is why we do it first
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     uint8_t* side_exit = yjit_side_exit(jit, ctx);
@@ -2691,6 +2800,25 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
         return YJIT_CANT_COMPILE;
     }
 
     // Delegate to codegen for C methods if we have it.
     {
         method_codegen_t known_cfunc_codegen;
@@ -2842,6 +2970,9 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
     // Invalidation logic is in rb_yjit_method_lookup_change()
     call_ptr(cb, REG0, (void*)cfunc->func);
 
     // Push the return value on the Ruby stack
     x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
     mov(cb, stack_ret, RAX);
@@ -2856,7 +2987,7 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
     // cfunc calls may corrupt types
     ctx_clear_local_types(ctx);
 
-    // Note: gen_oswb_iseq() jumps to the next instruction with ctx->sp_offset == 0
     // after the call, while this does not. This difference prevents
     // the two call types from sharing the same successor.
@@ -3480,8 +3611,7 @@ gen_getglobal(jitstate_t* jit, ctx_t* ctx)
     ID gid = jit_get_arg(jit, 0);
 
     // Save the PC and SP because we might make a Ruby call for warning
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     mov(cb, C_ARG_REGS[0], imm_opnd(gid));
@@ -3500,8 +3630,7 @@ gen_setglobal(jitstate_t* jit, ctx_t* ctx)
     // Save the PC and SP because we might make a Ruby call for
     // Kernel#set_trace_var
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     mov(cb, C_ARG_REGS[0], imm_opnd(gid));
@@ -3519,8 +3648,7 @@ gen_tostring(jitstate_t* jit, ctx_t* ctx)
 {
     // Save the PC and SP because we might make a Ruby call for
     // Kernel#set_trace_var
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     x86opnd_t str = ctx_stack_pop(ctx, 1);
     x86opnd_t val = ctx_stack_pop(ctx, 1);
@@ -3545,8 +3673,7 @@ gen_toregexp(jitstate_t* jit, ctx_t* ctx)
     // Save the PC and SP because this allocates an object and could
     // raise an exception.
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)cnt));
     ctx_stack_pop(ctx, cnt);
@@ -3678,8 +3805,7 @@ gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx)
     }
 
     // If the calls don't allocate, do they need up to date PC, SP?
-    jit_save_pc(jit, REG0);
-    jit_save_sp(jit, ctx);
 
     if (bf->argc > 0) {
         // Load environment pointer EP from CFP
@@ -3706,6 +3832,107 @@ gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx)
     return YJIT_KEEP_COMPILING;
 }
 
 static void
 yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
 {
@@ -3749,6 +3976,9 @@ yjit_init_codegen(void)
     // Generate the interpreter exit code for leave
     leave_exit_code = yjit_gen_leave_exit(cb);
 
     // Map YARV opcodes to the corresponding codegen functions
     yjit_reg_op(BIN(nop), gen_nop);
     yjit_reg_op(BIN(dup), gen_dup);
```

```diff
diff --git a/yjit_codegen.h b/yjit_codegen.h
@@ -7,6 +7,7 @@
 // Code blocks we generate code into
 extern codeblock_t *cb;
 extern codeblock_t *ocb;
 
 // Code generation state
 typedef struct JITState
@@ -30,6 +31,10 @@ typedef struct JITState
     // This allows us to peek at run-time values
     rb_execution_context_t* ec;
 } jitstate_t;
 
 typedef enum codegen_status {
```

```diff
diff --git a/yjit_core.c b/yjit_core.c
@@ -506,11 +506,12 @@ static size_t get_num_versions(blockid_t blockid)
 static void
 add_block_version(blockid_t blockid, block_t* block)
 {
-    // Function entry blocks must have stack size 0
-    RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));
     const rb_iseq_t *iseq = block->blockid.iseq;
     struct rb_iseq_constant_body *body = iseq->body;
 
     // Ensure yjit_blocks is initialized for this iseq
     if (rb_darray_size(body->yjit_blocks) == 0) {
         // Initialize yjit_blocks to be as wide as body->iseq_encoded
@@ -772,7 +773,7 @@ branch_stub_hit(branch_t* branch, const uint32_t target_idx, rb_execution_contex
     // If this block hasn't yet been compiled
     if (!p_block) {
         // If the new block can be generated right after the branch (at cb->write_pos)
-        if (cb->write_pos == branch->end_pos) {
             // This branch should be terminating its block
             RUBY_ASSERT(branch->end_pos == branch->block->end_pos);
@@ -801,12 +802,14 @@ branch_stub_hit(branch_t* branch, const uint32_t target_idx, rb_execution_contex
     branch->dst_addrs[target_idx] = dst_addr;
 
     // Rewrite the branch with the new jump target address
-    RUBY_ASSERT(branch->dst_addrs[0] != NULL);
-    uint32_t cur_pos = cb->write_pos;
-    cb_set_pos(cb, branch->start_pos);
-    branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-    RUBY_ASSERT(cb->write_pos == branch->end_pos && "branch can't change size");
-    cb_set_pos(cb, cur_pos);
 
     // Mark this branch target as patched (no longer a stub)
     branch->blocks[target_idx] = p_block;
@@ -921,8 +924,7 @@ void gen_direct_jump(
     block_t* p_block = find_block_version(target0, ctx);
 
     // If the version already exists
-    if (p_block)
-    {
         rb_darray_append(&p_block->incoming, branch);
 
         branch->dst_addrs[0] = cb_get_ptr(cb, p_block->start_pos);
@@ -934,10 +936,9 @@ void gen_direct_jump(
         gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
         branch->end_pos = cb->write_pos;
     }
-    else
-    {
-        // The target block will be compiled right after this one (fallthrough)
-        // See the loop in gen_block_version()
         branch->dst_addrs[0] = NULL;
         branch->shape = SHAPE_NEXT0;
         branch->start_pos = cb->write_pos;
@@ -1048,7 +1049,7 @@ block_array_remove(rb_yjit_block_array_t block_array, block_t *block)
 // Invalidate one specific block version
 void
-invalidate_block_version(block_t* block)
 {
     ASSERT_vm_locking();
 
     // TODO: want to assert that all other ractors are stopped here. Can't patch
@@ -1067,8 +1068,7 @@ invalidate_block_version(block_t* block)
     uint8_t* code_ptr = cb_get_ptr(cb, block->start_pos);
 
     // For each incoming branch
-    rb_darray_for(block->incoming, incoming_idx)
-    {
         branch_t* branch = rb_darray_get(block->incoming, incoming_idx);
         uint32_t target_idx = (branch->dst_addrs[0] == code_ptr)? 0:1;
         RUBY_ASSERT(branch->dst_addrs[target_idx] == code_ptr);
@@ -1077,6 +1077,11 @@ invalidate_block_version(block_t* block)
         // Mark this target as being a stub
         branch->blocks[target_idx] = NULL;
 
         // Create a stub for this branch target
         branch->dst_addrs[target_idx] = get_branch_target(
             block->blockid,
@@ -1088,8 +1093,7 @@ invalidate_block_version(block_t* block)
         // Check if the invalidated block immediately follows
         bool target_next = block->start_pos == branch->end_pos;
 
-        if (target_next)
-        {
             // The new block will no longer be adjacent
             branch->shape = SHAPE_DEFAULT;
         }
@@ -1103,8 +1107,13 @@ invalidate_block_version(block_t* block)
         branch->block->end_pos = cb->write_pos;
         cb_set_pos(cb, cur_pos);
 
-        if (target_next && branch->end_pos > block->end_pos)
-        {
             rb_bug("yjit invalidate rewrote branch past end of invalidated block");
         }
     }
```

```diff
diff --git a/yjit_iface.c b/yjit_iface.c
@@ -81,6 +81,17 @@ map_addr2insn(void *code_ptr, int insn)
     }
 }
 
 int
 yjit_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc)
 {
```

```diff
diff --git a/yjit_iface.h b/yjit_iface.h
@@ -54,6 +54,7 @@ YJIT_DECLARE_COUNTERS(
     send_cfunc_ruby_array_varg,
     send_cfunc_argc_mismatch,
     send_cfunc_toomany_args,
     send_iseq_tailcall,
     send_iseq_arity_error,
     send_iseq_only_keywords,
@@ -63,6 +64,8 @@ YJIT_DECLARE_COUNTERS(
     send_se_cf_overflow,
     send_se_protected_check_failed,
     leave_se_interrupt,
     leave_interp_return,
     leave_start_pc_non_zero,
@@ -105,6 +108,7 @@ RUBY_EXTERN struct rb_yjit_runtime_counters yjit_runtime_counters;
 void yjit_map_addr2insn(void *code_ptr, int insn);
 VALUE *yjit_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx);
 int yjit_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc);
 void check_cfunc_dispatch(VALUE receiver, struct rb_callinfo *ci, void *callee, rb_callable_method_entry_t *compile_time_cme);
```
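The `yjit_core.c` hunks above rewrite already-emitted branches in place when a block is invalidated, guarded by the `"branch can't change size"` assert and the `rb_bug` about rewriting past the end of the invalidated block. The toy, self-contained model below illustrates only that size-preservation constraint; the two-byte "jump" encoding and every name in it are invented for illustration and are not YJIT's real code.

```c
#include <assert.h>
#include <stdint.h>

/* Toy model: a branch occupies [start_pos, end_pos) in the code block; when
 * its target block is invalidated, the branch is regenerated to aim at a
 * stub, and the rewrite must not grow or it would clobber what follows. */
typedef struct { uint8_t buf[64]; uint32_t write_pos; } codeblock_t;
typedef struct { uint32_t start_pos, end_pos; } branch_t;

static void
emit_jump(codeblock_t *cb, uint8_t target_id)
{
    cb->buf[cb->write_pos++] = 0xE9;      /* pretend jump opcode */
    cb->buf[cb->write_pos++] = target_id; /* pretend jump operand */
}

/* Rewind to the branch, regenerate it, check it kept its size, rewind back. */
static void
patch_branch(codeblock_t *cb, branch_t *branch, uint8_t stub_id)
{
    uint32_t cur_pos = cb->write_pos;
    cb->write_pos = branch->start_pos;
    emit_jump(cb, stub_id);
    assert(cb->write_pos == branch->end_pos && "branch can't change size");
    cb->write_pos = cur_pos;
}

int
main(void)
{
    codeblock_t cb = { .write_pos = 0 };
    branch_t br = { .start_pos = 0, .end_pos = 0 };

    emit_jump(&cb, 7);          /* branch to the soon-invalidated block */
    br.end_pos = cb.write_pos;
    emit_jump(&cb, 9);          /* unrelated code following the branch */

    patch_branch(&cb, &br, 42); /* retarget the branch to a stub */
    assert(cb.buf[1] == 42);    /* branch now points at the stub */
    assert(cb.buf[3] == 9);     /* following code was not clobbered */
    return 0;
}
```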