diff options
-rw-r--r-- | ast.c | 27 | ||||
-rw-r--r-- | node.c | 37 | ||||
-rw-r--r-- | node.h | 4 | ||||
-rw-r--r-- | parse.y | 528 | ||||
-rw-r--r-- | ruby_parser.c | 25 | ||||
-rw-r--r-- | rubyparser.h | 20 | ||||
-rw-r--r-- | universal_parser.c | 3 |
7 files changed, 429 insertions, 215 deletions
@@ -774,10 +774,35 @@ ast_node_last_column(rb_execution_context_t *ec, VALUE self) static VALUE ast_node_all_tokens(rb_execution_context_t *ec, VALUE self) { struct ASTNodeData *data; TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data); - return rb_ast_tokens(data->ast); } static VALUE @@ -69,7 +69,7 @@ rb_node_buffer_new(void) init_node_buffer_list(&nb->unmarkable, (node_buffer_elem_t*)&nb[1], ruby_xmalloc); init_node_buffer_list(&nb->markable, (node_buffer_elem_t*)((size_t)nb->unmarkable.head + bucket_size), ruby_xmalloc); nb->local_tables = 0; - nb->tokens = Qnil; #ifdef UNIVERSAL_PARSER nb->config = config; #endif @@ -177,6 +177,24 @@ parser_string_free(rb_ast_t *ast, rb_parser_string_t *str) } static void free_ast_value(rb_ast_t *ast, void *ctx, NODE *node) { switch (nd_type(node)) { @@ -228,6 +246,9 @@ free_ast_value(rb_ast_t *ast, void *ctx, NODE *node) static void rb_node_buffer_free(rb_ast_t *ast, node_buffer_t *nb) { iterate_node_values(ast, &nb->unmarkable, free_ast_value, NULL); node_buffer_list_free(ast, &nb->unmarkable); node_buffer_list_free(ast, &nb->markable); @@ -388,8 +409,6 @@ void rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating) { if (ast->node_buffer) { - rb_gc_mark_and_move(&ast->node_buffer->tokens); - node_buffer_t *nb = ast->node_buffer; iterate_node_values(ast, &nb->markable, mark_and_move_ast_value, NULL); @@ -439,18 +458,6 @@ rb_ast_dispose(rb_ast_t *ast) } VALUE -rb_ast_tokens(rb_ast_t *ast) -{ - return ast->node_buffer->tokens; -} - -void -rb_ast_set_tokens(rb_ast_t *ast, VALUE tokens) -{ - RB_OBJ_WRITE(ast, &ast->node_buffer->tokens, tokens); -} - -VALUE rb_node_set_type(NODE *n, enum node_type t) { #if RUBY_DEBUG @@ -40,7 +40,7 @@ struct node_buffer_struct { // - text of token // - location info // Array, whose entry is array - VALUE tokens; #ifdef UNIVERSAL_PARSER const rb_parser_config_t *config; #endif @@ -55,7 +55,6 @@ rb_ast_t *rb_ast_new(void); #endif size_t rb_ast_memsize(const rb_ast_t*); void rb_ast_dispose(rb_ast_t*); -VALUE rb_ast_tokens(rb_ast_t *ast); #if RUBY_DEBUG void rb_ast_node_type_change(NODE *n, enum node_type type); #endif @@ -65,7 +64,6 @@ void rb_node_init(NODE *n, enum node_type type); void rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating); void rb_ast_update_references(rb_ast_t*); void rb_ast_free(rb_ast_t*); -void rb_ast_set_tokens(rb_ast_t*, VALUE); NODE *rb_ast_newnode(rb_ast_t*, enum node_type type, size_t size, size_t alignment); void rb_ast_delete_node(rb_ast_t*, NODE *n); rb_ast_id_table_t *rb_ast_new_local_table(rb_ast_t*, int); @@ -634,7 +634,7 @@ struct parser_params { /* id for terms */ int token_id; /* Array for term tokens */ - VALUE tokens; #else /* Ripper only */ @@ -875,170 +875,170 @@ peek_end_expect_token_locations(struct parser_params *p) return p->end_expect_token_locations; } -static ID -parser_token2id(struct parser_params *p, enum yytokentype tok) { switch ((int) tok) { -#define TOKEN2ID(tok) case tok: return rb_intern(#tok); -#define TOKEN2ID2(tok, name) case tok: return rb_intern(name); - TOKEN2ID2(' ', "words_sep") - TOKEN2ID2('!', "!") - TOKEN2ID2('%', "%"); - TOKEN2ID2('&', "&"); - TOKEN2ID2('*', "*"); - TOKEN2ID2('+', "+"); - TOKEN2ID2('-', "-"); - TOKEN2ID2('/', "/"); - TOKEN2ID2('<', "<"); - TOKEN2ID2('=', "="); - TOKEN2ID2('>', ">"); - TOKEN2ID2('?', "?"); - TOKEN2ID2('^', "^"); - TOKEN2ID2('|', "|"); - TOKEN2ID2('~', "~"); - TOKEN2ID2(':', ":"); - TOKEN2ID2(',', ","); - TOKEN2ID2('.', "."); - TOKEN2ID2(';', ";"); - TOKEN2ID2('`', "`"); - TOKEN2ID2('\n', "nl"); - TOKEN2ID2('{', "{"); - TOKEN2ID2('}', "}"); - TOKEN2ID2('[', "["); - TOKEN2ID2(']', "]"); - TOKEN2ID2('(', "("); - TOKEN2ID2(')', ")"); - TOKEN2ID2('\\', "backslash"); - TOKEN2ID(keyword_class); - TOKEN2ID(keyword_module); - TOKEN2ID(keyword_def); - TOKEN2ID(keyword_undef); - TOKEN2ID(keyword_begin); - TOKEN2ID(keyword_rescue); - TOKEN2ID(keyword_ensure); - TOKEN2ID(keyword_end); - TOKEN2ID(keyword_if); - TOKEN2ID(keyword_unless); - TOKEN2ID(keyword_then); - TOKEN2ID(keyword_elsif); - TOKEN2ID(keyword_else); - TOKEN2ID(keyword_case); - TOKEN2ID(keyword_when); - TOKEN2ID(keyword_while); - TOKEN2ID(keyword_until); - TOKEN2ID(keyword_for); - TOKEN2ID(keyword_break); - TOKEN2ID(keyword_next); - TOKEN2ID(keyword_redo); - TOKEN2ID(keyword_retry); - TOKEN2ID(keyword_in); - TOKEN2ID(keyword_do); - TOKEN2ID(keyword_do_cond); - TOKEN2ID(keyword_do_block); - TOKEN2ID(keyword_do_LAMBDA); - TOKEN2ID(keyword_return); - TOKEN2ID(keyword_yield); - TOKEN2ID(keyword_super); - TOKEN2ID(keyword_self); - TOKEN2ID(keyword_nil); - TOKEN2ID(keyword_true); - TOKEN2ID(keyword_false); - TOKEN2ID(keyword_and); - TOKEN2ID(keyword_or); - TOKEN2ID(keyword_not); - TOKEN2ID(modifier_if); - TOKEN2ID(modifier_unless); - TOKEN2ID(modifier_while); - TOKEN2ID(modifier_until); - TOKEN2ID(modifier_rescue); - TOKEN2ID(keyword_alias); - TOKEN2ID(keyword_defined); - TOKEN2ID(keyword_BEGIN); - TOKEN2ID(keyword_END); - TOKEN2ID(keyword__LINE__); - TOKEN2ID(keyword__FILE__); - TOKEN2ID(keyword__ENCODING__); - TOKEN2ID(tIDENTIFIER); - TOKEN2ID(tFID); - TOKEN2ID(tGVAR); - TOKEN2ID(tIVAR); - TOKEN2ID(tCONSTANT); - TOKEN2ID(tCVAR); - TOKEN2ID(tLABEL); - TOKEN2ID(tINTEGER); - TOKEN2ID(tFLOAT); - TOKEN2ID(tRATIONAL); - TOKEN2ID(tIMAGINARY); - TOKEN2ID(tCHAR); - TOKEN2ID(tNTH_REF); - TOKEN2ID(tBACK_REF); - TOKEN2ID(tSTRING_CONTENT); - TOKEN2ID(tREGEXP_END); - TOKEN2ID(tDUMNY_END); - TOKEN2ID(tSP); - TOKEN2ID(tUPLUS); - TOKEN2ID(tUMINUS); - TOKEN2ID(tPOW); - TOKEN2ID(tCMP); - TOKEN2ID(tEQ); - TOKEN2ID(tEQQ); - TOKEN2ID(tNEQ); - TOKEN2ID(tGEQ); - TOKEN2ID(tLEQ); - TOKEN2ID(tANDOP); - TOKEN2ID(tOROP); - TOKEN2ID(tMATCH); - TOKEN2ID(tNMATCH); - TOKEN2ID(tDOT2); - TOKEN2ID(tDOT3); - TOKEN2ID(tBDOT2); - TOKEN2ID(tBDOT3); - TOKEN2ID(tAREF); - TOKEN2ID(tASET); - TOKEN2ID(tLSHFT); - TOKEN2ID(tRSHFT); - TOKEN2ID(tANDDOT); - TOKEN2ID(tCOLON2); - TOKEN2ID(tCOLON3); - TOKEN2ID(tOP_ASGN); - TOKEN2ID(tASSOC); - TOKEN2ID(tLPAREN); - TOKEN2ID(tLPAREN_ARG); - TOKEN2ID(tRPAREN); - TOKEN2ID(tLBRACK); - TOKEN2ID(tLBRACE); - TOKEN2ID(tLBRACE_ARG); - TOKEN2ID(tSTAR); - TOKEN2ID(tDSTAR); - TOKEN2ID(tAMPER); - TOKEN2ID(tLAMBDA); - TOKEN2ID(tSYMBEG); - TOKEN2ID(tSTRING_BEG); - TOKEN2ID(tXSTRING_BEG); - TOKEN2ID(tREGEXP_BEG); - TOKEN2ID(tWORDS_BEG); - TOKEN2ID(tQWORDS_BEG); - TOKEN2ID(tSYMBOLS_BEG); - TOKEN2ID(tQSYMBOLS_BEG); - TOKEN2ID(tSTRING_END); - TOKEN2ID(tSTRING_DEND); - TOKEN2ID(tSTRING_DBEG); - TOKEN2ID(tSTRING_DVAR); - TOKEN2ID(tLAMBEG); - TOKEN2ID(tLABEL_END); - TOKEN2ID(tIGNORED_NL); - TOKEN2ID(tCOMMENT); - TOKEN2ID(tEMBDOC_BEG); - TOKEN2ID(tEMBDOC); - TOKEN2ID(tEMBDOC_END); - TOKEN2ID(tHEREDOC_BEG); - TOKEN2ID(tHEREDOC_END); - TOKEN2ID(k__END__); - TOKEN2ID(tLOWEST); - TOKEN2ID(tUMINUS_NUM); - TOKEN2ID(tLAST_TOKEN); -#undef TOKEN2ID -#undef TOKEN2ID2 } rb_bug("parser_token2id: unknown token %d", tok); @@ -2565,8 +2565,8 @@ rb_parser_str_resize(struct parser_params *p, rb_parser_string_t *str, long len) return str; } -#ifndef UNIVERSAL_PARSER #ifndef RIPPER # define PARSER_ENC_STRING_GETMEM(str, ptrvar, lenvar, encvar) \ ((ptrvar) = str->ptr, \ (lenvar) = str->len, \ @@ -2587,7 +2587,73 @@ rb_parser_string_hash_cmp(rb_parser_string_t *str1, rb_parser_string_t *str2) memcmp(ptr1, ptr2, len1) != 0); } #endif -#endif %} %expect 0 @@ -7035,35 +7101,100 @@ parser_has_token(struct parser_params *p) return pcur > ptok; } -static VALUE -code_loc_to_ary(struct parser_params *p, const rb_code_location_t *loc) { - VALUE ary = rb_ary_new_from_args(4, - INT2NUM(loc->beg_pos.lineno), INT2NUM(loc->beg_pos.column), - INT2NUM(loc->end_pos.lineno), INT2NUM(loc->end_pos.column)); - rb_obj_freeze(ary); - - return ary; } -static void -parser_append_tokens(struct parser_params *p, VALUE str, enum yytokentype t, int line) { - VALUE ary; - int token_id; - ary = rb_ary_new2(4); - token_id = p->token_id; - rb_ary_push(ary, INT2FIX(token_id)); - rb_ary_push(ary, ID2SYM(parser_token2id(p, t))); - rb_ary_push(ary, str); - rb_ary_push(ary, code_loc_to_ary(p, p->yylloc)); - rb_obj_freeze(ary); - rb_ary_push(p->tokens, ary); p->token_id++; if (p->debug) { - rb_parser_printf(p, "Append tokens (line: %d) %"PRIsVALUE"\n", line, ary); } } @@ -7077,7 +7208,7 @@ parser_dis_scan_event(struct parser_params *p, enum yytokentype t, int line RUBY_SET_YYLLOC(*p->yylloc); if (p->keep_tokens) { - VALUE str = STR_NEW(p->lex.ptok, p->lex.pcur - p->lex.ptok); parser_append_tokens(p, str, t, line); } @@ -7095,7 +7226,8 @@ parser_dis_delayed_token(struct parser_params *p, enum yytokentype t, int l RUBY_SET_YYLLOC_OF_DELAYED_TOKEN(*p->yylloc); if (p->keep_tokens) { - parser_append_tokens(p, p->delayed.token, t, line); } p->delayed.token = Qnil; @@ -7607,7 +7739,7 @@ yycompile0(VALUE arg) tree = NEW_NIL(&NULL_LOC); } else { - VALUE tokens = p->tokens; NODE *prelude; NODE *body = parser_append_options(p, RNODE_SCOPE(tree)->nd_body); prelude = block_append(p, p->eval_tree_begin, body); @@ -7615,8 +7747,8 @@ yycompile0(VALUE arg) p->ast->body.frozen_string_literal = p->frozen_string_literal; p->ast->body.coverage_enabled = cov; if (p->keep_tokens) { - rb_obj_freeze(tokens); - rb_ast_set_tokens(p->ast, tokens); } } p->ast->body.root = tree; @@ -9230,7 +9362,7 @@ parser_dis_heredoc_end(struct parser_params *p, int line) dis_delayed_token(p, tSTRING_CONTENT); if (p->keep_tokens) { - VALUE str = STR_NEW(p->lex.ptok, p->lex.pend - p->lex.ptok); RUBY_SET_YYLLOC_OF_HEREDOC_END(*p->yylloc); parser_append_tokens(p, str, tHEREDOC_END, line); } @@ -15973,7 +16105,7 @@ parser_initialize(struct parser_params *p) p->error_buffer = Qfalse; p->end_expect_token_locations = NULL; p->token_id = 0; - p->tokens = Qnil; #else p->result = Qnil; p->parsing_thread = Qnil; @@ -16006,7 +16138,6 @@ rb_ruby_parser_mark(void *ptr) #ifndef RIPPER rb_gc_mark(p->debug_lines); rb_gc_mark(p->error_buffer); - rb_gc_mark(p->tokens); #else rb_gc_mark(p->value); rb_gc_mark(p->result); @@ -16028,6 +16159,12 @@ rb_ruby_parser_free(void *ptr) struct parser_params *p = (struct parser_params*)ptr; struct local_vars *local, *prev; if (p->tokenbuf) { ruby_sized_xfree(p->tokenbuf, p->toksiz); } @@ -16145,8 +16282,7 @@ void rb_ruby_parser_keep_tokens(rb_parser_t *p) { p->keep_tokens = 1; - // TODO - p->tokens = rb_ary_new(); } #ifndef UNIVERSAL_PARSER @@ -461,6 +461,27 @@ str_coderange_scan_restartable(const char *s, const char *e, void *enc, int *cr) return rb_str_coderange_scan_restartable(s, e, (rb_encoding *)enc, cr); } VALUE rb_io_gets_internal(VALUE io); extern VALUE rb_eArgError; extern VALUE rb_mRubyVMFrozenCore; @@ -596,6 +617,10 @@ static const rb_parser_config_t rb_global_parser_config = { .encoding_set = encoding_set, .encoding_is_ascii8bit = encoding_is_ascii8bit, .usascii_encoding = usascii_encoding, .ractor_make_shareable = rb_ractor_make_shareable, @@ -189,6 +189,22 @@ typedef struct rb_code_location_struct { rb_code_position_t end_pos; } rb_code_location_t; /* Header part of AST Node */ typedef struct RNode { VALUE flags; @@ -1340,6 +1356,10 @@ typedef struct rb_parser_config_struct { void (*encoding_set)(VALUE obj, int encindex); int (*encoding_is_ascii8bit)(VALUE obj); rb_encoding *(*usascii_encoding)(void); /* Ractor */ VALUE (*ractor_make_shareable)(VALUE obj); @@ -293,6 +293,9 @@ struct rb_imemo_tmpbuf_struct { #define rb_mRubyVMFrozenCore p->config->mRubyVMFrozenCore() #undef rb_long2int #define rb_long2int p->config->long2int #define rb_node_case_when_optimizable_literal p->config->node_case_when_optimizable_literal |