diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index f6058c5abe521..a02332e0d39c3 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -1443,24 +1443,6 @@ bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref ref) } } -static int ir_ref_cmp(const void *p1, const void *p2) -{ - return *(ir_ref*)p1 - *(ir_ref*)p2; -} - -void ir_use_list_sort(ir_ctx *ctx, ir_ref ref) -{ - ir_use_list *use_list; - uint32_t n; - - IR_ASSERT(ref > 0); - use_list = &ctx->use_lists[ref]; - n = use_list->count; - if (n > 1) { - qsort(ctx->use_edges + use_list->refs, n, sizeof(ir_ref), ir_ref_cmp); - } -} - void ir_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref) { int i, j, n, *p, use; diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 5a6718b77c1c1..bdf6b027b9fea 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -720,7 +720,7 @@ get_arg_hints: break; case IR_PARAM: constraints->def_reg = ir_get_param_reg(ctx, ref); - flags = 0; + flags = (constraints->def_reg != IR_REG_NONE) ? 
IR_USE_SHOULD_BE_IN_REG : 0; break; case IR_PI: case IR_PHI: diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h index 03add75906553..084216a063438 100644 --- a/ext/opcache/jit/ir/ir_builder.h +++ b/ext/opcache/jit/ir/ir_builder.h @@ -654,7 +654,7 @@ void _ir_TAILCALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_re void _ir_TAILCALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4); void _ir_TAILCALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5); void _ir_TAILCALL_6(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5, ir_ref arg6); -ir_ref _ir_TAILCALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args); +void _ir_TAILCALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args); ir_ref _ir_ALLOCA(ir_ctx *ctx, ir_ref size); void _ir_AFREE(ir_ctx *ctx, ir_ref size); ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var); diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index 7edb012f61724..1b45eb834ce6a 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -408,8 +408,6 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) } /* Reconstruct IR: Update DEF->USE lists, CFG mapping and etc */ - ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list)); - ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t)); n = ctx->use_lists[ref].refs; for (i = 0; i < clones_count; i++) { clone = clones[i].ref; @@ -428,6 +426,7 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) uint32_t u = clones[i].use; while (u != (uint32_t)-1) { + uint32_t src = uses[u].block; use = uses[u].ref; ctx->use_edges[n++] = use; u = uses[u].next; @@ -437,9 +436,11 @@ static bool ir_split_partially_dead_node(ir_ctx 
*ctx, ir_ref ref, uint32_t b) ir_ref k, l = insn->inputs_count; if (insn->op == IR_PHI) { - for (k = 1; k <= l; k++) { - if (ir_insn_op(insn, k) == ref) { - j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)]; + ir_insn *merge = &ctx->ir_base[insn->op1]; + for (k = 2; k <= l; k++) { + j = ctx->cfg_map[ir_insn_op(merge, k - 1)]; + if (j == src) { + IR_ASSERT(ir_insn_op(insn, k) == ref); if (j != clones[i].block) { uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth; while (ctx->cfg_blocks[j].dom_depth > dom_depth) { diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index 96b81a0fcd721..9e3a3a171b46c 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -1047,7 +1047,6 @@ void ir_use_list_remove_one(ir_ctx *ctx, ir_ref def, ir_ref use); void ir_use_list_replace_all(ir_ctx *ctx, ir_ref def, ir_ref use, ir_ref new_use); void ir_use_list_replace_one(ir_ctx *ctx, ir_ref def, ir_ref use, ir_ref new_use); bool ir_use_list_add(ir_ctx *ctx, ir_ref def, ir_ref use); -void ir_use_list_sort(ir_ctx *ctx, ir_ref def); IR_ALWAYS_INLINE ir_ref ir_next_control(const ir_ctx *ctx, ir_ref ref) { @@ -1100,6 +1099,7 @@ void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist); void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist); void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist); void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist); +void ir_iter_cleanup(ir_ctx *ctx); /*** IR Basic Blocks info ***/ #define IR_IS_BB_START(op) \ diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index 4a893410d4995..aff9aa7bab3fd 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -3190,6 +3190,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li return IR_REG_NONE; } if (split_pos >= blockPos[reg]) { +try_next_available_register: IR_REGSET_EXCL(available, 
reg); if (IR_REGSET_IS_EMPTY(available)) { fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n"); @@ -3222,31 +3223,33 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li IR_LOG_LSRA_CONFLICT(" ---- Conflict with active", other, overlap); split_pos = ir_last_use_pos_before(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); - if (split_pos == 0) { - split_pos = ival->range.start; - } - split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1); - if (split_pos > other->range.start) { - child = ir_split_interval_at(ctx, other, split_pos); - if (prev) { - prev->list_next = other->list_next; + if (split_pos) { + split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1); + if (split_pos > other->range.start) { + child = ir_split_interval_at(ctx, other, split_pos); + if (prev) { + prev->list_next = other->list_next; + } else { + *active = other->list_next; + } + IR_LOG_LSRA(" ---- Finish", other, ""); } else { - *active = other->list_next; + goto try_next_available_register; } - IR_LOG_LSRA(" ---- Finish", other, ""); } else { child = other; - other->reg = IR_REG_NONE; - if (prev) { - prev->list_next = other->list_next; - } else { - *active = other->list_next; - } - IR_LOG_LSRA(" ---- Spill and Finish", other, " (it must not be in reg)"); } split_pos = ir_first_use_pos_after(child, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; // TODO: ??? 
if (split_pos > child->range.start && split_pos < child->end) { + if (child == other) { + other->reg = IR_REG_NONE; + if (prev) { + prev->list_next = other->list_next; + } else { + *active = other->list_next; + } + } ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1); if (opt_split_pos > child->range.start) { split_pos = opt_split_pos; @@ -3259,6 +3262,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li // TODO: this may cause endless loop ir_add_to_unhandled(unhandled, child); IR_LOG_LSRA(" ---- Queue", child, ""); + } else { + goto try_next_available_register; } } break; diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 6478ec6975629..921790fd92bda 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -603,7 +603,7 @@ static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_val bool may_benefit = 0; bool has_top = 0; - if (_values[i].op != IR_TOP) { + if (_values[i].op != IR_TOP || insn->op == IR_COPY) { may_benefit = 1; } @@ -987,6 +987,7 @@ static void ir_sccp_remove_if(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref re insn->optx = IR_OPTX(IR_END, IR_VOID, 1); next_insn = &ctx->ir_base[dst]; next_insn->op = IR_BEGIN; + next_insn->op2 = IR_UNUSED; } } @@ -2726,7 +2727,16 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re } return 1; - } else if (cond->op != IR_OVERFLOW && insn->op2 <= cond_ref && insn->op3 <= cond_ref) { + } else if (insn->op2 <= cond_ref && insn->op3 <= cond_ref + && cond->op != IR_OVERFLOW + // TODO: temporarily disable IF-conversion for RLOAD. + // We don't track anti-dependencies in GCM and Local Scheduling. + // As a result, COND may be scheduled below the following RSTORE. 
+ // See: https://github.com/dstogov/ir/issues/132 + && cond->op != IR_RLOAD + && !((cond->op >= IR_EQ && cond->op <= IR_UNORDERED) + && ((!IR_IS_CONST_REF(cond->op1) && ctx->ir_base[cond->op1].op == IR_RLOAD) + || (!IR_IS_CONST_REF(cond->op2) && ctx->ir_base[cond->op2].op == IR_RLOAD)))) { /* COND * * prev prev @@ -2968,9 +2978,11 @@ static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); if_false->op1 = end1_ref; + if_false->op2 = IR_UNUSED; if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); if_true->op1 = end2_ref; + if_true->op2 = IR_UNUSED; ir_bitqueue_add(worklist, if_false_ref); ir_bitqueue_add(worklist, if_true_ref); @@ -3008,6 +3020,7 @@ static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue if_true->optx = IR_BEGIN; if_true->op1 = IR_UNUSED; + if_true->op2 = IR_UNUSED; ctx->flags2 &= ~IR_CFG_REACHABLE; @@ -3157,9 +3170,11 @@ static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqu if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); if_false->op1 = end1_ref; + if_false->op2 = IR_UNUSED; if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); if_true->op1 = end2_ref; + if_true->op2 = IR_UNUSED; ir_bitqueue_add(worklist, if_false_ref); ir_bitqueue_add(worklist, if_true_ref); @@ -3201,6 +3216,7 @@ static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqu if_true->optx = IR_BEGIN; if_true->op1 = IR_UNUSED; + if_true->op2 = IR_UNUSED; ctx->flags2 &= ~IR_CFG_REACHABLE; @@ -3487,7 +3503,9 @@ static void ir_iter_optimize_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqu if_true = &ctx->ir_base[if_true_ref]; if_false = &ctx->ir_base[if_false_ref]; if_true->op = IR_BEGIN; + if_true->op2 = IR_UNUSED; if_false->op = IR_BEGIN; + if_false->op2 = IR_UNUSED; if (ir_ref_is_true(ctx, condition)) { if_false->op1 = IR_UNUSED; ir_use_list_remove_one(ctx, ref, if_false_ref); @@ -3750,6 +3768,47 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist) } 
} +void ir_iter_cleanup(ir_ctx *ctx) +{ + ir_bitqueue iter_worklist; + ir_bitqueue cfg_worklist; + ir_ref i, n; + ir_insn *insn; + + ir_bitqueue_init(&cfg_worklist, ctx->insns_count); + ir_bitqueue_init(&iter_worklist, ctx->insns_count); + + /* Remove unused nodes: scan every instruction, remove foldable non-NOP nodes whose use list is empty, and collect IF/MERGE nodes in cfg_worklist; the loop after the scan re-checks nodes popped from iter_worklist */ + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + if (IR_IS_FOLDABLE_OP(insn->op)) { + if (insn->op != IR_NOP && ctx->use_lists[i].count == 0) { + ir_iter_remove_insn(ctx, i, &iter_worklist); + } + } else if (insn->op == IR_IF || insn->op == IR_MERGE) { + ir_bitqueue_add(&cfg_worklist, i); + } + n = insn->inputs_count; + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + while ((i = ir_bitqueue_pop(&iter_worklist)) >= 0) { + insn = &ctx->ir_base[i]; + if (IR_IS_FOLDABLE_OP(insn->op)) { + if (ctx->use_lists[i].count == 0) { + ir_iter_remove_insn(ctx, i, &iter_worklist); + } + } + } + + /* Clean up control flow: run ir_iter_opt() over the IF/MERGE nodes collected in cfg_worklist */ + ir_iter_opt(ctx, &cfg_worklist); + + ir_bitqueue_free(&iter_worklist); + ir_bitqueue_free(&cfg_worklist); +} + int ir_sccp(ir_ctx *ctx) { ir_bitqueue sccp_worklist, iter_worklist; diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 049c341cc8fe3..9cd41c37ffef6 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1666,7 +1666,7 @@ get_arg_hints: break; case IR_PARAM: constraints->def_reg = ir_get_param_reg(ctx, ref); - flags = 0; + flags = (constraints->def_reg != IR_REG_NONE) ? IR_USE_SHOULD_BE_IN_REG : 0; break; case IR_PI: case IR_PHI: