Skip to content

Commit 5154573

Browse files
committed
fixed few bugs
1 parent 090b614 commit 5154573

33 files changed

Lines changed: 1935 additions & 438 deletions

arm-thumb-gen.c

Lines changed: 92 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4152,6 +4152,34 @@ static void thumb_emit_regonly_binop32(IROperand src1, IROperand src2, IROperand
41524152
ScratchRegAlloc rm_alloc = {0};
41534153
uint32_t exclude = (1u << rd);
41544154

4155+
/* Pre-exclude already-materialized operand registers before any scratch
4156+
* allocation. Without this, get_scratch_reg_with_save() can return a
4157+
* register that is already occupied by the OTHER operand, causing
4158+
* load_to_reg_ir() to clobber it.
4159+
*
4160+
* Bug history: in parse_number() (tccpp.c), the 64-bit multiply
4161+
* n = n * b + t
4162+
* decomposes into cross-term MULs where one operand lives in a register
4163+
* and the other is spilled. Under high register pressure (R9 reserved
4164+
* as GOT pointer), the scratch allocator picked the SAME register for
4165+
* the spilled operand as the materialized one, producing
4166+
* mul.w r0, r3, r3 (b*b = 100)
4167+
* instead of
4168+
* mul.w r0, r2, r3 (n_hi * b)
4169+
*/
4170+
if (!(rn == PREG_REG_NONE || src1.is_lval || thumb_irop_needs_value_load(src1) ||
4171+
thumb_irop_has_immediate_value(src1)))
4172+
{
4173+
if (thumb_is_hw_reg(rn))
4174+
exclude |= (1u << rn);
4175+
}
4176+
if (!(rm == PREG_REG_NONE || src2.is_lval || thumb_irop_needs_value_load(src2) ||
4177+
thumb_irop_has_immediate_value(src2)))
4178+
{
4179+
if (thumb_is_hw_reg(rm))
4180+
exclude |= (1u << rm);
4181+
}
4182+
41554183
if (rn == PREG_REG_NONE || src1.is_lval || thumb_irop_needs_value_load(src1) || thumb_irop_has_immediate_value(src1))
41564184
{
41574185
rn_alloc = get_scratch_reg_with_save(exclude);
@@ -4160,10 +4188,6 @@ static void thumb_emit_regonly_binop32(IROperand src1, IROperand src2, IROperand
41604188
IROperand src1_tmp = src1;
41614189
load_to_reg_ir(rn, PREG_NONE, src1_tmp);
41624190
}
4163-
else
4164-
{
4165-
thumb_require_materialized_reg(ctx, "src1", rn);
4166-
}
41674191

41684192
if (rm == PREG_REG_NONE || src2.is_lval || thumb_irop_needs_value_load(src2) || thumb_irop_has_immediate_value(src2))
41694193
{
@@ -4172,10 +4196,6 @@ static void thumb_emit_regonly_binop32(IROperand src1, IROperand src2, IROperand
41724196
IROperand src2_tmp = src2;
41734197
load_to_reg_ir(rm, PREG_NONE, src2_tmp);
41744198
}
4175-
else
4176-
{
4177-
thumb_require_materialized_reg(ctx, "src2", rm);
4178-
}
41794199

41804200
ot_check(emitter((uint32_t)rd, (uint32_t)rn, (uint32_t)rm));
41814201

@@ -4222,6 +4242,21 @@ static void thumb_emit_mod32(IROperand src1, IROperand src2, IROperand dest, Tcc
42224242
ScratchRegAlloc quotient_alloc = {0};
42234243
uint32_t exclude_regs = (1u << dest_reg);
42244244

4245+
/* Pre-exclude already-materialized operand registers before any scratch
4246+
* allocation, same rationale as in thumb_emit_regonly_binop32(). */
4247+
if (!(src1_reg == PREG_REG_NONE || src1.is_lval || thumb_irop_needs_value_load(src1) ||
4248+
thumb_irop_has_immediate_value(src1)))
4249+
{
4250+
if (thumb_is_hw_reg(src1_reg))
4251+
exclude_regs |= (1u << src1_reg);
4252+
}
4253+
if (!(src2_reg == PREG_REG_NONE || src2.is_lval || thumb_irop_needs_value_load(src2) ||
4254+
thumb_irop_has_immediate_value(src2)))
4255+
{
4256+
if (thumb_is_hw_reg(src2_reg))
4257+
exclude_regs |= (1u << src2_reg);
4258+
}
4259+
42254260
if (src1_reg == PREG_REG_NONE || src1.is_lval || thumb_irop_needs_value_load(src1) ||
42264261
thumb_irop_has_immediate_value(src1))
42274262
{
@@ -4231,11 +4266,6 @@ static void thumb_emit_mod32(IROperand src1, IROperand src2, IROperand dest, Tcc
42314266
IROperand src1_tmp = src1;
42324267
load_to_reg_ir(src1_reg, PREG_NONE, src1_tmp);
42334268
}
4234-
else
4235-
{
4236-
thumb_require_materialized_reg(ctx, "src1", src1_reg);
4237-
exclude_regs |= (1u << src1_reg);
4238-
}
42394269

42404270
if (src2_reg == PREG_REG_NONE || src2.is_lval || thumb_irop_needs_value_load(src2) ||
42414271
thumb_irop_has_immediate_value(src2))
@@ -4246,11 +4276,6 @@ static void thumb_emit_mod32(IROperand src1, IROperand src2, IROperand dest, Tcc
42464276
IROperand src2_tmp = src2;
42474277
load_to_reg_ir(src2_reg, PREG_NONE, src2_tmp);
42484278
}
4249-
else
4250-
{
4251-
thumb_require_materialized_reg(ctx, "src2", src2_reg);
4252-
exclude_regs |= (1u << src2_reg);
4253-
}
42544279

42554280
/* quotient = src1 / src2 */
42564281
quotient_alloc = get_scratch_reg_with_save(exclude_regs);
@@ -4295,6 +4320,21 @@ static void thumb_emit_longmul32x32_to64(IROperand src1, IROperand src2, IROpera
42954320

42964321
uint32_t exclude = 0;
42974322

4323+
/* Pre-exclude already-materialized operand registers before any scratch
4324+
* allocation, same rationale as in thumb_emit_regonly_binop32(). */
4325+
if (!(rn == PREG_REG_NONE || src1.is_lval || thumb_irop_needs_value_load(src1) ||
4326+
thumb_irop_has_immediate_value(src1)))
4327+
{
4328+
if (thumb_is_hw_reg(rn))
4329+
exclude |= (1u << rn);
4330+
}
4331+
if (!(rm == PREG_REG_NONE || src2.is_lval || thumb_irop_needs_value_load(src2) ||
4332+
thumb_irop_has_immediate_value(src2)))
4333+
{
4334+
if (thumb_is_hw_reg(rm))
4335+
exclude |= (1u << rm);
4336+
}
4337+
42984338
if (rn == PREG_REG_NONE || src1.is_lval || thumb_irop_needs_value_load(src1) || thumb_irop_has_immediate_value(src1))
42994339
{
43004340
rn_alloc = get_scratch_reg_with_save(exclude);
@@ -4303,12 +4343,6 @@ static void thumb_emit_longmul32x32_to64(IROperand src1, IROperand src2, IROpera
43034343
IROperand src1_tmp = src1;
43044344
load_to_reg_ir(rn, PREG_NONE, src1_tmp);
43054345
}
4306-
else
4307-
{
4308-
thumb_require_materialized_reg(ctx, "src1", rn);
4309-
if (thumb_is_hw_reg(rn))
4310-
exclude |= (1u << rn);
4311-
}
43124346

43134347
if (rm == PREG_REG_NONE || src2.is_lval || thumb_irop_needs_value_load(src2) || thumb_irop_has_immediate_value(src2))
43144348
{
@@ -4318,12 +4352,6 @@ static void thumb_emit_longmul32x32_to64(IROperand src1, IROperand src2, IROpera
43184352
IROperand src2_tmp = src2;
43194353
load_to_reg_ir(rm, PREG_NONE, src2_tmp);
43204354
}
4321-
else
4322-
{
4323-
thumb_require_materialized_reg(ctx, "src2", rm);
4324-
if (thumb_is_hw_reg(rm))
4325-
exclude |= (1u << rm);
4326-
}
43274355

43284356
ScratchRegAlloc rd_low_alloc = {0};
43294357
ScratchRegAlloc rd_high_alloc = {0};
@@ -4691,16 +4719,21 @@ void tcc_gen_machine_data_processing_op(IROperand src1, IROperand src2, IROperan
46914719
}
46924720
}
46934721

4694-
/* Ensure all operands are in registers */
4695-
if (src1_reg == PREG_REG_NONE || src2_reg == PREG_REG_NONE || accum_reg == PREG_REG_NONE ||
4696-
dest_reg == PREG_REG_NONE)
4722+
/* Helper: check if a register is a valid data register (R0-R12, R14/LR).
4723+
* Excludes SP (R13), PC (R15), and PREG_REG_NONE. */
4724+
#define IS_VALID_DATA_REG(r) ((r) >= 0 && (r) <= 14 && (r) != 13)
4725+
4726+
/* Ensure all operands are in valid data registers.
4727+
* The live interval lookup for accum can return bogus values (e.g. R15/PC)
4728+
* when the accumulator was produced by a MUL that spilled its result. */
4729+
if (!IS_VALID_DATA_REG(src1_reg) || !IS_VALID_DATA_REG(src2_reg) || !IS_VALID_DATA_REG(accum_reg) ||
4730+
!IS_VALID_DATA_REG(dest_reg))
46974731
{
46984732
/* Fallback: emit MUL then ADD */
46994733
/* First emit MUL: dest = src1 * src2 */
47004734
thumb_emit_mul32(src1, src2, dest, TCCIR_OP_MUL);
47014735
/* Then emit ADD: dest = dest + accum */
4702-
/* Use th_add_reg if accum is in a register, otherwise th_add_imm */
4703-
if (accum_reg != PREG_REG_NONE)
4736+
if (IS_VALID_DATA_REG(accum_reg))
47044737
{
47054738
ot_check(th_add_reg((uint32_t)dest_reg, (uint32_t)dest_reg, (uint32_t)accum_reg, FLAGS_BEHAVIOUR_NOT_IMPORTANT,
47064739
THUMB_SHIFT_DEFAULT, ENFORCE_ENCODING_NONE));
@@ -4711,8 +4744,32 @@ void tcc_gen_machine_data_processing_op(IROperand src1, IROperand src2, IROperan
47114744
ot_check(th_add_imm((uint32_t)dest_reg, (uint32_t)dest_reg, (uint32_t)imm, FLAGS_BEHAVIOUR_NOT_IMPORTANT,
47124745
ENFORCE_ENCODING_NONE));
47134746
}
4747+
else
4748+
{
4749+
/* Accumulator is spilled — load from its stack slot via live interval.
4750+
* Cannot use load_to_reg_ir(accum) because the operand pool entry
4751+
* has stale pr0_reg from before register allocation. */
4752+
int loaded = 0;
4753+
if (accum_vr >= 0)
4754+
{
4755+
IRLiveInterval *accum_li = tcc_ir_get_live_interval(ir_state, accum_vr);
4756+
if (accum_li)
4757+
{
4758+
int spill_offset = accum_li->allocation.offset;
4759+
ScratchRegAlloc accum_scratch = get_scratch_reg_with_save((1u << dest_reg));
4760+
tcc_machine_load_spill_slot(accum_scratch.reg, spill_offset);
4761+
ot_check(th_add_reg((uint32_t)dest_reg, (uint32_t)dest_reg, (uint32_t)accum_scratch.reg,
4762+
FLAGS_BEHAVIOUR_NOT_IMPORTANT, THUMB_SHIFT_DEFAULT, ENFORCE_ENCODING_NONE));
4763+
restore_scratch_reg(&accum_scratch);
4764+
loaded = 1;
4765+
}
4766+
}
4767+
if (!loaded)
4768+
tcc_error("compiler_error: MLA accumulator has no register and no spill slot");
4769+
}
47144770
return;
47154771
}
4772+
#undef IS_VALID_DATA_REG
47164773

47174774
/* Emit MLA instruction: th_mla(rd, rn, rm, ra) -> rd = rn * rm + ra */
47184775
/* src1 = rn, src2 = rm, accum = ra, dest = rd */

bin/armv8m-tcc.elf

-6.02 KB
Binary file not shown.

ir/operand.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -467,10 +467,10 @@ IROperand svalue_to_iroperand(TCCIRState *ir, const SValue *sv)
467467

468468
if (tag == IROP_TAG_STACKOFF)
469469
{
470-
/* Stack offset: store offset/4 in aux_data (assumes 4-byte aligned, ±128KB range) */
470+
/* Stack offset: store directly in aux_data (±32KB range) */
471471
int32_t offset = result.u.imm32;
472472
result.u.s.ctype_idx = (uint16_t)ctype_idx;
473-
result.u.s.aux_data = (int16_t)(offset >> 2); /* offset/4 to fit in 16 bits */
473+
result.u.s.aux_data = (int16_t)offset; /* store offset directly, no alignment assumption */
474474
}
475475
else if (tag == IROP_TAG_SYMREF)
476476
{
@@ -550,9 +550,9 @@ void iroperand_to_svalue(const TCCIRState *ir, IROperand op, SValue *out)
550550
/* Restore VT_PARAM from explicit is_param flag */
551551
if (op.is_param)
552552
out->r |= VT_PARAM;
553-
/* For STRUCT types, offset is stored in aux_data * 4 */
553+
/* For STRUCT types, offset is stored directly in aux_data */
554554
if (irop_bt == IROP_BTYPE_STRUCT)
555-
out->c.i = (int64_t)op.u.s.aux_data << 2; /* aux_data * 4 */
555+
out->c.i = (int64_t)op.u.s.aux_data; /* offset stored directly */
556556
else
557557
out->c.i = (int64_t)op.u.imm32; /* stack offset stored in imm32 */
558558
break;

ir/operand.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ typedef struct __attribute__((packed)) IROperand
9999
struct
100100
{ /* for STRUCT types - split encoding */
101101
uint16_t ctype_idx; /* index into pool_ctype (lower 16 bits) */
102-
int16_t aux_data; /* aux: stack offset/4 for STACKOFF, symref_idx for SYMREF */
102+
int16_t aux_data; /* aux: stack offset for STACKOFF, symref_idx for SYMREF */
103103
} s;
104104
} u;
105105
/* Physical register allocation (filled by register allocator for codegen) */
@@ -221,9 +221,9 @@ static inline int64_t irop_get_imm64_ex(const struct TCCIRState *ir, IROperand o
221221
/* Sign-extend 32-bit immediate to 64-bit */
222222
return (int64_t)op.u.imm32;
223223
case IROP_TAG_STACKOFF:
224-
/* For STRUCT types, offset is in aux_data * 4; otherwise in imm32 */
224+
/* For STRUCT types, offset is stored directly in aux_data; otherwise in imm32 */
225225
if (op.btype == IROP_BTYPE_STRUCT)
226-
return (int64_t)((int32_t)op.u.s.aux_data << 2);
226+
return (int64_t)((int32_t)op.u.s.aux_data);
227227
return (int64_t)op.u.imm32;
228228
case IROP_TAG_I64:
229229
/* Look up in pool */
@@ -495,7 +495,7 @@ static inline int irop_has_vreg(const IROperand op)
495495
static inline int32_t irop_get_stack_offset(const IROperand op)
496496
{
497497
if (op.btype == IROP_BTYPE_STRUCT)
498-
return (int32_t)op.u.s.aux_data << 2; /* Stored as offset/4 */
498+
return (int32_t)op.u.s.aux_data; /* Stored directly */
499499
return op.u.imm32;
500500
}
501501

0 commit comments

Comments
 (0)