@@ -4152,6 +4152,34 @@ static void thumb_emit_regonly_binop32(IROperand src1, IROperand src2, IROperand
41524152 ScratchRegAlloc rm_alloc = {0 };
41534153 uint32_t exclude = (1u << rd );
41544154
4155+ /* Pre-exclude already-materialized operand registers before any scratch
4156+ * allocation. Without this, get_scratch_reg_with_save() can return a
4157+ * register that is already occupied by the OTHER operand, causing
4158+ * load_to_reg_ir() to clobber it.
4159+ *
4160+ * Bug history: in parse_number() (tccpp.c), the 64-bit multiply
4161+ * n = n * b + t
4162+ * decomposes into cross-term MULs where one operand lives in a register
4163+ * and the other is spilled. Under high register pressure (R9 reserved
4164+ * as GOT pointer), the scratch allocator picked the SAME register for
4165+ * the spilled operand as the materialized one, producing
4166+ * mul.w r0, r3, r3 (b*b = 100)
4167+ * instead of
4168+ * mul.w r0, r2, r3 (n_hi * b)
4169+ */
4170+ if (!(rn == PREG_REG_NONE || src1 .is_lval || thumb_irop_needs_value_load (src1 ) ||
4171+ thumb_irop_has_immediate_value (src1 )))
4172+ {
4173+ if (thumb_is_hw_reg (rn ))
4174+ exclude |= (1u << rn );
4175+ }
4176+ if (!(rm == PREG_REG_NONE || src2 .is_lval || thumb_irop_needs_value_load (src2 ) ||
4177+ thumb_irop_has_immediate_value (src2 )))
4178+ {
4179+ if (thumb_is_hw_reg (rm ))
4180+ exclude |= (1u << rm );
4181+ }
4182+
41554183 if (rn == PREG_REG_NONE || src1 .is_lval || thumb_irop_needs_value_load (src1 ) || thumb_irop_has_immediate_value (src1 ))
41564184 {
41574185 rn_alloc = get_scratch_reg_with_save (exclude );
@@ -4160,10 +4188,6 @@ static void thumb_emit_regonly_binop32(IROperand src1, IROperand src2, IROperand
41604188 IROperand src1_tmp = src1 ;
41614189 load_to_reg_ir (rn , PREG_NONE , src1_tmp );
41624190 }
4163- else
4164- {
4165- thumb_require_materialized_reg (ctx , "src1" , rn );
4166- }
41674191
41684192 if (rm == PREG_REG_NONE || src2 .is_lval || thumb_irop_needs_value_load (src2 ) || thumb_irop_has_immediate_value (src2 ))
41694193 {
@@ -4172,10 +4196,6 @@ static void thumb_emit_regonly_binop32(IROperand src1, IROperand src2, IROperand
41724196 IROperand src2_tmp = src2 ;
41734197 load_to_reg_ir (rm , PREG_NONE , src2_tmp );
41744198 }
4175- else
4176- {
4177- thumb_require_materialized_reg (ctx , "src2" , rm );
4178- }
41794199
41804200 ot_check (emitter ((uint32_t )rd , (uint32_t )rn , (uint32_t )rm ));
41814201
@@ -4222,6 +4242,21 @@ static void thumb_emit_mod32(IROperand src1, IROperand src2, IROperand dest, Tcc
42224242 ScratchRegAlloc quotient_alloc = {0 };
42234243 uint32_t exclude_regs = (1u << dest_reg );
42244244
4245+ /* Pre-exclude already-materialized operand registers before any scratch
4246+ * allocation, same rationale as in thumb_emit_regonly_binop32(). */
4247+ if (!(src1_reg == PREG_REG_NONE || src1 .is_lval || thumb_irop_needs_value_load (src1 ) ||
4248+ thumb_irop_has_immediate_value (src1 )))
4249+ {
4250+ if (thumb_is_hw_reg (src1_reg ))
4251+ exclude_regs |= (1u << src1_reg );
4252+ }
4253+ if (!(src2_reg == PREG_REG_NONE || src2 .is_lval || thumb_irop_needs_value_load (src2 ) ||
4254+ thumb_irop_has_immediate_value (src2 )))
4255+ {
4256+ if (thumb_is_hw_reg (src2_reg ))
4257+ exclude_regs |= (1u << src2_reg );
4258+ }
4259+
42254260 if (src1_reg == PREG_REG_NONE || src1 .is_lval || thumb_irop_needs_value_load (src1 ) ||
42264261 thumb_irop_has_immediate_value (src1 ))
42274262 {
@@ -4231,11 +4266,6 @@ static void thumb_emit_mod32(IROperand src1, IROperand src2, IROperand dest, Tcc
42314266 IROperand src1_tmp = src1 ;
42324267 load_to_reg_ir (src1_reg , PREG_NONE , src1_tmp );
42334268 }
4234- else
4235- {
4236- thumb_require_materialized_reg (ctx , "src1" , src1_reg );
4237- exclude_regs |= (1u << src1_reg );
4238- }
42394269
42404270 if (src2_reg == PREG_REG_NONE || src2 .is_lval || thumb_irop_needs_value_load (src2 ) ||
42414271 thumb_irop_has_immediate_value (src2 ))
@@ -4246,11 +4276,6 @@ static void thumb_emit_mod32(IROperand src1, IROperand src2, IROperand dest, Tcc
42464276 IROperand src2_tmp = src2 ;
42474277 load_to_reg_ir (src2_reg , PREG_NONE , src2_tmp );
42484278 }
4249- else
4250- {
4251- thumb_require_materialized_reg (ctx , "src2" , src2_reg );
4252- exclude_regs |= (1u << src2_reg );
4253- }
42544279
42554280 /* quotient = src1 / src2 */
42564281 quotient_alloc = get_scratch_reg_with_save (exclude_regs );
@@ -4295,6 +4320,21 @@ static void thumb_emit_longmul32x32_to64(IROperand src1, IROperand src2, IROpera
42954320
42964321 uint32_t exclude = 0 ;
42974322
4323+ /* Pre-exclude already-materialized operand registers before any scratch
4324+ * allocation, same rationale as in thumb_emit_regonly_binop32(). */
4325+ if (!(rn == PREG_REG_NONE || src1 .is_lval || thumb_irop_needs_value_load (src1 ) ||
4326+ thumb_irop_has_immediate_value (src1 )))
4327+ {
4328+ if (thumb_is_hw_reg (rn ))
4329+ exclude |= (1u << rn );
4330+ }
4331+ if (!(rm == PREG_REG_NONE || src2 .is_lval || thumb_irop_needs_value_load (src2 ) ||
4332+ thumb_irop_has_immediate_value (src2 )))
4333+ {
4334+ if (thumb_is_hw_reg (rm ))
4335+ exclude |= (1u << rm );
4336+ }
4337+
42984338 if (rn == PREG_REG_NONE || src1 .is_lval || thumb_irop_needs_value_load (src1 ) || thumb_irop_has_immediate_value (src1 ))
42994339 {
43004340 rn_alloc = get_scratch_reg_with_save (exclude );
@@ -4303,12 +4343,6 @@ static void thumb_emit_longmul32x32_to64(IROperand src1, IROperand src2, IROpera
43034343 IROperand src1_tmp = src1 ;
43044344 load_to_reg_ir (rn , PREG_NONE , src1_tmp );
43054345 }
4306- else
4307- {
4308- thumb_require_materialized_reg (ctx , "src1" , rn );
4309- if (thumb_is_hw_reg (rn ))
4310- exclude |= (1u << rn );
4311- }
43124346
43134347 if (rm == PREG_REG_NONE || src2 .is_lval || thumb_irop_needs_value_load (src2 ) || thumb_irop_has_immediate_value (src2 ))
43144348 {
@@ -4318,12 +4352,6 @@ static void thumb_emit_longmul32x32_to64(IROperand src1, IROperand src2, IROpera
43184352 IROperand src2_tmp = src2 ;
43194353 load_to_reg_ir (rm , PREG_NONE , src2_tmp );
43204354 }
4321- else
4322- {
4323- thumb_require_materialized_reg (ctx , "src2" , rm );
4324- if (thumb_is_hw_reg (rm ))
4325- exclude |= (1u << rm );
4326- }
43274355
43284356 ScratchRegAlloc rd_low_alloc = {0 };
43294357 ScratchRegAlloc rd_high_alloc = {0 };
@@ -4691,16 +4719,21 @@ void tcc_gen_machine_data_processing_op(IROperand src1, IROperand src2, IROperan
46914719 }
46924720 }
46934721
4694- /* Ensure all operands are in registers */
4695- if (src1_reg == PREG_REG_NONE || src2_reg == PREG_REG_NONE || accum_reg == PREG_REG_NONE ||
4696- dest_reg == PREG_REG_NONE )
4722+ /* Helper: check if a register is a valid data register (R0-R12, R14/LR).
4723+ * Excludes SP (R13), PC (R15), and PREG_REG_NONE. */
4724+ #define IS_VALID_DATA_REG (r ) ((r) >= 0 && (r) <= 14 && (r) != 13)
4725+
4726+ /* Ensure all operands are in valid data registers.
4727+ * The live interval lookup for accum can return bogus values (e.g. R15/PC)
4728+ * when the accumulator was produced by a MUL that spilled its result. */
4729+ if (!IS_VALID_DATA_REG (src1_reg ) || !IS_VALID_DATA_REG (src2_reg ) || !IS_VALID_DATA_REG (accum_reg ) ||
4730+ !IS_VALID_DATA_REG (dest_reg ))
46974731 {
46984732 /* Fallback: emit MUL then ADD */
46994733 /* First emit MUL: dest = src1 * src2 */
47004734 thumb_emit_mul32 (src1 , src2 , dest , TCCIR_OP_MUL );
47014735 /* Then emit ADD: dest = dest + accum */
4702- /* Use th_add_reg if accum is in a register, otherwise th_add_imm */
4703- if (accum_reg != PREG_REG_NONE )
4736+ if (IS_VALID_DATA_REG (accum_reg ))
47044737 {
47054738 ot_check (th_add_reg ((uint32_t )dest_reg , (uint32_t )dest_reg , (uint32_t )accum_reg , FLAGS_BEHAVIOUR_NOT_IMPORTANT ,
47064739 THUMB_SHIFT_DEFAULT , ENFORCE_ENCODING_NONE ));
@@ -4711,8 +4744,32 @@ void tcc_gen_machine_data_processing_op(IROperand src1, IROperand src2, IROperan
47114744 ot_check (th_add_imm ((uint32_t )dest_reg , (uint32_t )dest_reg , (uint32_t )imm , FLAGS_BEHAVIOUR_NOT_IMPORTANT ,
47124745 ENFORCE_ENCODING_NONE ));
47134746 }
4747+ else
4748+ {
4749+ /* Accumulator is spilled — load from its stack slot via live interval.
4750+ * Cannot use load_to_reg_ir(accum) because the operand pool entry
4751+ * has stale pr0_reg from before register allocation. */
4752+ int loaded = 0 ;
4753+ if (accum_vr >= 0 )
4754+ {
4755+ IRLiveInterval * accum_li = tcc_ir_get_live_interval (ir_state , accum_vr );
4756+ if (accum_li )
4757+ {
4758+ int spill_offset = accum_li -> allocation .offset ;
4759+ ScratchRegAlloc accum_scratch = get_scratch_reg_with_save ((1u << dest_reg ));
4760+ tcc_machine_load_spill_slot (accum_scratch .reg , spill_offset );
4761+ ot_check (th_add_reg ((uint32_t )dest_reg , (uint32_t )dest_reg , (uint32_t )accum_scratch .reg ,
4762+ FLAGS_BEHAVIOUR_NOT_IMPORTANT , THUMB_SHIFT_DEFAULT , ENFORCE_ENCODING_NONE ));
4763+ restore_scratch_reg (& accum_scratch );
4764+ loaded = 1 ;
4765+ }
4766+ }
4767+ if (!loaded )
4768+ tcc_error ("compiler_error: MLA accumulator has no register and no spill slot" );
4769+ }
47144770 return ;
47154771 }
4772+ #undef IS_VALID_DATA_REG
47164773
47174774 /* Emit MLA instruction: th_mla(rd, rn, rm, ra) -> rd = rn * rm + ra */
47184775 /* src1 = rn, src2 = rm, accum = ra, dest = rd */
0 commit comments