@@ -49,11 +49,34 @@ asm (
4949" .popsection ;"
5050);
5151
52- void bpf_jit_build_fentry_stubs (u32 * image , struct codegen_context * ctx )
52+ void bpf_jit_build_fentry_stubs (u32 * image , u32 * fimage , struct codegen_context * ctx )
5353{
5454 int ool_stub_idx , long_branch_stub_idx ;
5555
5656 /*
57+ * In the final pass, align the mis-aligned dummy tramp address
 58+ * in the fimage. The alignment NOP must appear before the OOL stub,
 59+ * so that ool_stub_idx and long_branch_stub_idx stay at a constant offset from the end.
60+ *
61+ * Need alignment NOP in following conditions:
62+ *
63+ * OOL stub aligned CONFIG_PPC_FTRACE_OUT_OF_LINE Alignment NOP
64+ * Y Y N
65+ * Y N Y
66+ * N Y Y
67+ * N N N
68+ */
69+ #ifdef CONFIG_PPC64
70+ if (fimage && image ) {
71+ unsigned long pc = (unsigned long )fimage + CTX_NIA (ctx );
72+
73+ if (IS_ALIGNED (pc , 8 ) ^
74+ IS_ENABLED (CONFIG_PPC_FTRACE_OUT_OF_LINE ))
75+ EMIT (PPC_RAW_NOP ());
76+ }
77+ #endif
78+
79+ /* nop // optional, for alignment of dummy_tramp_addr
5780 * Out-of-line stub:
5881 * mflr r0
5982 * [b|bl] tramp
@@ -70,26 +93,36 @@ void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
7093
7194 /*
7295 * Long branch stub:
73- * .long <dummy_tramp_addr>
7496 * mflr r11
7597 * bcl 20,31,$+4
76- * mflr r12
77- * ld r12, -8-SZL (r12)
98+ * mflr r12 // lr/r12 stores current pc
 99+ * ld r12, 20 (r12) // dummy_tramp_addr; NOTE(review): r12 holds the mflr-r12 address, from which the .long (after ld/mtctr/mtlr/bctr/nop) sits at +24, not 20 — confirm the load offset
78100 * mtctr r12
79- * mtlr r11 // needed to retain ftrace ABI
101+ * mtlr r11 // needed to retain ftrace ABI
80102 * bctr
103+ * nop // for alignment of following address field
104+ * .long <dummy_tramp_addr> // 8-byte aligned
81105 */
82- if (image )
83- * ((unsigned long * )& image [ctx -> idx ]) = (unsigned long )dummy_tramp ;
84- ctx -> idx += SZL / 4 ;
85106 long_branch_stub_idx = ctx -> idx ;
86107 EMIT (PPC_RAW_MFLR (_R11 ));
87108 EMIT (PPC_RAW_BCL4 ());
88109 EMIT (PPC_RAW_MFLR (_R12 ));
89- EMIT (PPC_RAW_LL (_R12 , _R12 , -8 - SZL ));
110+ EMIT (PPC_RAW_LL (_R12 , _R12 , 20 ));
90111 EMIT (PPC_RAW_MTCTR (_R12 ));
91112 EMIT (PPC_RAW_MTLR (_R11 ));
92113 EMIT (PPC_RAW_BCTR ());
114+ /*
115+ * The start of Long branch stub is guaranteed to be aligned as
116+ * result of optional NOP injection before OOL stub above.
 117+ * Append a tail NOP to regain 8-byte alignment, which is disturbed
 118+ * by the odd instruction count in the Long branch stub.
119+ */
120+ EMIT (PPC_RAW_NOP ());
121+
122+ if (image )
123+ * ((unsigned long * )& image [ctx -> idx ]) = (unsigned long )dummy_tramp ;
124+
125+ ctx -> idx += SZL / 4 ;
93126
94127 if (!bpf_jit_ool_stub ) {
95128 bpf_jit_ool_stub = (ctx -> idx - ool_stub_idx ) * 4 ;
@@ -107,7 +140,7 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
107140 PPC_JMP (ctx -> alt_exit_addr );
108141 } else {
109142 ctx -> alt_exit_addr = ctx -> idx * 4 ;
110- bpf_jit_build_epilogue (image , ctx );
143+ bpf_jit_build_epilogue (image , NULL , ctx );
111144 }
112145
113146 return 0 ;
@@ -308,7 +341,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
308341 */
309342 bpf_jit_build_prologue (NULL , & cgctx );
310343 addrs [fp -> len ] = cgctx .idx * 4 ;
311- bpf_jit_build_epilogue (NULL , & cgctx );
344+ bpf_jit_build_epilogue (NULL , NULL , & cgctx );
312345
313346 fixup_len = fp -> aux -> num_exentries * BPF_FIXUP_LEN * 4 ;
314347 extable_len = fp -> aux -> num_exentries * sizeof (struct exception_table_entry );
@@ -343,7 +376,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
343376 fp = org_fp ;
344377 goto out_addrs ;
345378 }
346- bpf_jit_build_epilogue (code_base , & cgctx );
379+ bpf_jit_build_epilogue (code_base , fcode_base , & cgctx );
347380
348381 if (bpf_jit_enable > 1 )
349382 pr_info ("Pass %d: shrink = %d, seen = 0x%x\n" , pass ,
@@ -1280,21 +1313,23 @@ static void do_isync(void *info __maybe_unused)
12801313 * bpf_func:
12811314 * [nop|b] ool_stub
12821315 * 2. Out-of-line stub:
1316+ * nop // optional nop for alignment
12831317 * ool_stub:
12841318 * mflr r0
12851319 * [b|bl] <bpf_prog>/<long_branch_stub>
12861320 * mtlr r0 // CONFIG_PPC_FTRACE_OUT_OF_LINE only
12871321 * b bpf_func + 4
12881322 * 3. Long branch stub:
12891323 * long_branch_stub:
1290- * .long <branch_addr>/<dummy_tramp>
12911324 * mflr r11
12921325 * bcl 20,31,$+4
12931326 * mflr r12
12941327 * ld r12, -16(r12)
12951328 * mtctr r12
12961329 * mtlr r11 // needed to retain ftrace ABI
12971330 * bctr
1331+ * nop // nop for mem alignment of dummy_tramp_addr
1332+ * .long <branch_addr>/<dummy_tramp>
12981333 *
12991334 * dummy_tramp is used to reduce synchronization requirements.
13001335 *
@@ -1396,10 +1431,12 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
13961431 * 1. Update the address in the long branch stub:
13971432 * If new_addr is out of range, we will have to use the long branch stub, so patch new_addr
13981433 * here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here.
1434+ *
1435+ * dummy_tramp_addr moved to bottom of long branch stub.
13991436 */
14001437 if ((new_addr && !is_offset_in_branch_range (new_addr - ip )) ||
14011438 (old_addr && !is_offset_in_branch_range (old_addr - ip )))
1402- ret = patch_ulong ((void * )(bpf_func_end - bpf_jit_long_branch_stub - SZL ),
1439+ ret = patch_ulong ((void * )(bpf_func_end - SZL ), /* SZL: dummy_tramp_addr offset */
14031440 (new_addr && !is_offset_in_branch_range (new_addr - ip )) ?
14041441 (unsigned long )new_addr : (unsigned long )dummy_tramp );
14051442 if (ret )
0 commit comments