@@ -152,6 +152,50 @@ def _find_matching_brace(text: str, open_brace_index: int) -> Optional[int]:
152152 return None
153153
154154
155+ def _find_matching_paren (text : str , open_paren_index : int ) -> Optional [int ]:
156+ depth = 0
157+ for idx in range (open_paren_index , len (text )):
158+ ch = text [idx ]
159+ if ch == "(" :
160+ depth += 1
161+ elif ch == ")" :
162+ depth -= 1
163+ if depth == 0 :
164+ return idx
165+ return None
166+
167+
168+ def _split_top_level (text : str , sep : str ) -> list [str ]:
169+ parts = []
170+ start = 0
171+ paren_depth = 0
172+ brace_depth = 0
173+ bracket_depth = 0
174+ for idx , ch in enumerate (text ):
175+ if ch == "(" :
176+ paren_depth += 1
177+ elif ch == ")" :
178+ paren_depth = max (paren_depth - 1 , 0 )
179+ elif ch == "{" :
180+ brace_depth += 1
181+ elif ch == "}" :
182+ brace_depth = max (brace_depth - 1 , 0 )
183+ elif ch == "[" :
184+ bracket_depth += 1
185+ elif ch == "]" :
186+ bracket_depth = max (bracket_depth - 1 , 0 )
187+ elif (
188+ ch == sep
189+ and paren_depth == 0
190+ and brace_depth == 0
191+ and bracket_depth == 0
192+ ):
193+ parts .append (text [start :idx ].strip ())
194+ start = idx + 1
195+ parts .append (text [start :].strip ())
196+ return parts
197+
198+
155199def _extract_function_body (function_text : str ) -> str :
156200 brace_index = function_text .find ("{" )
157201 if brace_index < 0 :
@@ -907,6 +951,7 @@ def _infer_aicore_arch(kernel_text: str, soc_version: str) -> str:
907951 # IMPORTANT: the default arch depends on the Ascend SoC.
908952 has_mix_macros = "__DAV_CUBE__" in kernel_text and "__DAV_VEC__" in kernel_text
909953 has_intra_block_sync = "set_intra_block(" in kernel_text or "wait_intra_block(" in kernel_text
954+ has_mixed_section_sync = has_mix_macros and has_intra_block_sync
910955 cube_markers = (
911956 "TileType::Mat" ,
912957 "TileType::Left" ,
@@ -926,15 +971,16 @@ def _infer_aicore_arch(kernel_text: str, soc_version: str) -> str:
926971
927972 sv = (soc_version or "" ).lower ()
928973 if "950" in sv or "a5" in sv :
929- # Only inter-core mixed kernels (with intra-block sync intrinsics)
930- # require true mix arch. Generic sectioned kernels should keep vec arch.
931- if has_mix_macros and has_intra_block_sync :
974+ # Sectioned kernels that synchronize across DAV cube/vector regions
975+ # need PTO-ISA's mixed-kernel compile mode so the toolchain chooses
976+ # the correct pipe restrictions and DAV macro ownership.
977+ if has_mixed_section_sync :
932978 return "dav-c310"
933979 # Ascend950 (A5) uses A5 instruction set. pto-isa examples build A5
934980 # kernels with dav-c310-{vec|cube}.
935981 return "dav-c310-cube" if needs_cube else "dav-c310-vec"
936982 if "910b" in sv :
937- if has_mix_macros and has_intra_block_sync :
983+ if has_mixed_section_sync :
938984 return "dav-c310"
939985 # Ascend910B* (e.g. Ascend910B1) uses dav-c310 toolchain arch.
940986 return "dav-c310-cube" if needs_cube else "dav-c310-vec"
@@ -1127,15 +1173,30 @@ def _infer_int_var_maxima(kernel_text: str, seed_env: Optional[dict] = None) ->
11271173 assigns .append ((name , expr ))
11281174
11291175 loops = []
1130- for m in re .finditer (
1131- r"for\s*\(\s*(?:unsigned|int|long|size_t|int(?:8|16|32|64)_t|uint(?:8|16|32|64)_t)\s+(\w+)\s*=\s*([^;]+?)\s*;\s*\1\s*<\s*([^;]+?)\s*;\s*\1\s*\+=\s*([^)]+?)\s*\)" ,
1132- kernel_text ,
1133- ):
1134- ind = m .group (1 )
1135- start = m .group (2 ).strip ()
1136- end = m .group (3 ).strip ()
1137- step = m .group (4 ).strip ()
1138- loops .append ((ind , start , end , step ))
1176+ for m in re .finditer (r"\bfor\s*\(" , kernel_text ):
1177+ open_paren = kernel_text .find ("(" , m .start ())
1178+ if open_paren < 0 :
1179+ continue
1180+ close_paren = _find_matching_paren (kernel_text , open_paren )
1181+ if close_paren is None :
1182+ continue
1183+ header = kernel_text [open_paren + 1 :close_paren ]
1184+ parts = _split_top_level (header , ";" )
1185+ if len (parts ) != 3 :
1186+ continue
1187+ init , cond , step = parts
1188+ init_m = re .match (
1189+ r"^\s*(?:unsigned|int|long|size_t|int(?:8|16|32|64)_t|uint(?:8|16|32|64)_t)\s+(\w+)\s*=\s*(.+?)\s*$" ,
1190+ init ,
1191+ )
1192+ if not init_m :
1193+ continue
1194+ ind = init_m .group (1 )
1195+ cond_m = re .match (rf"^\s*{ re .escape (ind )} \s*<\s*(.+?)\s*$" , cond )
1196+ step_m = re .match (rf"^\s*{ re .escape (ind )} \s*\+=\s*(.+?)\s*$" , step )
1197+ if not cond_m or not step_m :
1198+ continue
1199+ loops .append ((ind , init_m .group (2 ).strip (), cond_m .group (1 ).strip (), step_m .group (1 ).strip ()))
11391200
11401201 maxima : dict [str , Optional [int ]] = {
11411202 k : (None if v is None else int (v ))
@@ -1403,6 +1464,9 @@ def generate_testcase(
14031464 has_dav_cube = "__DAV_CUBE__" in raw_kernel
14041465 has_dav_vec = "__DAV_VEC__" in raw_kernel
14051466 has_intra_block_sync = "set_intra_block(" in raw_kernel or "wait_intra_block(" in raw_kernel
1467+ has_mixed_section_sync = has_dav_cube and has_dav_vec and has_intra_block_sync
1468+ has_cube_only_section = has_dav_cube and not has_dav_vec
1469+ has_vec_only_section = has_dav_vec and not has_dav_cube
14061470
14071471 is_mixed_kernel = kernel_info ["kind" ] == "mixed"
14081472
@@ -1414,20 +1478,37 @@ def generate_testcase(
14141478 else :
14151479 aicore_arch = "dav-c220"
14161480 # Sectioned kernels contain `#if defined(__DAV_CUBE__)` / `__DAV_VEC__`
1417- # blocks. For inter-core-style mixed kernels (with intra-block sync),
1418- # align to PTO-ISA mix-kernel compile mode (`dav-c310`) so the
1419- # toolchain owns DAV macro definition.
1420- elif has_dav_cube and has_dav_vec and has_intra_block_sync :
1481+ # blocks. If they also carry explicit pipe synchronization, align to
1482+ # PTO-ISA mix-kernel compile mode (`dav-c310`) so the toolchain owns
1483+ # DAV macro definition and pipe legality checks .
1484+ elif has_mixed_section_sync :
14211485 sv = (soc_version or "" ).lower ()
14221486 if "950" in sv or "a5" in sv :
14231487 aicore_arch = "dav-c310"
14241488 elif "910b" in sv :
14251489 aicore_arch = "dav-c310"
14261490 else :
14271491 aicore_arch = "dav-c220"
1492+ elif has_cube_only_section :
1493+ # A cube-only section must keep the cube arch. Building it as vec
1494+ # while forcing `__DAV_CUBE__` makes AIC pipe synchronization fail
1495+ # legality checks on A5.
1496+ sv = (soc_version or "" ).lower ()
1497+ if "950" in sv or "a5" in sv or "910b" in sv :
1498+ aicore_arch = "dav-c310-cube"
1499+ else :
1500+ aicore_arch = "dav-c220-cube"
1501+ elif has_vec_only_section :
1502+ sv = (soc_version or "" ).lower ()
1503+ if "950" in sv or "a5" in sv :
1504+ aicore_arch = "dav-c310-vec"
1505+ elif "910b" in sv :
1506+ aicore_arch = "dav-c310-vec"
1507+ else :
1508+ aicore_arch = "dav-c220-vec"
14281509 elif has_dav_cube or has_dav_vec :
1429- # Single -section kernels can still be built with vec arch while
1430- # forcing the needed DAV macro.
1510+ # Generic multi -section kernels without mixed-kernel sync keep the
1511+ # historical vec-arch + forced- macro path .
14311512 sv = (soc_version or "" ).lower ()
14321513 if "950" in sv or "a5" in sv :
14331514 aicore_arch = "dav-c310-vec"
0 commit comments