Skip to content

Commit 122796b

Browse files
Merge pull request #484 from HecreReed/codex/fix-a5-remote-arch
fix: use mixed arch for sectioned sync kernels
2 parents 96046c1 + 63cf3d9 commit 122796b

1 file changed

Lines changed: 100 additions & 19 deletions

File tree

test/npu_validation/scripts/generate_testcase.py

Lines changed: 100 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,50 @@ def _find_matching_brace(text: str, open_brace_index: int) -> Optional[int]:
152152
return None
153153

154154

155+
def _find_matching_paren(text: str, open_paren_index: int) -> Optional[int]:
156+
depth = 0
157+
for idx in range(open_paren_index, len(text)):
158+
ch = text[idx]
159+
if ch == "(":
160+
depth += 1
161+
elif ch == ")":
162+
depth -= 1
163+
if depth == 0:
164+
return idx
165+
return None
166+
167+
168+
def _split_top_level(text: str, sep: str) -> list[str]:
169+
parts = []
170+
start = 0
171+
paren_depth = 0
172+
brace_depth = 0
173+
bracket_depth = 0
174+
for idx, ch in enumerate(text):
175+
if ch == "(":
176+
paren_depth += 1
177+
elif ch == ")":
178+
paren_depth = max(paren_depth - 1, 0)
179+
elif ch == "{":
180+
brace_depth += 1
181+
elif ch == "}":
182+
brace_depth = max(brace_depth - 1, 0)
183+
elif ch == "[":
184+
bracket_depth += 1
185+
elif ch == "]":
186+
bracket_depth = max(bracket_depth - 1, 0)
187+
elif (
188+
ch == sep
189+
and paren_depth == 0
190+
and brace_depth == 0
191+
and bracket_depth == 0
192+
):
193+
parts.append(text[start:idx].strip())
194+
start = idx + 1
195+
parts.append(text[start:].strip())
196+
return parts
197+
198+
155199
def _extract_function_body(function_text: str) -> str:
156200
brace_index = function_text.find("{")
157201
if brace_index < 0:
@@ -907,6 +951,7 @@ def _infer_aicore_arch(kernel_text: str, soc_version: str) -> str:
907951
# IMPORTANT: the default arch depends on the Ascend SoC.
908952
has_mix_macros = "__DAV_CUBE__" in kernel_text and "__DAV_VEC__" in kernel_text
909953
has_intra_block_sync = "set_intra_block(" in kernel_text or "wait_intra_block(" in kernel_text
954+
has_mixed_section_sync = has_mix_macros and has_intra_block_sync
910955
cube_markers = (
911956
"TileType::Mat",
912957
"TileType::Left",
@@ -926,15 +971,16 @@ def _infer_aicore_arch(kernel_text: str, soc_version: str) -> str:
926971

927972
sv = (soc_version or "").lower()
928973
if "950" in sv or "a5" in sv:
929-
# Only inter-core mixed kernels (with intra-block sync intrinsics)
930-
# require true mix arch. Generic sectioned kernels should keep vec arch.
931-
if has_mix_macros and has_intra_block_sync:
974+
# Sectioned kernels that synchronize across DAV cube/vector regions
975+
# need PTO-ISA's mixed-kernel compile mode so the toolchain chooses
976+
# the correct pipe restrictions and DAV macro ownership.
977+
if has_mixed_section_sync:
932978
return "dav-c310"
933979
# Ascend950 (A5) uses A5 instruction set. pto-isa examples build A5
934980
# kernels with dav-c310-{vec|cube}.
935981
return "dav-c310-cube" if needs_cube else "dav-c310-vec"
936982
if "910b" in sv:
937-
if has_mix_macros and has_intra_block_sync:
983+
if has_mixed_section_sync:
938984
return "dav-c310"
939985
# Ascend910B* (e.g. Ascend910B1) uses dav-c310 toolchain arch.
940986
return "dav-c310-cube" if needs_cube else "dav-c310-vec"
@@ -1127,15 +1173,30 @@ def _infer_int_var_maxima(kernel_text: str, seed_env: Optional[dict] = None) ->
11271173
assigns.append((name, expr))
11281174

11291175
loops = []
1130-
for m in re.finditer(
1131-
r"for\s*\(\s*(?:unsigned|int|long|size_t|int(?:8|16|32|64)_t|uint(?:8|16|32|64)_t)\s+(\w+)\s*=\s*([^;]+?)\s*;\s*\1\s*<\s*([^;]+?)\s*;\s*\1\s*\+=\s*([^)]+?)\s*\)",
1132-
kernel_text,
1133-
):
1134-
ind = m.group(1)
1135-
start = m.group(2).strip()
1136-
end = m.group(3).strip()
1137-
step = m.group(4).strip()
1138-
loops.append((ind, start, end, step))
1176+
for m in re.finditer(r"\bfor\s*\(", kernel_text):
1177+
open_paren = kernel_text.find("(", m.start())
1178+
if open_paren < 0:
1179+
continue
1180+
close_paren = _find_matching_paren(kernel_text, open_paren)
1181+
if close_paren is None:
1182+
continue
1183+
header = kernel_text[open_paren + 1:close_paren]
1184+
parts = _split_top_level(header, ";")
1185+
if len(parts) != 3:
1186+
continue
1187+
init, cond, step = parts
1188+
init_m = re.match(
1189+
r"^\s*(?:unsigned|int|long|size_t|int(?:8|16|32|64)_t|uint(?:8|16|32|64)_t)\s+(\w+)\s*=\s*(.+?)\s*$",
1190+
init,
1191+
)
1192+
if not init_m:
1193+
continue
1194+
ind = init_m.group(1)
1195+
cond_m = re.match(rf"^\s*{re.escape(ind)}\s*<\s*(.+?)\s*$", cond)
1196+
step_m = re.match(rf"^\s*{re.escape(ind)}\s*\+=\s*(.+?)\s*$", step)
1197+
if not cond_m or not step_m:
1198+
continue
1199+
loops.append((ind, init_m.group(2).strip(), cond_m.group(1).strip(), step_m.group(1).strip()))
11391200

11401201
maxima: dict[str, Optional[int]] = {
11411202
k: (None if v is None else int(v))
@@ -1403,6 +1464,9 @@ def generate_testcase(
14031464
has_dav_cube = "__DAV_CUBE__" in raw_kernel
14041465
has_dav_vec = "__DAV_VEC__" in raw_kernel
14051466
has_intra_block_sync = "set_intra_block(" in raw_kernel or "wait_intra_block(" in raw_kernel
1467+
has_mixed_section_sync = has_dav_cube and has_dav_vec and has_intra_block_sync
1468+
has_cube_only_section = has_dav_cube and not has_dav_vec
1469+
has_vec_only_section = has_dav_vec and not has_dav_cube
14061470

14071471
is_mixed_kernel = kernel_info["kind"] == "mixed"
14081472

@@ -1414,20 +1478,37 @@ def generate_testcase(
14141478
else:
14151479
aicore_arch = "dav-c220"
14161480
# Sectioned kernels contain `#if defined(__DAV_CUBE__)` / `__DAV_VEC__`
1417-
# blocks. For inter-core-style mixed kernels (with intra-block sync),
1418-
# align to PTO-ISA mix-kernel compile mode (`dav-c310`) so the
1419-
# toolchain owns DAV macro definition.
1420-
elif has_dav_cube and has_dav_vec and has_intra_block_sync:
1481+
# blocks. If they also carry explicit pipe synchronization, align to
1482+
# PTO-ISA mix-kernel compile mode (`dav-c310`) so the toolchain owns
1483+
# DAV macro definition and pipe legality checks.
1484+
elif has_mixed_section_sync:
14211485
sv = (soc_version or "").lower()
14221486
if "950" in sv or "a5" in sv:
14231487
aicore_arch = "dav-c310"
14241488
elif "910b" in sv:
14251489
aicore_arch = "dav-c310"
14261490
else:
14271491
aicore_arch = "dav-c220"
1492+
elif has_cube_only_section:
1493+
# A cube-only section must keep the cube arch. Building it as vec
1494+
# while forcing `__DAV_CUBE__` makes AIC pipe synchronization fail
1495+
# legality checks on A5.
1496+
sv = (soc_version or "").lower()
1497+
if "950" in sv or "a5" in sv or "910b" in sv:
1498+
aicore_arch = "dav-c310-cube"
1499+
else:
1500+
aicore_arch = "dav-c220-cube"
1501+
elif has_vec_only_section:
1502+
sv = (soc_version or "").lower()
1503+
if "950" in sv or "a5" in sv:
1504+
aicore_arch = "dav-c310-vec"
1505+
elif "910b" in sv:
1506+
aicore_arch = "dav-c310-vec"
1507+
else:
1508+
aicore_arch = "dav-c220-vec"
14281509
elif has_dav_cube or has_dav_vec:
1429-
# Single-section kernels can still be built with vec arch while
1430-
# forcing the needed DAV macro.
1510+
# Generic multi-section kernels without mixed-kernel sync keep the
1511+
# historical vec-arch + forced-macro path.
14311512
sv = (soc_version or "").lower()
14321513
if "950" in sv or "a5" in sv:
14331514
aicore_arch = "dav-c310-vec"

0 commit comments

Comments
 (0)