Skip to content

Commit 63e8d12

Browse files
committed
feat: add new JobStatusInfo model
This is intended to mirror and eventually replace the `ErrorMessage` model in `src/dvsim/launcher/base.py`. Notably, it extends that model in two ways: the context may be omitted entirely (`None`) rather than requiring an empty list, and multiple line numbers or line ranges (in the form `(start, end)`) may be provided rather than just a single line number.

The intention is that:
- This extended functionality can be used to provide richer error context where possible in the future.
- This `JobStatusInfo` will be used by the new async scheduler and async backends, with the `CompletedJobStatus` eventually returning this as its `fail_msg`. It supersedes `ErrorMessage`, and removes the dependency of `job/data` on `launcher/base`.
- While we could change all the `ErrorMessage`s to equivalent `JobStatusInfo` objects now, we instead retain the old type to reduce code churn for launchers that will be rewritten.

Signed-off-by: Alex Jones <alex.jones@lowrisc.org>
1 parent d4103fa commit 63e8d12

3 files changed

Lines changed: 37 additions & 6 deletions

File tree

src/dvsim/job/data.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@
1515
from pydantic import BaseModel, ConfigDict
1616

1717
from dvsim.job.status import JobStatus
18-
from dvsim.launcher.base import ErrorMessage
1918
from dvsim.report.data import IPMeta, ToolMeta
2019

2120
__all__ = (
2221
"CompletedJobStatus",
2322
"JobSpec",
23+
"JobStatusInfo",
2424
"WorkspaceConfig",
2525
)
2626

@@ -123,6 +123,19 @@ def timeout_secs(self) -> int | None:
123123
return None if self.timeout_mins is None else self.timeout_mins * 60
124124

125125

126+
class JobStatusInfo(BaseModel):
127+
"""Context about some sort of failure / error within a job."""
128+
129+
model_config = ConfigDict(frozen=True, extra="forbid")
130+
131+
message: str
132+
"""Human readable error message."""
133+
lines: Sequence[int | tuple[int, int]] | None = None
134+
"""Relevant line information (in the job script or the job itself)."""
135+
context: Sequence[str] | None = None
136+
"""Arbitrary context strings."""
137+
138+
126139
class CompletedJobStatus(BaseModel):
127140
"""Job status."""
128141

@@ -166,5 +179,5 @@ class CompletedJobStatus(BaseModel):
166179

167180
status: JobStatus
168181
"""Status of the job."""
169-
fail_msg: ErrorMessage
182+
fail_msg: JobStatusInfo | None
170183
"""Error message."""

src/dvsim/scheduler/core.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
from dvsim import instrumentation
2424
from dvsim.instrumentation import NoOpInstrumentation
25-
from dvsim.job.data import CompletedJobStatus, JobSpec
25+
from dvsim.job.data import CompletedJobStatus, JobSpec, JobStatusInfo
2626
from dvsim.job.status import JobStatus
2727
from dvsim.launcher.base import Launcher, LauncherBusyError, LauncherError
2828
from dvsim.logging import log
@@ -264,6 +264,14 @@ def on_signal(signal_received: int, _: FrameType | None) -> None:
264264
launcher = self._launchers[name]
265265
job_spec = self._jobs[name]
266266

267+
fail_msg = None
268+
if launcher.fail_msg is not None:
269+
launcher_fail = launcher.fail_msg
270+
lines = None if launcher_fail.line_number is None else [launcher_fail.line_number]
271+
fail_msg = JobStatusInfo(
272+
message=launcher_fail.message, lines=lines, context=launcher_fail.context
273+
)
274+
267275
results.append(
268276
CompletedJobStatus(
269277
name=job_spec.name,
@@ -279,7 +287,7 @@ def on_signal(signal_received: int, _: FrameType | None) -> None:
279287
job_runtime=launcher.job_runtime.with_unit("s").get()[0],
280288
simulated_time=launcher.simulated_time.with_unit("us").get()[0],
281289
status=status,
282-
fail_msg=launcher.fail_msg,
290+
fail_msg=fail_msg,
283291
)
284292
)
285293

src/dvsim/sim_results.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,19 +133,29 @@ def from_job_status(results: Sequence["CompletedJobStatus"]) -> "BucketedFailure
133133

134134
for job_status in results:
135135
if job_status.status in (JobStatus.FAILED, JobStatus.KILLED):
136+
if job_status.fail_msg is None:
137+
continue
138+
136139
bucket = _bucketize(job_status.fail_msg.message)
137140

138141
if bucket not in buckets:
139142
buckets[bucket] = []
140143

144+
# TODO: expose all relevant line numbers through the bucket, not just the first.
145+
# We only expose the first one for now to keep changes to the scheduler minimal.
146+
first_line_num = None
147+
if job_status.fail_msg.lines:
148+
lines = job_status.fail_msg.lines[0]
149+
first_line_num = lines if isinstance(lines, int) else lines[0]
150+
141151
buckets[bucket].append(
142152
JobFailureOverview(
143153
name=job_status.name,
144154
qual_name=job_status.qual_name,
145155
seed=job_status.seed,
146-
line=job_status.fail_msg.line_number,
156+
line=first_line_num,
147157
log_path=job_status.log_path,
148-
log_context=job_status.fail_msg.context,
158+
log_context=job_status.fail_msg.context or [],
149159
),
150160
)
151161

0 commit comments

Comments
 (0)