1111from pathlib import Path
1212from typing import List
1313
# Matches ANSI CSI escape sequences (ESC, "[", optional numeric parameters
# separated by ";", then a single letter), e.g. the colour codes "\x1b[31m"
# and "\x1b[0m", so they can be removed from log lines with ``.sub("", ...)``.
# Carriage-return progress overwrites are NOT covered by this pattern and
# have to be handled separately by the caller.
_ANSI_RE = re.compile(r"\x1b\[[0-9;]*[A-Za-z]")
16+
# Matches standard Python warning output:
#   /path/to/file.py:42: DeprecationWarning: some message
#   <unknown>:2: DeprecationWarning: invalid escape sequence '\s'
# The message may span continuation lines (indented), but we grab the first line.
#
# The pattern is deliberately not anchored at the start of the line, so it can
# be located with ``.search()`` even when the warning is preceded by other
# output on the same line. The path is either a whitespace- and colon-free
# token ending in ".py" or the literal "<unknown>" that Python reports when
# the source file name is not known.
_WARNING_RE = re.compile(
    r"(?P<path>/?[^\s:]+\.py|<unknown>):(?P<lineno>\d+):\s+"
    r"(?P<category>DeprecationWarning|FutureWarning):\s+"
    r"(?P<message>.+)$"
)
@@ -68,14 +72,20 @@ def parse_log(log_path: str | Path) -> List[BuildWarning]:
6872 repeating the same warning 50 times.
6973 """
7074 log_path = Path (log_path )
71- if not log_path .exists ():
75+ try :
76+ text = log_path .read_text (errors = "replace" )
77+ except FileNotFoundError :
7278 return []
7379
7480 seen : dict [tuple [str , str ], BuildWarning ] = {}
7581 warnings : list [BuildWarning ] = []
7682
77- for line in log_path .read_text (errors = "replace" ).splitlines ():
78- m = _WARNING_RE .match (line )
83+ for line in text .splitlines ():
84+ # Strip ANSI escapes and split on \r to handle progress-line overwriting
85+ line = _ANSI_RE .sub ("" , line )
86+ if "\r " in line :
87+ line = line .rsplit ("\r " , 1 )[- 1 ]
88+ m = _WARNING_RE .search (line )
7989 if m is None :
8090 continue
8191
@@ -101,3 +111,39 @@ def parse_log(log_path: str | Path) -> List[BuildWarning]:
def is_tutorial_source(path: str) -> bool:
    """Return True if *path* belongs to a known tutorial source directory.

    A path qualifies when it starts with any of the prefixes listed in the
    module-level ``_SOURCE_DIRS`` collection; an empty collection yields
    ``False``.
    """
    # str.startswith accepts a tuple of candidates, so a single call replaces
    # the explicit any()-loop over the prefixes.
    return path.startswith(tuple(_SOURCE_DIRS))
114+
115+
# Package prefixes that belong to PyTorch core
_PYTORCH_CORE_PACKAGES = (
    "/torch/",
    "torch/",
)

# Package prefixes for PyTorch ecosystem libraries
_PYTORCH_LIB_PACKAGES = (
    "/torchvision/",
    "/torchaudio/",
    "/torchtext/",
    "/torchrl/",
    "/tensordict/",
    "/torchdata/",
    "/torchtune/",
    "/torchtitan/",
    "/functorch/",
    "/torch_xla/",
    "/executorch/",
)

# Bare directory names derived from the prefix tuples above.  Matching whole
# path components instead of substrings keeps "torch/" from also matching
# "functorch/", "executorch/" or unrelated packages such as "botorch/".
_PYTORCH_CORE_DIRS = frozenset(p.strip("/") for p in _PYTORCH_CORE_PACKAGES)
_PYTORCH_LIB_DIRS = frozenset(p.strip("/") for p in _PYTORCH_LIB_PACKAGES)


def classify_dependency(path: str) -> str:
    """Classify a non-tutorial warning path into a dependency category.

    The path is split on ``/`` and only its directory components (everything
    except the final component) are compared, as exact names, against the
    known package directories.  PyTorch core is checked before the ecosystem
    libraries, so a path containing both kinds of directory is reported as
    ``"pytorch"``.

    Args:
        path: File path taken from a warning line; may be absolute or
            relative, and may be a placeholder such as ``"<unknown>"``.

    Returns one of: ``"pytorch"``, ``"pytorch_libs"``, ``"third_party"``.
    """
    # Drop the last component: the original prefixes all end in "/", i.e.
    # they only ever matched directories, never the file name itself.
    directories = set(path.split("/")[:-1])
    if not directories.isdisjoint(_PYTORCH_CORE_DIRS):
        return "pytorch"
    if not directories.isdisjoint(_PYTORCH_LIB_DIRS):
        return "pytorch_libs"
    return "third_party"
0 commit comments