8282 os .path .join (os .path .dirname (__file__ ), "tools_snapshot.json" ),
8383)
8484
# HTTP behavior controls.
# Read once from the environment at import time; http_fetch() falls back to
# these values when its retries/backoff_base/jitter arguments are None.
HTTP_RETRIES: int = int(os.environ.get("CLI_AUDIT_HTTP_RETRIES", "2"))
HTTP_BACKOFF_BASE: float = float(os.environ.get("CLI_AUDIT_BACKOFF_BASE", "0.2"))
HTTP_BACKOFF_JITTER: float = float(os.environ.get("CLI_AUDIT_BACKOFF_JITTER", "0.1"))

# Ultra-verbose tracing.
# TRACE gates _tlog() output; TRACE_NET additionally gates the per-request
# network trace lines. Both are enabled only by the exact string "1".
TRACE: bool = os.environ.get("CLI_AUDIT_TRACE", "0") == "1"
TRACE_NET: bool = os.environ.get("CLI_AUDIT_TRACE_NET", "0") == "1"
# Duration threshold in milliseconds at or above which a latest-version lookup
# is reported as slow.
SLOW_MS: int = int(os.environ.get("CLI_AUDIT_SLOW_MS", "2000"))
94+
def _vlog(msg: str) -> None:
    """Write *msg* to stderr when progress output or tracing is enabled.

    The message is emitted only if the module-level ``PROGRESS`` or ``TRACE``
    flag is truthy; otherwise the call is a no-op.

    Args:
        msg: The already-formatted line to print.
    """
    if PROGRESS or TRACE:
        try:
            print(msg, file=sys.stderr)
        except Exception:
            # Diagnostics are best-effort: a failure to write to stderr must
            # never interrupt the caller.
            pass
101+
def _tlog(msg: str) -> None:
    """Write *msg* to stderr only when tracing is enabled.

    The message is emitted only if the module-level ``TRACE`` flag is truthy;
    otherwise the call is a no-op.

    Args:
        msg: The already-formatted line to print.
    """
    if TRACE:
        try:
            print(msg, file=sys.stderr)
        except Exception:
            # Trace output is best-effort: a failure to write to stderr must
            # never interrupt the caller.
            pass
108+
85109def _now_iso () -> str :
86110 try :
87111 return datetime .datetime .utcnow ().replace (microsecond = 0 ).isoformat () + "Z"
@@ -127,7 +151,7 @@ def load_snapshot(paths: Sequence[str] | None = None) -> dict[str, Any]:
127151 return d
128152 return {}
129153
130- def write_snapshot (tools_payload : list [dict [str , Any ]], extra : dict [str , Any ] | None = None ) -> None :
154+ def write_snapshot (tools_payload : list [dict [str , Any ]], extra : dict [str , Any ] | None = None ) -> dict [ str , Any ] :
131155 meta = {
132156 "schema_version" : 1 ,
133157 "created_at" : _now_iso (),
@@ -142,6 +166,7 @@ def write_snapshot(tools_payload: list[dict[str, Any]], extra: dict[str, Any] |
142166 pass
143167 doc = {"__meta__" : meta , "tools" : tools_payload }
144168 _atomic_write_json (SNAPSHOT_FILE , doc )
169+ return meta
145170
146171def render_from_snapshot (doc : dict [str , Any ], selected : set [str ] | None = None ) -> list [tuple [str , str , str , str , str , str , str , str ]]:
147172 items = doc .get ("tools" , [])
@@ -212,9 +237,9 @@ def http_fetch(
212237 url : str ,
213238 timeout : float | int = TIMEOUT_SECONDS ,
214239 headers : dict [str , str ] | None = None ,
215- retries : int = 3 ,
216- backoff_base : float = 0.2 ,
217- jitter : float = 0.1 ,
240+ retries : int = None ,
241+ backoff_base : float = None ,
242+ jitter : float = None ,
218243 method : str | None = None ,
219244) -> bytes :
220245 """Fetch URL with retries, jitter, and per-origin concurrency caps.
@@ -236,31 +261,47 @@ def http_fetch(
236261 if GITHUB_TOKEN and host == "api.github.com" :
237262 req_headers ["Authorization" ] = f"Bearer { GITHUB_TOKEN } "
238263
264+ if retries is None :
265+ retries = HTTP_RETRIES
266+ if backoff_base is None :
267+ backoff_base = HTTP_BACKOFF_BASE
268+ if jitter is None :
269+ jitter = HTTP_BACKOFF_JITTER
239270 last_exc : Exception | None = None
240271 for attempt in range (max (1 , retries )):
241272 try :
242273 if sem is None :
243274 req = urllib .request .Request (url , headers = req_headers , method = method )
244275 with urllib .request .urlopen (req , timeout = timeout ) as resp :
276+ if TRACE_NET :
277+ _tlog (f"# http_open host={ host } code={ getattr (resp , 'status' , 0 )} url={ url } " )
245278 return resp .read ()
246279 with sem :
247280 req = urllib .request .Request (url , headers = req_headers , method = method )
248281 with urllib .request .urlopen (req , timeout = timeout ) as resp :
282+ if TRACE_NET :
283+ _tlog (f"# http_open host={ host } code={ getattr (resp , 'status' , 0 )} url={ url } " )
249284 return resp .read ()
250285 except urllib .error .HTTPError as e :
251286 last_exc = e
252287 code = getattr (e , "code" , 0 ) or 0
253288 retryable = (code == 429 ) or (500 <= code <= 599 ) or (host == "api.github.com" and code == 403 )
289+ if TRACE_NET :
290+ _tlog (f"# http_error host={ host } code={ code } retryable={ retryable } url={ url } " )
254291 if attempt >= retries - 1 or not retryable :
255292 raise
256293 except Exception as e :
257294 last_exc = e
295+ if TRACE_NET :
296+ _tlog (f"# http_exc host={ host } type={ type (e ).__name__ } attempt={ attempt + 1 } /{ retries } url={ url } " )
258297 if attempt >= retries - 1 :
259298 raise
260299 # backoff with jitter
261300 try :
262- delay = backoff_base * (2 ** attempt ) + random .random () * jitter
263- time .sleep (delay )
301+ delay = (backoff_base or 0 ) * (2 ** attempt ) + random .random () * (jitter or 0 )
302+ if delay > 0 and (PROGRESS or TRACE_NET ):
303+ _tlog (f"# http_backoff host={ host } attempt={ attempt + 1 } /{ retries } delay={ delay :.2f} s url={ url } " )
304+ time .sleep (delay )
264305 except Exception :
265306 pass
266307 if last_exc :
@@ -2005,6 +2046,10 @@ def audit_tool(tool: Tool) -> tuple[str, str, str, str, str, str, str, str]:
20052046 latest_start = time .time ()
20062047 latest_tag , latest_num = get_latest (tool )
20072048 latest_end = time .time ()
2049+ # Slow operation trace
2050+ dur_ms = int ((latest_end - latest_start ) * 1000 )
2051+ if dur_ms >= SLOW_MS :
2052+ _vlog (f"# slow latest tool={ tool .name } dur={ dur_ms } ms method={ upstream_method_for (tool )} offline={ OFFLINE_MODE } " )
20082053
20092054 if installed_line == "X" :
20102055 status = "NOT INSTALLED"
@@ -2133,7 +2178,10 @@ def main() -> int:
21332178 print ("|" .join (headers ))
21342179 streamed_header_printed = True
21352180
2136- with ThreadPoolExecutor (max_workers = min (MAX_WORKERS , len (tools_seq ))) as executor :
2181+ total_tools = len (tools_seq )
2182+ completed_tools = 0
2183+ print (f"# start collect: tools={ total_tools } timeout={ TIMEOUT_SECONDS } s retries={ HTTP_RETRIES } offline={ OFFLINE_MODE } " , file = sys .stderr ) if PROGRESS else None
2184+ with ThreadPoolExecutor (max_workers = min (MAX_WORKERS , total_tools )) as executor :
21372185 future_to_idx = {}
21382186 for idx , tool in enumerate (tools_seq ):
21392187 if PROGRESS :
@@ -2147,6 +2195,13 @@ def main() -> int:
21472195 t = tools_seq [idx ]
21482196 row = (t .name , "X" , "" , "" , upstream_method_for (t ), "UNKNOWN" , tool_homepage_url (t ), latest_target_url (t , "" , "" ))
21492197 results [idx ] = row
2198+ if PROGRESS :
2199+ try :
2200+ name , installed , _installed_method , latest , upstream_method , status , _tool_url , _latest_url = row
2201+ completed_tools += 1
2202+ print (f"# done { name } ({ completed_tools } /{ total_tools } ) status={ status } installed='{ installed } ' latest='{ latest } ' upstream={ upstream_method } " , file = sys .stderr )
2203+ except Exception :
2204+ pass
21502205 # In streaming mode, print each row as soon as available (no grouping)
21512206 if STREAM_OUTPUT and os .environ .get ("CLI_AUDIT_JSON" , "0" ) != "1" :
21522207 name , installed , installed_method , latest , upstream_method , status , tool_url , latest_url = row
@@ -2291,7 +2346,17 @@ def _category_key(row: tuple[str, ...]) -> tuple[int, str]:
22912346 "tool_url" : tool_url ,
22922347 "latest_url" : latest_url ,
22932348 })
2294- write_snapshot (payload )
2349+ if PROGRESS :
2350+ print (f"# writing snapshot to { SNAPSHOT_FILE } ..." , file = sys .stderr )
2351+ meta = write_snapshot (payload )
2352+ if PROGRESS :
2353+ try :
2354+ print (
2355+ f"# snapshot written: path={ SNAPSHOT_FILE } count={ meta .get ('count' )} created_at={ meta .get ('created_at' )} offline={ meta .get ('offline' )} " ,
2356+ file = sys .stderr ,
2357+ )
2358+ except Exception :
2359+ pass
22952360 except Exception as e :
22962361 if AUDIT_DEBUG :
22972362 print (f"# DEBUG: failed to write snapshot: { e } " , file = sys .stderr )
0 commit comments