@@ -208,122 +208,187 @@ async function handleProgress(
208208}
209209
210210
211- async function handleStatus (
212- req : Request ,
213- env : Env ,
214- url : URL ,
215- ) : Promise < Response > {
216- const user = url . searchParams . get ( "user" ) ?. trim ( ) ;
217- if ( ! user || ! VALID_LOGIN . test ( user ) ) {
218- return json ( { error : "invalid user" } , 400 ) ;
219- }
220- const repo = env . GH_REPO ?? "ArchiveBox/githubusers" ;
221- // Fetch the most recent workflow run regardless of event (dispatch /
222- // push / schedule) — they all run the same mining job, and the
223- // concurrency.group serializes them so the latest run is always the
224- // most relevant.
// Fetch the GH-API-derived state used by /api/status. Encapsulated so
// handleStatus can cache the whole result.
//
// Parameters:
//   env  - worker environment; only env.GH_DISPATCH_TOKEN is read here.
//   repo - "owner/name" slug of the repository whose workflow runs are
//          inspected.
//
// Resolves to one of:
//   { error: "gh_api_failed", status, message }
//       the workflow-runs request failed. `status` is the HTTP status, or
//       0 when the request never produced a response (network error or an
//       unparsable body). Callers fall back to a stale cached copy when
//       present.
//   { run: null }
//       the repository has no workflow runs.
//   { run, job, steps, rate_limit, recent_log }
//       the normal case. Everything except `run` is best-effort and
//       degrades to null / [] if its request fails.
//
// This function never rejects because of a GitHub / network failure, so
// the caller's `ghState.error` check is the only failure path to handle.
async function fetchGhState(env: Env, repo: string): Promise<any> {
  const ghHeaders = {
    Authorization: `Bearer ${env.GH_DISPATCH_TOKEN}`,
    "User-Agent": "githubusers-archivebox-io",
    Accept: "application/vnd.github+json",
  };

  // 1) Recent workflow runs. This is the only mandatory request: if it
  //    fails there is nothing useful to report.
  let data: any;
  try {
    const r = await fetch(
      `https://api.github.com/repos/${repo}/actions/runs?per_page=5`,
      { headers: ghHeaders },
    );
    if (!r.ok) {
      let message = "";
      try { message = (await r.json() as any).message ?? ""; } catch {}
      return { error: "gh_api_failed", status: r.status, message };
    }
    data = await r.json();
  } catch (e: unknown) {
    // fetch() rejects on network-level failures and json() on a malformed
    // body. Report both through the same error shape as a non-OK response
    // so the caller's stale-cache fallback still applies.
    return {
      error: "gh_api_failed",
      status: 0,
      message: e instanceof Error ? e.message : String(e),
    };
  }

  // Prefer an in_progress run, then a queued one, then the most recent.
  const runs: any[] = data?.workflow_runs ?? [];
  const run = runs.find((x: any) => x.status === "in_progress")
    ?? runs.find((x: any) => x.status === "queued")
    ?? runs[0];
  if (!run) return { run: null };

  // 2) Job + steps for the chosen run (best-effort, like steps 3 and 4).
  let steps: any[] = [];
  let job: any = null;
  try {
    const jr = await fetch(
      `https://api.github.com/repos/${repo}/actions/runs/${run.id}/jobs`,
      { headers: ghHeaders },
    );
    if (jr.ok) {
      const jdata = await jr.json() as any;
      job = (jdata?.jobs ?? [])[0] ?? null;
      steps = (job?.steps ?? []).map((s: any) => ({
        name: s.name,
        status: s.status,
        conclusion: s.conclusion,
      }));
    }
  } catch {
    // Leave job = null / steps = []; the run summary is still worth returning.
  }

  // 3) Rate-limit gauge (free endpoint — doesn't count against quota).
  let rateLimit: any = null;
  try {
    const rl = await fetch("https://api.github.com/rate_limit",
      { headers: ghHeaders });
    if (rl.ok) {
      const rd = await rl.json() as any;
      const rr = rd?.resources ?? {};
      rateLimit = {
        search: rr.search ? {
          remaining: rr.search.remaining,
          limit: rr.search.limit,
          reset: rr.search.reset,
        } : null,
        core: rr.core ? {
          remaining: rr.core.remaining,
          limit: rr.core.limit,
          reset: rr.core.reset,
        } : null,
      };
    }
  } catch {
    // Best-effort: rate_limit stays null.
  }

  // 4) Tail of recent log output (only when the job is in_progress —
  //    saves a hefty fetch on idle runs).
  let recentLog: string[] = [];
  if (job?.id && job.status === "in_progress") {
    try {
      const lr = await fetch(
        `https://api.github.com/repos/${repo}/actions/jobs/${job.id}/logs`,
        { headers: ghHeaders },
      );
      if (lr.ok) {
        const txt = await lr.text();
        recentLog = txt
          .split("\n")
          // Drop the leading "<ISO timestamp> " that prefixes each log line.
          .map((l) => l.replace(/^\d{4}-\d{2}-\d{2}T[\d:.]+Z\s?/, ""))
          // Keep only lines that look like progress output from the job.
          // NOTE(review): `search\s?quota` accepts both "search quota" and
          // "searchquota" — confirm which form the job actually prints.
          .filter((l) => /^(>>|\s*\[|\s*-{2}|\s*!|\s*resolved\b|\s*scanning|\s*fetching|\s*mining|\s*deploying|\s*search\s?quota|\s*resolving)/i
            .test(l))
          .slice(-20);
      }
    } catch {
      // Best-effort: recent_log stays empty.
    }
  }

  return {
    run: {
      id: run.id,
      status: run.status,
      conclusion: run.conclusion,
      run_started_at: run.run_started_at,
      html_url: run.html_url,
    },
    job: job ? { id: job.id, status: job.status } : null,
    steps,
    rate_limit: rateLimit,
    recent_log: recentLog,
  };
}
313+
314+
315+ async function handleStatus (
316+ req : Request ,
317+ env : Env ,
318+ url : URL ,
319+ ) : Promise < Response > {
320+ const user = url . searchParams . get ( "user" ) ?. trim ( ) ;
321+ if ( ! user || ! VALID_LOGIN . test ( user ) ) {
322+ return json ( { error : "invalid user" } , 400 ) ;
323+ }
324+ const repo = env . GH_REPO ?? "ArchiveBox/githubusers" ;
325+
326+ // GH API state (workflow runs, jobs, logs, rate-limit) is the same for
327+ // every visitor / user — cache it globally for 15s. Loading pages poll
328+ // every 4s; without this cache we burn ~45 GH API requests per minute
329+ // per active visitor, which exhausts the 5000/hr PAT limit fast.
330+ const ghStateKey = new Request (
331+ `https://internal-status.invalid/gh-state-v1` ,
332+ ) ;
333+ let ghState : any = null ;
334+ let stale = false ;
335+ const cached = await caches . default . match ( ghStateKey ) ;
336+ if ( cached ) {
337+ try { ghState = await cached . json ( ) ; } catch { }
338+ }
339+ if ( ! ghState ) {
340+ ghState = await fetchGhState ( env , repo ) ;
341+ if ( ghState . error ) {
342+ // Couldn't reach GH — fall back to whatever we last saw (if any).
343+ // Without a fallback we serve {error:"..."} which makes the
344+ // loading page render nothing.
345+ const stale_resp = await caches . default . match (
346+ new Request ( `https://internal-status.invalid/gh-state-stale-v1` ) ,
347+ ) ;
348+ if ( stale_resp ) {
349+ try { ghState = await stale_resp . json ( ) ; stale = true ; } catch { }
350+ }
351+ if ( ! ghState || ghState . error ) {
352+ return json ( {
353+ ok : false ,
354+ error : "gh_unreachable" ,
355+ gh_status : ghState ?. status ,
356+ gh_message : ghState ?. message ,
357+ } , 200 ) ;
358+ }
359+ } else {
360+ // Cache for 15s (frequent polling) and keep a separate "stale"
361+ // copy that lives much longer (1h) so we can fall back when GH
362+ // rate-limits us.
363+ await caches . default . put (
364+ ghStateKey ,
365+ new Response ( JSON . stringify ( ghState ) , {
366+ headers : {
367+ "Cache-Control" : "max-age=15" ,
368+ "Content-Type" : "application/json" ,
369+ } ,
370+ } ) ,
371+ ) ;
372+ await caches . default . put (
373+ new Request ( `https://internal-status.invalid/gh-state-stale-v1` ) ,
374+ new Response ( JSON . stringify ( ghState ) , {
375+ headers : {
376+ "Cache-Control" : "max-age=3600" ,
377+ "Content-Type" : "application/json" ,
378+ } ,
379+ } ) ,
380+ ) ;
381+ }
382+ }
383+ const run = ghState . run ;
384+ if ( ! run ) {
385+ return json ( { ok : false , status : "no_runs" , stale } ) ;
386+ }
387+ const steps = ghState . steps ?? [ ] ;
388+ const rateLimit = ghState . rate_limit ?? null ;
389+ const recentLog : string [ ] = ghState . recent_log ?? [ ] ;
390+ const job = ghState . job ;
391+
327392 // Read the latest progress update posted by the running Python script.
328393 let progress : any = null ;
329394 try {
@@ -580,7 +645,15 @@ async function fetchStatus() {
580645 const r = await fetch("/api/status?user=" + encodeURIComponent(USER),
581646 { cache: "no-store" });
582647 if (!r.ok) return null;
583- return await r.json();
648+ const j = await r.json();
649+ // Worker hit a GH API outage / rate limit. Surface a friendly note
650+ // instead of silently rendering nothing.
651+ if (j && j.error === "gh_unreachable") {
652+ $now.textContent = "Waiting on GitHub API… (" +
653+ (j.gh_status || "unreachable") + ") — will retry";
654+ return null;
655+ }
656+ return j;
584657 } catch (e) { return null; }
585658}
586659
@@ -722,7 +795,7 @@ function renderSteps(status) {
722795 $now.textContent = "Dashboard ready — reloading…";
723796 setTimeout(() => location.reload(), 500);
724797 }
725- }, 4000 );
798+ }, 8000 );
726799})();
727800</script>
728801</body>
0 commit comments