@@ -268,22 +268,47 @@ function updateTagsForLeaderboard(leaderboardName) {
// Make function globally accessible
window.updateTagsForLeaderboard = updateTagsForLeaderboard;
270270
/**
 * Get the current agent mode for the Verified tab.
 *
 * Reads the value of the element with id 'agent-dropdown'.
 *
 * @returns {string} The dropdown's current value, or 'mini-v2' when no such
 *   element exists in the document.
 */
function getVerifiedAgentMode() {
  const dropdown = document.getElementById('agent-dropdown');
  return dropdown ? dropdown.value : 'mini-v2';
}
276+
/**
 * Get the current models filter for the Verified tab.
 *
 * Reads the value of the element with id 'models-dropdown'.
 *
 * @returns {string} The dropdown's current value, or 'all' when no such
 *   element exists in the document.
 */
function getVerifiedModelsFilter() {
  const dropdown = document.getElementById('models-dropdown');
  return dropdown ? dropdown.value : 'all';
}
282+
// Make the Verified-tab getters globally accessible
window.getVerifiedAgentMode = getVerifiedAgentMode;
window.getVerifiedModelsFilter = getVerifiedModelsFilter;
285+
/**
 * Show or hide filter elements based on the leaderboard type.
 *
 * For the Verified leaderboard, the '#verified-filters' container is shown and
 * '#standard-filters' is hidden; the other containers are left untouched.
 * For every other leaderboard, the reverse is applied, '#main-filters' is
 * hidden only for Multilingual, '#tag-filters' is shown, and
 * '#legacy-version-filter' is hidden.
 *
 * Elements that are missing from the document are skipped.
 *
 * @param {string} leaderboardName - Leaderboard name; compared
 *   case-insensitively against 'verified' and 'multilingual'.
 * @returns {void}
 */
function updateFilterVisibility(leaderboardName) {
  const verifiedFilters = document.getElementById('verified-filters');
  const standardFilters = document.getElementById('standard-filters');
  const mainFiltersContainer = document.getElementById('main-filters');
  const tagFiltersContainer = document.getElementById('tag-filters');
  const legacyVersionFilter = document.getElementById('legacy-version-filter');

  const leaderboardNameLower = leaderboardName.toLowerCase();
  const isVerified = leaderboardNameLower === 'verified';
  const isMultilingual = leaderboardNameLower === 'multilingual';

  if (isVerified) {
    // Show Verified-specific dropdowns, hide standard filters
    if (verifiedFilters) verifiedFilters.style.display = '';
    if (standardFilters) standardFilters.style.display = 'none';
  } else {
    // Show standard filters, hide Verified dropdowns
    if (verifiedFilters) verifiedFilters.style.display = 'none';
    if (standardFilters) standardFilters.style.display = '';

    // Main filters are hidden for multilingual; tag filters stay visible
    const hideMainFilters = isMultilingual;
    if (mainFiltersContainer) mainFiltersContainer.style.display = hideMainFilters ? 'none' : '';
    if (tagFiltersContainer) tagFiltersContainer.style.display = '';
    if (legacyVersionFilter) legacyVersionFilter.style.display = 'none';
  }
}
288313
289314// Table Update Logic - Optimized for lazy loading
@@ -297,41 +322,58 @@ function updateTable() {
297322
298323 const tableRows = visibleLeaderboard . querySelectorAll ( '.data-table tbody tr:not(.no-results)' ) ;
299324 let visibleRowCount = 0 ;
325+
326+ // Determine if we're on the Verified tab
327+ const isVerifiedTab = visibleLeaderboard . id === 'leaderboard-Verified' ;
328+ const modelsFilter = isVerifiedTab ? getVerifiedModelsFilter ( ) : null ;
300329
301330 tableRows . forEach ( row => {
302331 // Show row by default
303332 let showRow = true ;
304333
305- // Check filters
306- for ( const filter of activeFilters ) {
307- if ( row . getAttribute ( `data-${ filter } ` ) !== 'true' ) {
308- showRow = false ;
309- break ;
334+ if ( isVerifiedTab ) {
335+ // For Verified tab, apply models filter
336+ if ( modelsFilter === 'open-source' ) {
337+ if ( row . getAttribute ( 'data-os_model' ) !== 'true' ) {
338+ showRow = false ;
339+ }
340+ } else if ( modelsFilter === 'proprietary' ) {
341+ if ( row . getAttribute ( 'data-os_model' ) === 'true' ) {
342+ showRow = false ;
343+ }
310344 }
311- }
312-
313- // Check legacy version filter
314- if ( showRow ) {
315- const legacyFilterContainer = document . getElementById ( 'legacy-version-filter' ) ;
316- const showLegacyCheckbox = document . getElementById ( 'show-legacy-versions' ) ;
317- if ( legacyFilterContainer && legacyFilterContainer . style . display !== 'none' &&
318- showLegacyCheckbox && ! showLegacyCheckbox . checked &&
319- row . classList . contains ( 'legacy-version-row' ) ) {
320- showRow = false ;
345+ } else {
346+ // For non-Verified tabs, apply standard filters
347+ for ( const filter of activeFilters ) {
348+ if ( row . getAttribute ( `data-${ filter } ` ) !== 'true' ) {
349+ showRow = false ;
350+ break ;
351+ }
321352 }
322- }
323-
324- // Check tag filter
325- if ( showRow && window . tagFiltersDropdown ) {
326- const selectedTags = window . tagFiltersDropdown . getSelectedValues ( ) ;
327- const allTagsSelected = window . tagFiltersDropdown . isAllSelected ( ) ;
328353
329- if ( ! allTagsSelected ) {
330- const rowTags = ( row . getAttribute ( 'data-tags' ) || '' ) . split ( ',' ) . map ( t => t . trim ( ) ) . filter ( Boolean ) ;
331- if ( ! rowTags . some ( tag => selectedTags . includes ( tag ) ) ) {
354+ // Check legacy version filter
355+ if ( showRow ) {
356+ const legacyFilterContainer = document . getElementById ( 'legacy-version-filter' ) ;
357+ const showLegacyCheckbox = document . getElementById ( 'show-legacy-versions' ) ;
358+ if ( legacyFilterContainer && legacyFilterContainer . style . display !== 'none' &&
359+ showLegacyCheckbox && ! showLegacyCheckbox . checked &&
360+ row . classList . contains ( 'legacy-version-row' ) ) {
332361 showRow = false ;
333362 }
334363 }
364+
365+ // Check tag filter
366+ if ( showRow && window . tagFiltersDropdown ) {
367+ const selectedTags = window . tagFiltersDropdown . getSelectedValues ( ) ;
368+ const allTagsSelected = window . tagFiltersDropdown . isAllSelected ( ) ;
369+
370+ if ( ! allTagsSelected ) {
371+ const rowTags = ( row . getAttribute ( 'data-tags' ) || '' ) . split ( ',' ) . map ( t => t . trim ( ) ) . filter ( Boolean ) ;
372+ if ( ! rowTags . some ( tag => selectedTags . includes ( tag ) ) ) {
373+ showRow = false ;
374+ }
375+ }
376+ }
335377 }
336378
337379 // Toggle row visibility
@@ -340,11 +382,10 @@ function updateTable() {
340382 } ) ;
341383
342384 const noResultsMessage = visibleLeaderboard . querySelector ( '.no-results' ) ;
343- // Show/hide no results message
344- if ( visibleRowCount === 0 && ( activeFilters . size > 0 || ! isAllTagsSelected ( ) ) ) {
345- noResultsMessage . style . display = 'table-row' ;
385+ if ( visibleRowCount === 0 ) {
386+ if ( noResultsMessage ) noResultsMessage . style . display = 'table-row' ;
346387 } else {
347- noResultsMessage . style . display = 'none' ;
388+ if ( noResultsMessage ) noResultsMessage . style . display = 'none' ;
348389 }
349390
350391 // Update the select-all checkbox state after filtering
@@ -393,7 +434,7 @@ document.addEventListener('DOMContentLoaded', function() {
393434 }
394435 } ) ;
395436
396- // Initialize with tags for the default leaderboard (bash-only)
437+ // Initialize with tags for the default leaderboard (Verified with bash-only data )
397438 updateTagsForLeaderboard ( 'bash-only' ) ;
398439
399440 // Set initial selection for main filters
@@ -411,14 +452,29 @@ document.addEventListener('DOMContentLoaded', function() {
411452 showLegacyCheckbox . addEventListener ( 'change' , updateTable ) ;
412453 }
413454
414- // Check for initial leaderboard visibility (in case landing directly on bash-only)
455+ // Wire up Verified-specific dropdowns
456+ const agentDropdown = document . getElementById ( 'agent-dropdown' ) ;
457+ if ( agentDropdown ) {
458+ agentDropdown . addEventListener ( 'change' , ( ) => {
459+ if ( typeof openLeaderboard === 'function' ) {
460+ openLeaderboard ( 'Verified' ) ;
461+ }
462+ } ) ;
463+ }
464+
465+ const modelsDropdown = document . getElementById ( 'models-dropdown' ) ;
466+ if ( modelsDropdown ) {
467+ modelsDropdown . addEventListener ( 'change' , updateTable ) ;
468+ }
469+
470+ // Check for initial leaderboard visibility
415471 setTimeout ( ( ) => {
416472 const activeLeaderboard = document . querySelector ( '.tabcontent.active' ) ;
417473 if ( activeLeaderboard ) {
418474 const leaderboardId = activeLeaderboard . id ;
419475 const leaderboardName = leaderboardId . replace ( 'leaderboard-' , '' ) ;
420476 updateFilterVisibility ( leaderboardName ) ;
421- updateTagsForLeaderboard ( leaderboardName ) ; // Update tags for the initial leaderboard
477+ updateTagsForLeaderboard ( leaderboardName ) ;
422478 }
423479 } , 100 ) ;
424480} ) ;
@@ -429,10 +485,9 @@ function updateLeaderboardDescription(leaderboardName) {
429485 if ( ! textContainer ) return ;
430486
431487 const descriptions = {
432- 'bash-only ' : '<em>Bash Only </em> evaluates all LMs with <a href="https://github.com/SWE-agent/mini-swe-agent">mini-SWE-agent</a> on SWE-bench Verified (<a href="bash-only .html">details</a>).' ,
488+ 'verified ' : '<em>Verified </em> is a human-filtered subset of 500 instances. We use <a href="https://github.com/SWE-agent/mini-swe-agent">mini-SWE-agent</a> to evaluate all models with the same harness (<a href="verified .html">details</a>).' ,
433489 'multilingual' : '<em>Multilingual</em> features 300 tasks across 9 programming languages (<a href="multilingual-leaderboard.html">details</a>)' ,
434490 'lite' : '<em>Lite</em> is a subset of 300 instances for less costly evaluation (<a href="lite.html">details</a>)' ,
435- 'verified' : '<em>Verified</em> is a human-filtered subset of 500 instances (<a href="https://openai.com/index/introducing-swe-bench-verified/">details</a>)' ,
436491 'test' : '<em>Full</em> is a large benchmark made of 2000 instances (<a href="original.html">details</a>)' ,
437492 'multimodal' : '<em>Multimodal</em> features issues with visual elements (<a href="multimodal.html">details</a>)' ,
438493 } ;
0 commit comments