|
15 | 15 | ], |
16 | 16 | "logs": null, |
17 | 17 | "mini-swe-agent_version": "2.0.0", |
18 | | - "name": "Claude 4.5 Opus", |
| 18 | + "name": "Claude 4.5 Opus (high reasoning)", |
19 | 19 | "os_model": false, |
20 | 20 | "os_system": true, |
21 | 21 | "per_instance_details": { |
|
2544 | 2544 | ], |
2545 | 2545 | "logs": null, |
2546 | 2546 | "mini-swe-agent_version": "2.0.0", |
2547 | | - "name": "Gemini 3 Flash", |
| 2547 | + "name": "Gemini 3 Flash (high reasoning)", |
2548 | 2548 | "os_model": false, |
2549 | 2549 | "os_system": true, |
2550 | 2550 | "per_instance_details": { |
|
5073 | 5073 | ], |
5074 | 5074 | "logs": null, |
5075 | 5075 | "mini-swe-agent_version": "2.0.0", |
5076 | | - "name": "MiniMax M2.5", |
| 5076 | + "name": "MiniMax M2.5 (high reasoning)", |
5077 | 5077 | "os_model": false, |
5078 | 5078 | "os_system": true, |
5079 | 5079 | "per_instance_details": { |
|
15189 | 15189 | ], |
15190 | 15190 | "logs": null, |
15191 | 15191 | "mini-swe-agent_version": "2.0.0", |
15192 | | - "name": "GLM-5", |
| 15192 | + "name": "GLM-5 (high reasoning)", |
15193 | 15193 | "os_model": false, |
15194 | 15194 | "os_system": true, |
15195 | 15195 | "per_instance_details": { |
|
17718 | 17718 | ], |
17719 | 17719 | "logs": null, |
17720 | 17720 | "mini-swe-agent_version": "2.0.0", |
17721 | | - "name": "GPT-5-2", |
| 17721 | + "name": "GPT-5-2 (high reasoning)", |
17722 | 17722 | "os_model": false, |
17723 | 17723 | "os_system": true, |
17724 | 17724 | "per_instance_details": { |
|
20247 | 20247 | ], |
20248 | 20248 | "logs": "s3://swe-bench-submissions/bash-only/20251211_mini-v1.17.2_gpt-5.2-2025-12-11-high/logs", |
20249 | 20249 | "mini-swe-agent_version": "1.17.2", |
20250 | | - "name": "GPT-5.2 (2025-12-11)", |
| 20250 | + "name": "GPT-5.2 (2025-12-11) (high reasoning)", |
20251 | 20251 | "os_model": false, |
20252 | 20252 | "os_system": true, |
20253 | 20253 | "per_instance_details": { |
|
22776 | 22776 | ], |
22777 | 22777 | "logs": null, |
22778 | 22778 | "mini-swe-agent_version": "2.0.0", |
22779 | | - "name": "Claude 4.5 Sonnet", |
| 22779 | + "name": "Claude 4.5 Sonnet (high reasoning)", |
22780 | 22780 | "os_model": false, |
22781 | 22781 | "os_system": true, |
22782 | 22782 | "per_instance_details": { |
|
25305 | 25305 | ], |
25306 | 25306 | "logs": null, |
25307 | 25307 | "mini-swe-agent_version": "2.0.0", |
25308 | | - "name": "Kimi K2.5", |
| 25308 | + "name": "Kimi K2.5 (high reasoning)", |
25309 | 25309 | "os_model": false, |
25310 | 25310 | "os_system": true, |
25311 | 25311 | "per_instance_details": { |
|
30363 | 30363 | ], |
30364 | 30364 | "logs": null, |
30365 | 30365 | "mini-swe-agent_version": "2.0.0", |
30366 | | - "name": "DeepSeek V3.2", |
| 30366 | + "name": "DeepSeek V3.2 (high reasoning)", |
30367 | 30367 | "os_model": false, |
30368 | 30368 | "os_system": true, |
30369 | 30369 | "per_instance_details": { |
|
40454 | 40454 | ], |
40455 | 40455 | "logs": null, |
40456 | 40456 | "mini-swe-agent_version": "2.0.0", |
40457 | | - "name": "Claude 4.5 Haiku", |
| 40457 | + "name": "Claude 4.5 Haiku (high reasoning)", |
40458 | 40458 | "os_model": false, |
40459 | 40459 | "os_system": true, |
40460 | 40460 | "per_instance_details": { |
@@ -113459,7 +113459,7 @@ |
113459 | 113459 | ], |
113460 | 113460 | "logs": "s3://swe-bench-submissions/multilingual/20260213_mini-v2.0.0a0_gpt-5-2-high/logs", |
113461 | 113461 | "mini-swe-agent_version": "2.0.0a0", |
113462 | | - "name": "GPT-5.2", |
| 113462 | + "name": "GPT-5.2 (high reasoning)", |
113463 | 113463 | "os_model": false, |
113464 | 113464 | "os_system": true, |
113465 | 113465 | "per_instance_details": { |
@@ -120273,7 +120273,7 @@ |
120273 | 120273 | ], |
120274 | 120274 | "logs": null, |
120275 | 120275 | "mini-swe-agent_version": "2.0.0", |
120276 | | - "name": "mini-SWE-agent + Claude 4.5 Opus", |
| 120276 | + "name": "mini-SWE-agent + Claude 4.5 Opus (high reasoning)", |
120277 | 120277 | "os_model": false, |
120278 | 120278 | "os_system": true, |
120279 | 120279 | "per_instance_details": { |
@@ -122831,7 +122831,7 @@ |
122831 | 122831 | ], |
122832 | 122832 | "logs": null, |
122833 | 122833 | "mini-swe-agent_version": "2.0.0", |
122834 | | - "name": "mini-SWE-agent + Gemini 3 Flash", |
| 122834 | + "name": "mini-SWE-agent + Gemini 3 Flash (high reasoning)", |
122835 | 122835 | "os_model": false, |
122836 | 122836 | "os_system": true, |
122837 | 122837 | "per_instance_details": { |
@@ -125361,7 +125361,7 @@ |
125361 | 125361 | ], |
125362 | 125362 | "logs": null, |
125363 | 125363 | "mini-swe-agent_version": "2.0.0", |
125364 | | - "name": "mini-SWE-agent + MiniMax M2.5", |
| 125364 | + "name": "mini-SWE-agent + MiniMax M2.5 (high reasoning)", |
125365 | 125365 | "os_model": false, |
125366 | 125366 | "os_system": true, |
125367 | 125367 | "per_instance_details": { |
@@ -135765,7 +135765,7 @@ |
135765 | 135765 | ], |
135766 | 135766 | "logs": null, |
135767 | 135767 | "mini-swe-agent_version": "2.0.0", |
135768 | | - "name": "mini-SWE-agent + GLM-5", |
| 135768 | + "name": "mini-SWE-agent + GLM-5 (high reasoning)", |
135769 | 135769 | "os_model": false, |
135770 | 135770 | "os_system": true, |
135771 | 135771 | "per_instance_details": { |
@@ -138295,7 +138295,7 @@ |
138295 | 138295 | ], |
138296 | 138296 | "logs": null, |
138297 | 138297 | "mini-swe-agent_version": "2.0.0", |
138298 | | - "name": "mini-SWE-agent + GPT-5-2", |
| 138298 | + "name": "mini-SWE-agent + GPT-5-2 (high reasoning)", |
138299 | 138299 | "os_model": false, |
138300 | 138300 | "os_system": true, |
138301 | 138301 | "per_instance_details": { |
@@ -140874,7 +140874,7 @@ |
140874 | 140874 | ], |
140875 | 140875 | "logs": "s3://swe-bench-submissions/verified/20251211_mini-v1.17.2_gpt-5.2-2025-12-11-high/logs", |
140876 | 140876 | "mini-swe-agent_version": "1.17.2", |
140877 | | - "name": "mini-SWE-agent + GPT-5.2 (2025-12-11)", |
| 140877 | + "name": "mini-SWE-agent + GPT-5.2 (2025-12-11) (high reasoning)", |
140878 | 140878 | "os_model": false, |
140879 | 140879 | "os_system": true, |
140880 | 140880 | "per_instance_details": { |
@@ -143404,7 +143404,7 @@ |
143404 | 143404 | ], |
143405 | 143405 | "logs": null, |
143406 | 143406 | "mini-swe-agent_version": "2.0.0", |
143407 | | - "name": "mini-SWE-agent + Claude 4.5 Sonnet", |
| 143407 | + "name": "mini-SWE-agent + Claude 4.5 Sonnet (high reasoning)", |
143408 | 143408 | "os_model": false, |
143409 | 143409 | "os_system": true, |
143410 | 143410 | "per_instance_details": { |
@@ -146088,7 +146088,7 @@ |
146088 | 146088 | ], |
146089 | 146089 | "logs": null, |
146090 | 146090 | "mini-swe-agent_version": "2.0.0", |
146091 | | - "name": "mini-SWE-agent + Kimi K2.5", |
| 146091 | + "name": "mini-SWE-agent + Kimi K2.5 (high reasoning)", |
146092 | 146092 | "os_model": false, |
146093 | 146093 | "os_system": true, |
146094 | 146094 | "per_instance_details": { |
@@ -151298,7 +151298,7 @@ |
151298 | 151298 | ], |
151299 | 151299 | "logs": null, |
151300 | 151300 | "mini-swe-agent_version": "2.0.0", |
151301 | | - "name": "mini-SWE-agent + DeepSeek V3.2", |
| 151301 | + "name": "mini-SWE-agent + DeepSeek V3.2 (high reasoning)", |
151302 | 151302 | "os_model": false, |
151303 | 151303 | "os_system": true, |
151304 | 151304 | "per_instance_details": { |
@@ -161506,7 +161506,7 @@ |
161506 | 161506 | ], |
161507 | 161507 | "logs": null, |
161508 | 161508 | "mini-swe-agent_version": "2.0.0", |
161509 | | - "name": "mini-SWE-agent + Claude 4.5 Haiku", |
| 161509 | + "name": "mini-SWE-agent + Claude 4.5 Haiku (high reasoning)", |
161510 | 161510 | "os_model": false, |
161511 | 161511 | "os_system": true, |
161512 | 161512 | "per_instance_details": { |
|
0 commit comments