truefoundry · LordGameleo · Apr 1, 2026 · Mar 30, 2026
diff --git a/providers/deepinfra/NousResearch/Hermes-3-Llama-3.1-70B.yaml b/providers/deepinfra/NousResearch/Hermes-3-Llama-3.1-70B.yaml
@@ -11,8 +11,15 @@ features:
 limits:
     context_window: 131072
     max_input_tokens: 131072
+    max_output_tokens: 131072
     max_tokens: 131072
+modalities:
+    input:
+        - text
+    output:
+        - text
 mode: chat
 model: NousResearch/Hermes-3-Llama-3.1-70B
 sources:
     - https://deepinfra.com/NousResearch/Hermes-3-Llama-3.1-70B/api
+status: active
diff --git a/providers/deepinfra/Qwen/Qwen2.5-72B-Instruct.yaml b/providers/deepinfra/Qwen/Qwen2.5-72B-Instruct.yaml
@@ -7,6 +7,7 @@ features:
     - tool_choice
     - structured_output
     - prompt_caching
+    - json_output
 limits:
     context_window: 32768
     max_input_tokens: 32768
@@ -22,3 +23,4 @@ model: Qwen/Qwen2.5-72B-Instruct
 sources:
     - https://deepinfra.com/Qwen/Qwen2.5-72B-Instruct/api
     - https://huggingface.co/Qwen/Qwen2.5-72B-Instruct
+status: active
diff --git a/providers/deepinfra/Qwen/Qwen2.5-VL-32B-Instruct.yaml b/providers/deepinfra/Qwen/Qwen2.5-VL-32B-Instruct.yaml
@@ -20,3 +20,4 @@ model: Qwen/Qwen2.5-VL-32B-Instruct
 sources:
     - https://deepinfra.com/Qwen/Qwen2.5-VL-32B-Instruct/api
     - https://deepinfra.com/docs/advanced/function_calling
+status: active
diff --git a/providers/deepinfra/Qwen/Qwen3-14B.yaml b/providers/deepinfra/Qwen/Qwen3-14B.yaml
@@ -7,11 +7,18 @@ features:
     - tool_choice
     - system_messages
     - structured_output
+    - json_output
 limits:
     context_window: 40960
+modalities:
+    input:
+        - text
+    output:
+        - text
 mode: chat
 model: Qwen/Qwen3-14B
 sources:
     - https://deepinfra.com/Qwen/Qwen3-14B
     - https://deepinfra.com/Qwen/Qwen3-14B/api
+status: active
 thinking: true
diff --git a/providers/deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507.yaml b/providers/deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507.yaml
@@ -6,6 +6,7 @@ features:
     - function_calling
     - tool_choice
     - structured_output
+    - json_output
     - prompt_caching
 limits:
     context_window: 262144
@@ -23,3 +24,4 @@ sources:
     - https://deepinfra.com/Qwen/Qwen3-235B-A22B-Instruct-2507/api
     - https://api.deepinfra.com/models/Qwen/Qwen3-235B-A22B-Instruct-2507
     - https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507
+status: active
diff --git a/providers/deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507.yaml b/providers/deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507.yaml
@@ -22,4 +22,5 @@ model: Qwen/Qwen3-235B-A22B-Thinking-2507
 sources:
     - https://deepinfra.com/Qwen/Qwen3-235B-A22B-Thinking-2507
     - https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507
+status: active
 thinking: true
diff --git a/providers/deepinfra/Qwen/Qwen3-30B-A3B.yaml b/providers/deepinfra/Qwen/Qwen3-30B-A3B.yaml
@@ -9,6 +9,7 @@ features:
     - prompt_caching
 limits:
     context_window: 40960
+    max_output_tokens: 40960
     max_tokens: 40960
 modalities:
     input:

diff --git a/providers/deepinfra/Qwen/Qwen3-32B.yaml b/providers/deepinfra/Qwen/Qwen3-32B.yaml
@@ -8,8 +8,8 @@ features:
 limits:
     context_window: 40960
     max_input_tokens: 40960
-    max_output_tokens: 40960
-    max_tokens: 40960
+    max_output_tokens: 16384
+    max_tokens: 16384
 modalities:
     input:
         - text
@@ -22,4 +22,5 @@ sources:
     - https://deepinfra.com/Qwen/Qwen3-32B/api
     - https://deepinfra.com/docs/advanced/max_tokens_limit
     - https://huggingface.co/Qwen/Qwen3-32B
+status: active
 thinking: true
diff --git a/providers/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.yaml b/providers/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.yaml
@@ -22,3 +22,4 @@ sources:
     - https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct
     - https://deepinfra.com/docs/advanced/max_tokens_limit
     - https://artificialanalysis.ai/models/qwen3-coder-480b-a35b-instruct/providers
+status: active
diff --git a/providers/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct.yaml b/providers/deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct.yaml
@@ -21,3 +21,4 @@ model: Qwen/Qwen3-Coder-480B-A35B-Instruct
 sources:
     - https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct
     - https://deepinfra.com/Qwen/Qwen3-Coder-480B-A35B-Instruct/api
+status: active
diff --git a/providers/deepinfra/Qwen/Qwen3-Max-Thinking.yaml b/providers/deepinfra/Qwen/Qwen3-Max-Thinking.yaml
@@ -20,7 +20,10 @@ costs:
               - cost_per_token: 0.000015
                 from: 128000
 features:
+    - function_calling
+    - json_output
     - prompt_caching
+    - structured_output
 limits:
     context_window: 256000
     max_output_tokens: 16384

diff --git a/providers/deepinfra/Qwen/Qwen3-Max.yaml b/providers/deepinfra/Qwen/Qwen3-Max.yaml
@@ -7,6 +7,8 @@ costs:
           cache_read:
               - cost_per_token: 4.8e-7
                 from: 32000
+              - cost_per_token: 6.e-7
+                from: 128000
           input:
               - cost_per_token: 0.0000024
                 from: 32000
@@ -19,7 +21,9 @@ costs:
                 from: 128000
 features:
     - function_calling
+    - json_output
     - prompt_caching
+    - structured_output
     - tool_choice
 limits:
     context_window: 262144
@@ -37,4 +41,4 @@ sources:
     - https://www.alibabacloud.com/help/en/model-studio/models
     - https://www.alibabacloud.com/help/en/model-studio/billing-for-model-studio
     - https://openrouter.ai/qwen/qwen3-max/api
-thinking: true
+status: active
diff --git a/providers/deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct.yaml b/providers/deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct.yaml
@@ -8,8 +8,8 @@ features:
 limits:
     context_window: 262144
     max_input_tokens: 262144
-    max_output_tokens: 262144
-    max_tokens: 262144
+    max_output_tokens: 16384
+    max_tokens: 16384
 modalities:
     input:
         - text

diff --git a/providers/deepinfra/Qwen/Qwen3-VL-235B-A22B-Instruct.yaml b/providers/deepinfra/Qwen/Qwen3-VL-235B-A22B-Instruct.yaml
@@ -4,6 +4,7 @@ costs:
       output_cost_per_token: 8.8e-7
       region: "*"
 features:
+    - function_calling
     - prompt_caching
 limits:
     context_window: 262144
@@ -15,6 +16,8 @@ modalities:
         - text
         - image
         - video
+    output:
+        - text
 mode: chat
 model: Qwen/Qwen3-VL-235B-A22B-Instruct
 sources:

diff --git a/providers/deepinfra/Qwen/Qwen3-VL-30B-A3B-Instruct.yaml b/providers/deepinfra/Qwen/Qwen3-VL-30B-A3B-Instruct.yaml
@@ -24,4 +24,5 @@ sources:
     - https://deepinfra.com/Qwen/Qwen3-VL-30B-A3B-Instruct/api
     - https://deepinfra.com/docs/advanced/max_tokens_limit
     - https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct
+status: active
 thinking: true
diff --git a/providers/deepinfra/Sao10K/L3.1-70B-Euryale-v2.2.yaml b/providers/deepinfra/Sao10K/L3.1-70B-Euryale-v2.2.yaml
@@ -5,9 +5,16 @@ costs:
 features:
     - structured_output
     - prompt_caching
+    - json_output
 limits:
     context_window: 131072
+modalities:
+    input:
+        - text
+    output:
+        - text
 mode: chat
 model: Sao10K/L3.1-70B-Euryale-v2.2
 sources:
     - https://deepinfra.com/Sao10K/L3.1-70B-Euryale-v2.2
+status: active
diff --git a/providers/deepinfra/Sao10K/L3.3-70B-Euryale-v2.3.yaml b/providers/deepinfra/Sao10K/L3.3-70B-Euryale-v2.3.yaml
@@ -2,13 +2,20 @@ costs:
     - input_cost_per_token: 8.5e-7
       output_cost_per_token: 8.5e-7
       region: "*"
+features:
+    - json_output
+    - structured_output
 limits:
     context_window: 131072
     max_input_tokens: 131072
-    max_output_tokens: 131072
-    max_tokens: 131072
+modalities:
+    input:
+        - text
+    output:
+        - text
 mode: chat
 model: Sao10K/L3.3-70B-Euryale-v2.3
 sources:
     - https://deepinfra.com/Sao10K/L3.3-70B-Euryale-v2.3
     - https://deepinfra.com/Sao10K/L3.3-70B-Euryale-v2.3/api
+status: active
diff --git a/providers/deepinfra/allenai/Olmo-3.1-32B-Instruct.yaml b/providers/deepinfra/allenai/Olmo-3.1-32B-Instruct.yaml
@@ -17,3 +17,4 @@ model: allenai/Olmo-3.1-32B-Instruct
 sources:
     - https://deepinfra.com/allenai/Olmo-3.1-32B-Instruct
     - https://huggingface.co/allenai/Olmo-3.1-32B-Instruct
+status: active
diff --git a/providers/deepinfra/anthropic/claude-4-sonnet.yaml b/providers/deepinfra/anthropic/claude-4-sonnet.yaml
@@ -27,4 +27,7 @@ sources:
     - https://deepinfra.com/anthropic/claude-4-sonnet
     - https://platform.claude.com/docs/en/docs/about-claude/models
     - https://platform.claude.com/docs/en/docs/about-claude/pricing
+status: active
+supportedModes:
+    - chat
 thinking: true
diff --git a/providers/deepinfra/deepseek-ai/DeepSeek-OCR.yaml b/providers/deepinfra/deepseek-ai/DeepSeek-OCR.yaml
@@ -19,3 +19,4 @@ sources:
     - https://deepinfra.com/deepseek-ai/DeepSeek-OCR
     - https://huggingface.co/deepseek-ai/DeepSeek-OCR
     - https://github.com/deepseek-ai/DeepSeek-OCR
+status: active
diff --git a/providers/deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo.yaml b/providers/deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo.yaml
@@ -6,6 +6,7 @@ features:
     - function_calling
     - tool_choice
     - structured_output
+    - json_output
 limits:
     context_window: 32768
     max_output_tokens: 32768
@@ -20,4 +21,5 @@ model: deepseek-ai/DeepSeek-R1-0528-Turbo
 sources:
     - https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528-Turbo/api
     - https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528-Turbo
+status: active
 thinking: true
diff --git a/providers/deepinfra/deepseek-ai/DeepSeek-R1-0528.yaml b/providers/deepinfra/deepseek-ai/DeepSeek-R1-0528.yaml
@@ -22,4 +22,5 @@ model: deepseek-ai/DeepSeek-R1-0528
 sources:
     - https://deepinfra.com/deepseek-ai/DeepSeek-R1-0528/api
     - https://huggingface.co/deepseek-ai/DeepSeek-R1-0528/raw/main/config.json
+status: active
 thinking: true
diff --git a/providers/deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.yaml b/providers/deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.yaml
@@ -2,6 +2,10 @@ costs:
     - input_cost_per_token: 7.e-7
       output_cost_per_token: 8.e-7
       region: "*"
+features:
+    - function_calling
+    - json_output
+    - structured_output
 limits:
     context_window: 131072
     max_input_tokens: 131072

diff --git a/providers/deepinfra/deepseek-ai/DeepSeek-V3-0324.yaml b/providers/deepinfra/deepseek-ai/DeepSeek-V3-0324.yaml
@@ -6,11 +6,11 @@ costs:
 features:
     - prompt_caching
     - function_calling
+    - structured_output
+    - json_output
 limits:
     context_window: 163840
     max_input_tokens: 163840
-    max_output_tokens: 163840
-    max_tokens: 163840
 modalities:
     input:
         - text
@@ -20,3 +20,4 @@ mode: chat
 model: deepseek-ai/DeepSeek-V3-0324
 sources:
     - https://deepinfra.com/deepseek-ai/DeepSeek-V3-0324/api
+status: active
diff --git a/providers/deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus.yaml b/providers/deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus.yaml
@@ -10,9 +10,15 @@ limits:
     max_input_tokens: 163840
     max_output_tokens: 163840
     max_tokens: 163840
+modalities:
+    input:
+        - text
+    output:
+        - text
 mode: chat
 model: deepseek-ai/DeepSeek-V3.1-Terminus
 sources:
     - https://deepinfra.com/deepseek-ai/DeepSeek-V3.1-Terminus/api
     - https://openrouter.ai/deepseek/deepseek-v3.1-terminus/api
+status: active
 thinking: true
diff --git a/providers/deepinfra/deepseek-ai/DeepSeek-V3.1.yaml b/providers/deepinfra/deepseek-ai/DeepSeek-V3.1.yaml
@@ -4,15 +4,24 @@ costs:
       output_cost_per_token: 7.9e-7
       region: "*"
 features:
+    - function_calling
     - prompt_caching
+    - structured_output
+    - json_output
 limits:
     context_window: 163840
     max_input_tokens: 163840
     max_output_tokens: 32768
     max_tokens: 32768
+modalities:
+    input:
+        - text
+    output:
+        - text
 mode: chat
 model: deepseek-ai/DeepSeek-V3.1
 sources:
     - https://deepinfra.com/deepseek-ai/DeepSeek-V3.1/api
     - https://deepinfra.com/docs/advanced/max_tokens_limit
+status: active
 thinking: true
diff --git a/providers/deepinfra/deepseek-ai/DeepSeek-V3.2.yaml b/providers/deepinfra/deepseek-ai/DeepSeek-V3.2.yaml
@@ -7,6 +7,7 @@ features:
     - prompt_caching
     - function_calling
     - structured_output
+    - json_output
 limits:
     context_window: 163840
     max_input_tokens: 163840

diff --git a/providers/deepinfra/deepseek-ai/Janus-Pro-1B.yaml b/providers/deepinfra/deepseek-ai/Janus-Pro-1B.yaml
@@ -1,10 +1,12 @@
 costs:
     - input_cost_per_image: 0.0005
       region: "*"
-limits:
-    max_input_tokens: 2600
 modalities:
     input:
+        - text
+        - image
+    output:
+        - text
         - image
 mode: image
 model: deepseek-ai/Janus-Pro-1B

diff --git a/providers/deepinfra/google/gemini-2.5-flash.yaml b/providers/deepinfra/google/gemini-2.5-flash.yaml
@@ -6,9 +6,12 @@ features:
     - function_calling
     - tool_choice
     - structured_output
+    - json_output
 limits:
     context_window: 1000000
     max_input_tokens: 1000000
+    max_output_tokens: 65536
+    max_tokens: 65536
 modalities:
     input:
         - text
@@ -20,4 +23,5 @@ model: google/gemini-2.5-flash
 sources:
     - https://deepinfra.com/google/gemini-2.5-flash
     - https://deepinfra.com/google/gemini-2.5-flash/api
+status: active
 thinking: true
diff --git a/providers/deepinfra/google/gemma-3-4b-it.yaml b/providers/deepinfra/google/gemma-3-4b-it.yaml
@@ -23,3 +23,4 @@ sources:
     - https://deepinfra.com/google/gemma-3-4b-it/api
     - https://deepinfra.com/docs/advanced/max_tokens_limit
     - https://ai.google.dev/gemma/docs/core
+status: active
diff --git a/providers/deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct.yaml b/providers/deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct.yaml
@@ -5,6 +5,9 @@ costs:
 features:
     - function_calling
     - system_messages
+    - json_output
+    - structured_output
+    - prompt_caching
 limits:
     context_window: 131072
     max_input_tokens: 131072

diff --git a/providers/deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo.yaml b/providers/deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo.yaml
@@ -21,3 +21,4 @@ sources:
     - https://deepinfra.com/meta-llama/Llama-3.3-70B-Instruct-Turbo/api
     - https://deepinfra.com/docs/advanced/max_tokens_limit
     - https://deepinfra.com/docs/advanced/function_calling
+status: active