diff --git a/app/_indices/ai-gateway.yaml b/app/_indices/ai-gateway.yaml index 6a84d4d7f5..1a0da14de8 100644 --- a/app/_indices/ai-gateway.yaml +++ b/app/_indices/ai-gateway.yaml @@ -78,7 +78,7 @@ sections: - path: /ai-gateway/ai-providers/**/* - title: MCP traffic gateway items: - - path: /mcp/ + - path: /ai-gateway/mcp/ - title: Secure MCP traffic description: Secure GitHub MCP Server traffic with Kong Gateway and {{site.ai_gateway}} url: /mcp/secure-mcp-traffic/ diff --git a/app/_landing_pages/ai-gateway-2.yaml b/app/_landing_pages/ai-gateway-2.yaml new file mode 100644 index 0000000000..1253a152d9 --- /dev/null +++ b/app/_landing_pages/ai-gateway-2.yaml @@ -0,0 +1,597 @@ +metadata: + title: "{{site.ai_gateway_name}}" + content_type: landing_page + description: This page is an introduction to {{site.ai_gateway}}. + products: + - ai-gateway + - gateway + works_on: + - on-prem + - konnect + tags: + - ai + +rows: + - header: + type: h1 + text: "{{site.ai_gateway}}" + sub_text: Connectivity and governance layer for modern AI-native applications built on top of {{site.base_gateway}} + - columns: + - blocks: + - type: structured_text + config: + header: + text: "Introducing {{site.ai_gateway}}" + blocks: + - type: text + text: | + As AI adoption accelerates, applications are evolving beyond basic LLM calls into complex, multi-actor systems-including user apps, agents, orchestration layers, and context servers that interact with foundation models in real time. + + To support this shift, developers are adopting protocols like Model Context Protocol (MCP) and Agent2Agent (A2A) to standardize how components exchange tools, data, and decisions. + + But infrastructure often falls behind, with challenges around authentication, rate limiting, data security, observability, and constant provider changes. + + {{site.ai_gateway}} addresses these challenges with a high-performance control plane that secures, governs, and observes AI-native systems end to end. 
In this model, an [**AI Policy**](/ai-gateway/entities/ai-policy/) {% new_in 2.0 %} is a first-class entity that applies governance behavior at a chosen scope (such as an [AI Model](/ai-gateway/entities/ai-model/), [AI Agent](/ai-gateway/entities/ai-agent/), [AI MCP Server](/ai-gateway/entities/ai-mcp-server/), [AI Consumer](/ai-gateway/entities/ai-consumer/), or [AI Consumer Group](/ai-gateway/entities/ai-consumer-group/)). Whether serving LLM traffic, exposing structured context via MCP, or coordinating agents through A2A, {{site.ai_gateway}} ensures scalable, secure, and reliable AI infrastructure. + + + - blocks: + - type: image + config: + url: /assets/images/gateway/ai-gateway-overview.svg + alt_text: Overview of AI gateway + + - columns: + - blocks: + - type: structured_text + config: + header: + text: "Quickstart" + blocks: + - type: text + text: | + [Sign up for {{site.konnect_short_name}}](https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=docs&utm_content=ai-gateway) to get started with {{site.ai_gateway}}. + + Or, launch a [demo instance](/gateway/quickstart-reference/#ai-gateway-quickstart) of {{site.ai_gateway}} running on-prem: + ```sh + curl -Ls https://get.konghq.com/ai | bash + ``` + + - columns: + - blocks: + - type: card + config: + title: LLM quickstart + description: Proxy your first model through {{site.ai_gateway}} with a guided setup. + icon: /assets/icons/llm-quickstart.svg + cta: + url: /ai-gateway/get-started/ + align: end + - blocks: + - type: card + config: + title: MCP quickstart + description: Expose and observe your first tool server over Model Context Protocol. + icon: /assets/icons/mcp-quickstart.svg + cta: + url: /ai-gateway/mcp/ + align: end + - blocks: + - type: card + config: + title: A2A quickstart + description: Route and secure agent-to-agent traffic with protocol-aware observability. 
+ icon: /assets/icons/a2a-quickstart.svg + cta: + url: /ai-gateway/a2a/ + align: end + + - header: + type: h2 + text: Use cases + description: | + Build with {{site.ai_gateway}} and see how to implement common patterns. + column_count: 2 + columns: + - blocks: + - type: card + config: + title: Claude SSO integration + description: Secure Claude with single sign-on authentication through {{site.ai_gateway}}. + icon: /assets/icons/security.svg + cta: + url: /cookbooks/claude-sso/ + align: end + - blocks: + - type: card + config: + title: Basic LLM routing + description: Route requests across multiple LLM providers with failover and load balancing. + icon: /assets/icons/network.svg + cta: + url: /cookbooks/basic-llm-routing/ + align: end + - blocks: + - type: card + config: + title: Voice AI endpoints + description: Proxy voice-to-text and text-to-speech AI services with governance. + icon: /assets/icons/ai.svg + cta: + url: /cookbooks/voice-ai/ + align: end + - blocks: + - type: card + config: + title: External MCP servers + description: Expose and govern tools from external Model Context Protocol servers. + icon: /assets/icons/mcp.svg + cta: + url: /cookbooks/external-mcp-server/ + align: end + + - header: + type: h2 + text: Three traffic types, unified control + description: | + Govern, secure, and observe all AI traffic through dedicated AI Gateway entities {% new_in 2.0 %}. + AI Models manage LLM traffic, AI MCP Servers expose and control tools, and AI Agents route A2A traffic. + Each includes built-in authentication, policy enforcement, and observability. + column_count: 3 + columns: + - blocks: + - type: card + config: + title: LLM traffic + description: Route LLM requests through a provider-agnostic Universal API. Load-balance across providers, transform requests and responses, enforce policies, and collect usage analytics. 
+ icon: /assets/icons/plugins/universal-api.svg + cta: + url: /ai-gateway/entities/ai-model/ + align: end + - blocks: + - type: card + config: + title: MCP traffic + description: Expose and govern tool traffic over Model Context Protocol. Control which agents access which tools, enforce rate limits, authenticate callers, and observe all tool invocations. + icon: /assets/icons/mcp.svg + cta: + url: /ai-gateway/mcp/ + align: end + - blocks: + - type: card + config: + title: A2A traffic + description: Route Agent-to-Agent traffic with protocol-aware security and observability. Rewrite agent cards, extract task state, stream events, and emit structured telemetry. + icon: /assets/icons/plugins/ai-a2a-proxy.png + cta: + url: /ai-gateway/a2a/ + align: end + + - columns: + - blocks: + - type: structured_text + config: + header: + text: "Universal API" + blocks: + - type: text + text: | + Kong's {{site.ai_gateway}} Universal API is configured through entity resources, primarily [AI Models](/ai-gateway/entities/ai-model/) and [AI Providers](/ai-gateway/entities/ai-provider/), to provide a single, standardized interface for interacting with models across multiple providers. + + - [**Easy to use**](/ai-gateway/entities/ai-model/): Define one [AI Model](/ai-gateway/entities/ai-model/) and expose a stable client-facing endpoint. + + - [**Load balancing**](/ai-gateway/load-balancing/): Distribute requests across target models for performance and cost efficiency. + + - [**Retry and fallback**](/ai-gateway/load-balancing/#retry-and-fallback): Route requests based on performance, cost, or availability. + + - [**Policy integration**](/ai-gateway/entities/ai-policy/): Attach [AI Policies](/ai-gateway/entities/ai-policy/) for auth, guardrails, and transformations. 
+ + - blocks: + - type: image + config: + url: /assets/images/gateway/universal-api.svg + alt_text: Overview of the Universal API + + - header: + type: h2 + text: "{{site.ai_gateway}} providers" + description: | + {{site.ai_gateway}} routes AI requests through [provider-agnostic APIs](./#universal-api) by combining [AI Providers](/ai-gateway/entities/ai-provider/) and [AI Models](/ai-gateway/entities/ai-model/) {% new_in 2.0 %}. + [AI Providers](/ai-gateway/entities/ai-provider/) store upstream connectivity and credentials, while [AI Models](/ai-gateway/entities/ai-model/) reference Providers to expose stable client-facing endpoints and routing behavior. + column_count: 4 + columns: + - blocks: + - type: icon_card + config: + title: OpenAI + icon: /assets/icons/openai.svg + cta: + url: /ai-gateway/ai-providers/openai/ + - blocks: + - type: icon_card + config: + title: Anthropic + icon: /assets/icons/anthropic.svg + cta: + url: /ai-gateway/ai-providers/anthropic/ + - blocks: + - type: icon_card + config: + title: Azure AI + icon: /assets/icons/azure.svg + cta: + url: /ai-gateway/ai-providers/azure/ + - blocks: + - type: icon_card + config: + title: More... + icon: /assets/icons/dots.svg + cta: + url: /ai-gateway/ai-providers/ + + - header: + type: h2 + text: "Deploy {{site.ai_gateway}}" + columns: + - header: + type: h3 + text: "Tools to manage {{site.ai_gateway}} {% new_in 2.0 %}" + blocks: + - type: structured_text + config: + blocks: + - type: unordered_list + items: + - "[{{site.ai_gateway}} editor](https://cloud.konghq.com/ai-gateway): GUI for managing all your {{site.ai_gateway}} resources in one place." + # - "[decK](/deck/): Manage {{site.ai_gateway}} and {{site.base_gateway}} configuration through declarative state files." + - "[Control Plane Config API](/api/konnect/control-planes-config/): Manage {{site.ai_gateway}} resources within {{site.konnect_short_name}} Control Planes via an API." 
+ - "[kongctl](/kongctl/): Use Kong's swiss-army knife command line tool for managing and interacting with {{site.ai_gateway}} resources and configurations within {{site.konnect_short_name}}." + - header: + type: h3 + text: Deployment checklist + blocks: + - type: structured_text + config: + blocks: + - type: unordered_list + items: + - "[{{site.ai_gateway}} resource sizing guidelines](/ai-gateway/resource-sizing-guidelines-ai/): Review recommended resource allocation guidelines for {{site.ai_gateway}}." + - "[Deployment topologies](/gateway/deployment-topologies/): Learn about the different ways to deploy {{ site.base_gateway }}." + - "[Hosting options](/gateway/topology-hosting-options/): Decide where you want to host your Data Plane nodes, and whether you want Kong to host them or host them yourself." + + # - columns: + # - blocks: + # - type: card + # config: + # title: AI Model reference + # description: Use an AI Model to define a client-facing AI endpoint with capabilities, formats, and routing behavior. + # icon: /assets/icons/model.svg + # cta: + # url: /ai-gateway/entities/ai-model/ + # align: end + # - blocks: + # - type: card + # config: + # title: AI Provider reference + # description: Use an AI Provider to configure upstream LLM connectivity and authentication, then reuse it across AI Models. + # icon: /assets/icons/provider.svg + # cta: + # url: /ai-gateway/entities/ai-provider/ + # align: end + + - header: + type: h2 + text: "Manage {{site.ai_gateway}} entities" + description: | + {{site.ai_gateway}} is built around AI entities. Create Models, Providers, Agents, and MCP Servers to manage your AI traffic. Attach AI Policies to enforce authentication, rate limiting, guardrails, transformations, and governance across any entity. + column_count: 2 + columns: + - blocks: + - type: card + config: + title: AI Entities {% new_in 2.0 %} + description: Learn about AI Models, AI Providers, AI Agents, AI MCP Servers, and AI Consumers. 
+ cta: + url: /ai-gateway/entities/ + align: end + - blocks: + - type: card + config: + title: AI Policies {% new_in 2.0 %} + description: Attach governance behavior for authentication, guardrails, transformations, and more. + cta: + url: /ai-gateway/entities/ai-policy/ + align: end + + - columns: + - blocks: + - type: structured_text + config: + header: + text: "{{site.ai_gateway}} in {{site.konnect_short_name}}" + blocks: + - type: text + text: | + {{site.konnect_short_name}} provides a [unified control plane](https://cloud.konghq.com/ai-manager) to create, manage, and monitor LLMs + using the {{site.konnect_short_name}} platform. + + Key features include: + * **Routing and [load balancing](/ai-gateway/load-balancing/)**: Configure [AI Models](/ai-gateway/entities/ai-model/) and `target_models` routing across [AI Providers](/ai-gateway/entities/ai-provider/). + * **Streaming and authentication**: Enable streaming responses on [AI Models](/ai-gateway/entities/ai-model/), [AI Agents](/ai-gateway/entities/ai-agent/), and [AI MCP Servers](/ai-gateway/entities/ai-mcp-server/); enforce auth through [AI Policies](/ai-gateway/entities/ai-policy/). + * **Access control**: Use [AI Consumers](/ai-gateway/entities/ai-consumer/) and [AI Consumer Groups](/ai-gateway/entities/ai-consumer-group/), plus ACL fields on [AI Models](/ai-gateway/entities/ai-model/), [AI Agents](/ai-gateway/entities/ai-agent/), and [AI MCP Servers](/ai-gateway/entities/ai-mcp-server/). + * **Usage analytics**: Monitor request and token volumes, track error rates, and measure average latency with historical comparisons. + * **Visual traffic maps**: Explore interactive maps that show how requests flow between clients, entities, and upstreams in real time. 
+ + - blocks: + - type: image + config: + url: /assets/images/konnect/ai-manager.png + alt_text: "{{site.ai_gateway}} Dashboard in Konnect" + + - header: + type: h2 + text: "Governance" + description: | + {{site.ai_gateway}} provides policy-managed governance capabilities attached to [AI Models](/ai-gateway/entities/ai-model/), [AI Agents](/ai-gateway/entities/ai-agent/), [AI MCP Servers](/ai-gateway/entities/ai-mcp-server/), [AI Consumers](/ai-gateway/entities/ai-consumer/), and [AI Consumer Groups](/ai-gateway/entities/ai-consumer-group/). Control how sensitive data flows to AI providers, enforce content safety, transform prompts, and manage how requests are processed. + - header: + type: h3 + text: "Data governance" + description: | + {{site.ai_gateway}} enforces governance on outgoing AI prompts through allow/deny lists, blocking unauthorized requests with 4xx responses. It also provides built-in PII sanitization, automatically detecting and redacting sensitive data across 20 categories and 9 languages. Running privately and self-hosted for full control and compliance, {{site.ai_gateway}} ensures consistent protection without burdening developers, which helps simplify AI adoption at scale. + + For more information, see the full list of [Data Governance](/ai-gateway/ai-data-gov/) capabilities. + columns: + - blocks: + - type: plugin + config: + slug: ai-prompt-guard + - blocks: + - type: plugin + config: + slug: ai-semantic-prompt-guard + - blocks: + - type: plugin + config: + slug: ai-sanitizer + + - header: + type: h3 + text: "Prompt engineering" + description: | + AI systems are built around prompts, and manipulating those prompts is important for successful adoption of the technologies. + Prompt engineering is the methodology of manipulating the linguistic inputs that guide the AI system. 
+ {{site.ai_gateway}} supports policy-managed prompt capabilities that allow you to set defaults and manipulate prompts as they pass through [AI Model](/ai-gateway/entities/ai-model/) or [AI Agent](/ai-gateway/entities/ai-agent/) traffic. + columns: + - blocks: + - type: plugin + config: + slug: ai-prompt-template + - blocks: + - type: plugin + config: + slug: ai-prompt-decorator + + - header: + type: h3 + text: "Guardrails and content safety" + description: | + As a platform owner, you may need to moderate all user request content against reputable services to comply with specific sensitive categories when proxying Large Language Model (LLM) traffic. + {{site.ai_gateway}} provides built-in capabilities to handle content moderation and ensure content safety, helping you enforce compliance and protect your users across AI-powered applications. + column_count: 3 + columns: + - blocks: + - type: plugin + config: + slug: ai-azure-content-safety + - blocks: + - type: plugin + config: + slug: ai-aws-guardrails + - blocks: + - type: plugin + config: + slug: ai-gcp-model-armor + - blocks: + - type: plugin + config: + slug: ai-semantic-prompt-guard + - blocks: + - type: plugin + config: + slug: ai-semantic-response-guard + - blocks: + - type: plugin + config: + slug: ai-lakera-guard + icon: ai-lakera.png + - blocks: + - type: plugin + config: + slug: ai-custom-guardrail + icon: ai-custom-guardrail.png + + - header: + type: h3 + text: "Request transformations" + description: | + {{site.ai_gateway}} allows you to use AI technology to augment other API traffic. + One example is routing API responses through an AI language translation prompt before returning it to the client. + {{site.ai_gateway}} provides two policies that can be used in conjunction with other upstream API services to weave AI capabilities into API request processing. + These policies can be configured independently of AI Proxy. 
+ columns: + - blocks: + - type: plugin + config: + slug: ai-request-transformer + - blocks: + - type: plugin + config: + slug: ai-response-transformer + + + - header: + type: h2 + text: "Automated RAG" + column_count: 1 + columns: + - blocks: + - type: structured_text + config: + blocks: + - type: text + text: | + LLMs are only as reliable as the data they can access. When faced with incomplete information, they often produce confident yet incorrect responses known as “hallucinations.” + These hallucinations occur when LLMs lack the necessary domain knowledge. + To address this, developers use the **Retrieval-augmented Generation (RAG)** approach, which enriches models with relevant data pulled from vector databases. + + While standard RAG workflows are resource-heavy, as they require teams to generate embeddings and manually curate them in vector databases, Kong's **AI RAG Injector** policy automates this entire process. + Instead of embedding RAG logic into every application individually, platform teams can inject vetted data into prompts directly at the gateway layer without any manual interventions. + + - column_count: 2 + columns: + - blocks: + - type: plugin + config: + slug: ai-rag-injector + + - header: + type: h2 + text: "Load balancing" + description: | + {{site.ai_gateway}}'s load balancer routes requests across AI models to optimize for speed, cost, and reliability. + It supports algorithms like consistent hashing, lowest-latency, usage-based, round-robin, and semantic matching, with built-in retries and fallback for resilience. + + The balancer dynamically selects models based on real-time performance and prompt relevance, and works across mixed environments including OpenAI, Mistral, and Llama models. + columns: + - blocks: + - type: card + config: + title: Load balancing + description: Learn about the load balancing algorithms available for {{site.ai_gateway}}. 
+ icon: /assets/icons/load-balance.svg + cta: + url: /ai-gateway/load-balancing/ + align: end + - blocks: + - type: card + config: + title: Retry and fallback + description: Learn how {{site.ai_gateway}} load balancers handle retry and fallback. + icon: /assets/icons/redo.svg + cta: + url: /ai-gateway/load-balancing/#retry-and-fallback + align: end + - header: + type: h2 + text: "LLM cost control" + description: | + {{site.ai_gateway}} helps reduce LLM usage costs by giving you control over how prompts are built and routed. + You can compress and structure prompts efficiently using AI Prompt Compressor, AI RAG Injector, and AI Prompt Decorator policies. + For further savings, you can use AI Proxy Advanced to route requests across OpenAI models based on semantic similarity. + columns: + - blocks: + - type: plugin + config: + slug: ai-prompt-compressor + - blocks: + - type: card + config: + title: Meter, bill, and monetize the entire AI connectivity data path + description: Track LLM token usage across models and prompt types for accurate billing and cost control. Create pricing plans based on input, output, and system token consumption, then automate invoicing with Stripe or ERP integrations. + icon: /assets/icons/analytics.svg + cta: + url: /metering-and-billing/ + align: end + - blocks: + - type: card + config: + title: Save LLM usage costs with semantic load balancing + description: Use semantic load balancing to optimize LLM usage and reduce costs by intelligently routing chat requests across multiple OpenAI models based on semantic similarity. + icon: /assets/icons/money.svg + cta: + url: /how-to/use-semantic-load-balancing + align: end + - header: + type: h2 + text: "Observability and metrics" + description: | + {{site.ai_gateway}} provides multiple approaches to monitor LLM traffic and operations. + Track token usage, latency, and costs through audit logs and metrics exporters. 
+ Instrument request flows with OpenTelemetry to trace [AI Model](/ai-gateway/entities/ai-model/), [AI MCP Server](/ai-gateway/entities/ai-mcp-server/), and [AI Agent](/ai-gateway/entities/ai-agent/) traffic across your infrastructure. + Use {{site.konnect_short_name}} Advanced Analytics for pre-built dashboards, or integrate with your existing observability stack. + column_count: 3 + columns: + - blocks: + - type: card + config: + title: Audit log + description: Learn about {{site.ai_gateway}} logging capabilities. + icon: /assets/icons/audit.svg + cta: + url: /ai-gateway/ai-audit-log-reference/ + align: end + - blocks: + - type: card + config: + title: '{{site.konnect_short_name}} {{site.observability}}' + description: Visualize LLM metrics in {{site.konnect_short_name}}. + icon: /assets/icons/analytics.svg + cta: + url: /observability/ + align: end + - blocks: + - type: card + config: + title: LLM metrics + description: Expose and visualize LLM metrics. + icon: /assets/icons/monitor.svg + cta: + url: /ai-gateway/monitor-ai-llm-metrics/ + align: end + - blocks: + - type: card + config: + title: Gen AI OTLP span attributes + description: Per-request OpenTelemetry span attributes for AI traffic. + icon: /assets/icons/opentelemetry.svg + cta: + url: /ai-gateway/llm-open-telemetry/ + align: end + - blocks: + - type: card + config: + title: Gen AI OTLP metrics + description: Aggregated OpenTelemetry metrics for AI, MCP, and A2A traffic. + icon: /assets/icons/opentelemetry.svg + cta: + url: /ai-gateway/ai-otel-metrics/ + align: end + + - header: + type: h2 + text: How-to Guides + + columns: + - blocks: + - type: how_to_list + config: + tags: + - ai + quantity: 5 + allow_empty: true + + - header: + text: "Frequently Asked Questions" + type: h2 + columns: + - blocks: + - type: faqs + config: + - q: Is {{site.ai_gateway}} {% new_in 2.0 %} available for all deployment modes? 
+ a: | + {{site.ai_gateway}} capabilities (AI, MCP, and A2A traffic management) are available across [deployment modes](/gateway/deployment-topologies/), including {{site.konnect_short_name}}, self-hosted traditional, hybrid, and DB-less, and on Kubernetes via the [{{site.kic_product_name}}](/kubernetes-ingress-controller/). + + - q: Why should I use {{site.ai_gateway}} {% new_in 2.0 %} instead of adding the LLM's API behind {{site.base_gateway}}? + a: | + If you just add an LLM's API behind {{site.base_gateway}}, you can only interact at the API level with internal traffic. + With {{site.ai_gateway}} AI Policies and runtime components, {{site.base_gateway}} can understand the prompts that are being sent through the gateway. + AI Policies can inspect the body and provide more specific AI capabilities to your traffic. diff --git a/app/_landing_pages/ai-gateway.yaml b/app/_landing_pages/ai-gateway.yaml index bcdde59694..49e11f3936 100644 --- a/app/_landing_pages/ai-gateway.yaml +++ b/app/_landing_pages/ai-gateway.yaml @@ -31,7 +31,7 @@ rows: But infrastructure often falls behind, with challenges around authentication, rate limiting, data security, observability, and constant provider changes. - {{site.ai_gateway}} addresses these challenges with a high-performance control plane that secures, governs, and observes AI-native systems end to end. Whether serving LLM traffic, exposing structured context via MCP, or coordinating agents through A2A, {{site.ai_gateway}} ensures scalable, secure, and reliable AI infrastructure. + {{site.ai_gateway}} addresses these challenges with a high-performance control plane that secures, governs, and observes AI-native systems end to end. 
In this model, an [**AI Policy**](/ai-gateway/entities/ai-policy/) {% new_in 2.0 %} is a first-class entity that applies governance behavior at a chosen scope (such as an [AI Model](/ai-gateway/entities/ai-model/), [AI Agent](/ai-gateway/entities/ai-agent/), [AI MCP Server](/ai-gateway/entities/ai-mcp-server/), [AI Consumer](/ai-gateway/entities/ai-consumer/), or [AI Consumer Group](/ai-gateway/entities/ai-consumer-group/)). Whether serving LLM traffic, exposing structured context via MCP, or coordinating agents through A2A, {{site.ai_gateway}} ensures scalable, secure, and reliable AI infrastructure. - blocks: @@ -61,7 +61,7 @@ rows: - type: card config: title: Get started - description: Run the {{site.base_gateway}} quickstart and enable the AI Proxy plugin. + description: Proxy your first LLM through {{site.ai_gateway}} with our guided setup. icon: /assets/icons/rocket.svg cta: url: /ai-gateway/get-started/ @@ -69,37 +69,73 @@ rows: - blocks: - type: card config: - title: Video tutorials - description: Learn how to use AI plugins with video tutorials. - icon: /assets/icons/graduation.svg + title: AI Gateway entities {% new_in 2.0 %} + description: Learn about AI Agents, AI MCP Servers, and how resources map to each gateway type. + icon: /assets/icons/linked-services.svg cta: - url: https://konghq.com/products/kong-ai-gateway#videos + url: /ai-gateway/entities/ align: end - blocks: - type: card config: - title: AI plugins - description: Learn about all the AI plugins. + title: AI Policies {% new_in 2.0 %} + description: Attach policy-managed governance behavior to AI Models, AI Agents, AI MCP Servers, AI Consumers, and AI Consumer Groups. icon: /assets/icons/plug.svg cta: - url: /plugins/?category=ai + url: /ai-gateway/entities/ai-policy/ align: end - blocks: - type: card config: - title: Cookbooks + title: AI Cookbooks description: End-to-end recipes for building real-world AI scenarios. 
icon: /assets/icons/cookbooks/ai.svg cta: url: /cookbooks/ align: end + - header: + type: h2 + text: Three traffic types, unified control + description: | + Govern, secure, and observe all AI traffic through dedicated AI Gateway entities {% new_in 2.0 %}. + AI Models manage LLM traffic, AI MCP Servers expose and control tools, and AI Agents route A2A traffic. + Each includes built-in authentication, policy enforcement, and observability. + column_count: 3 + columns: + - blocks: + - type: card + config: + title: LLM traffic + description: Route LLM requests through a provider-agnostic Universal API. Load-balance across providers, transform requests and responses, enforce policies, and collect usage analytics. + icon: /assets/icons/plugins/universal-api.svg + cta: + url: /ai-gateway/entities/ai-model/ + align: end + - blocks: + - type: card + config: + title: MCP traffic + description: Expose and govern tool traffic over Model Context Protocol. Control which agents access which tools, enforce rate limits, authenticate callers, and observe all tool invocations. + icon: /assets/icons/mcp.svg + cta: + url: /ai-gateway/mcp/ + align: end + - blocks: + - type: card + config: + title: A2A traffic + description: Route Agent-to-Agent traffic with protocol-aware security and observability. Rewrite agent cards, extract task state, stream events, and emit structured telemetry. + icon: /assets/icons/plugins/ai-a2a-proxy.png + cta: + url: /ai-gateway/a2a/ + align: end - header: type: h2 text: "{{site.ai_gateway}} providers" description: | - Kong AI Gateway routes AI requests to various providers through a [provider-agnostic API](./#universal-api). - This normalized API layer provides multiple benefits: client applications stay decoupled from provider-specific APIs, credentials are managed centrally, and request routing can be dynamic to optimize for cost, latency, or availability. 
+ {{site.ai_gateway}} routes AI requests through [provider-agnostic APIs](./#universal-api) by combining [AI Providers](/ai-gateway/entities/ai-provider/) and [AI Models](/ai-gateway/entities/ai-model/) {% new_in 2.0 %}. + [AI Providers](/ai-gateway/entities/ai-provider/) store upstream connectivity and credentials, while [AI Models](/ai-gateway/entities/ai-model/) reference Providers to expose stable client-facing endpoints and routing behavior. column_count: 4 columns: - blocks: @@ -143,11 +179,11 @@ rows: using the {{site.konnect_short_name}} platform. Key features include: - * **Routing and [load balancing](/ai-gateway/load-balancing/)**: Assign Gateway Services and define how traffic is distributed across models. - * **Streaming and authentication**: Enable streaming responses and manage authentication through the {{site.ai_gateway}}. - * **Access control**: Create and apply access tiers to control how clients interact with LLMs. + * **Routing and [load balancing](/ai-gateway/load-balancing/)**: Configure [AI Models](/ai-gateway/entities/ai-model/) and `target_models` routing across [AI Providers](/ai-gateway/entities/ai-provider/). + * **Streaming and authentication**: Enable streaming responses on [AI Models](/ai-gateway/entities/ai-model/), [AI Agents](/ai-gateway/entities/ai-agent/), and [AI MCP Servers](/ai-gateway/entities/ai-mcp-server/); enforce auth through [AI Policies](/ai-gateway/entities/ai-policy/). + * **Access control**: Use [AI Consumers](/ai-gateway/entities/ai-consumer/) and [AI Consumer Groups](/ai-gateway/entities/ai-consumer-group/), plus ACL fields on [AI Models](/ai-gateway/entities/ai-model/), [AI Agents](/ai-gateway/entities/ai-agent/), and [AI MCP Servers](/ai-gateway/entities/ai-mcp-server/). * **Usage analytics**: Monitor request and token volumes, track error rates, and measure average latency with historical comparisons. 
- * **Visual traffic maps**: Explore interactive maps that show how requests flow between clients and models in real time. + * **Visual traffic maps**: Explore interactive maps that show how requests flow between clients, entities, and upstreams in real time. - blocks: - type: image @@ -171,41 +207,49 @@ rows: - "[Hosting options](/gateway/topology-hosting-options/): Decide where you want to host your Data Plane nodes, and whether you want Kong to host them or host them yourself." - header: type: h2 - text: "Tools to manage {{site.ai_gateway}}" + text: "Tools to manage {{site.ai_gateway}} {% new_in 2.0 %}" blocks: - type: structured_text config: blocks: - type: unordered_list items: - - "[{{site.ai_gateway}} editor](https://cloud.konghq.com/ai-manager): GUI for managing all your {{site.ai_gateway}} resources in one place." + - "[{{site.ai_gateway}} editor](https://cloud.konghq.com/ai-gateway): GUI for managing all your {{site.ai_gateway}} resources in one place." - "[decK](/deck/): Manage {{site.ai_gateway}} and {{site.base_gateway}} configuration through declarative state files." - - "[Terraform](/terraform/): Manage infrastructure as code and automated deployments to streamline setup and configuration of {{site.konnect_short_name}} and {{site.base_gateway}}." - - "[KIC](/kubernetes-ingress-controller/): Manage ingress traffic and routing rules for your services." - - "[{{site.base_gateway}} Admin API](/api/gateway/admin-ee/): Manage on-prem {{site.base_gateway}} entities via an API." - - "[Control Plane Config API](/api/konnect/control-planes-config/): Manage {{site.base_gateway}} entities within {{site.konnect_short_name}} Control Planes via an API." + - "[Control Plane Config API](/api/konnect/control-planes-config/): Manage {{site.ai_gateway}} resources within {{site.konnect_short_name}} Control Planes via an API." 
+ - "[kongctl](/kongctl/): Use Kong's swiss-army knife command line tool for managing and interacting with {{site.ai_gateway}} resources and configurations within {{site.konnect_short_name}}." - header: type: h2 text: "{{site.ai_gateway}} capabilities" description: | - You can enable the {{site.ai_gateway}} features through a set of modern and specialized plugins, using the same model you use for any other {{site.base_gateway}} plugin. - When deployed alongside existing {{site.base_gateway}} plugins, {{site.base_gateway}} users can quickly assemble a sophisticated AI management platform without custom code or deploying new and unfamiliar tools. + You can enable {{site.ai_gateway}} capabilities through policy-managed features and AI-specific runtime components. + AI Policies let you apply reusable behavior across [AI Models](/ai-gateway/entities/ai-model/), [AI Agents](/ai-gateway/entities/ai-agent/), [AI MCP Servers](/ai-gateway/entities/ai-mcp-server/), [AI Consumers](/ai-gateway/entities/ai-consumer/), and [AI Consumer Groups](/ai-gateway/entities/ai-consumer-group/) without custom code. + For AI Policy scopes and attachment details, see the [AI Policy reference](/ai-gateway/entities/ai-policy/). column_count: 3 columns: - blocks: - type: card config: - title: Universal API - description: Route client requests to various AI providers + title: AI Models {% new_in 2.0 %} + description: Define client-facing AI endpoints, capabilities, and routing behavior. icon: /assets/icons/plugins/universal-api.svg cta: - url: ./#universal-api + url: /ai-gateway/entities/ai-model/ + align: end + - blocks: + - type: card + config: + title: AI Providers {% new_in 2.0 %} + description: Use AI Provider to configure LLM provider connectivity, credentials, and provider types. 
+ icon: /assets/icons/plugins/ai-proxy-advanced.png + cta: + url: /ai-gateway/entities/ai-provider/ align: end - blocks: - type: card config: title: Rate limiting - description: Manage traffic to your LLM API + description: Apply rate-limiting policies to AI Models, AI Agents, AI MCP Servers, AI Consumers, and AI Consumer Groups. icon: /assets/icons/plugins/ai-rate-limiting-advanced.png cta: url: /plugins/ai-rate-limiting-advanced/ @@ -231,8 +275,8 @@ rows: - blocks: - type: card config: - title: MCP traffic gateway - description: Gain control and visibility over AI agent infrastructure with {{site.ai_gateway}}-driven MCP capabilities + title: AI MCP Server governance + description: Control and observe MCP tool traffic with authentication, access policies, and telemetry. icon: /assets/icons/mcp.svg cta: url: /mcp @@ -240,8 +284,8 @@ rows: - blocks: - type: card config: - title: A2A traffic gateway - description: Secure, govern, and observe agent-to-agent (A2A) traffic with {{site.ai_gateway}}'s A2A protocol support + title: AI Agent routing and telemetry + description: Secure and observe agent-to-agent traffic with protocol-aware routing and analytics. icon: /assets/icons/plugins/ai-a2a-proxy.png cta: url: /ai-gateway/a2a/ @@ -249,7 +293,7 @@ rows: - blocks: - type: card config: - title: Automated RAG injection + title: AI RAG injection description: Automatically embed RAG logic into your workflows icon: /assets/icons/plugins/ai-rag-injector.png cta: @@ -259,7 +303,7 @@ rows: - type: card config: title: Data governance - description: Use AI plugins to control AI data and usage + description: Use AI policies to control AI data and usage icon: /assets/icons/security.svg cta: url: ./#data-governance @@ -268,7 +312,7 @@ rows: - type: card config: title: Guardrails - description: Inspect requests and configure content safety and moderation + description: Attach guardrail policies to inspect requests and enforce content safety. 
icon: /assets/icons/lock.svg cta: url: ./#guardrails-and-content-safety @@ -277,7 +321,7 @@ rows: - type: card config: title: Prompt engineering - description: Create prompt templates and manipulate client prompts + description: Apply AI Prompt policies that template and transform client prompts. icon: /assets/icons/code.svg cta: url: ./#prompt-engineering @@ -295,7 +339,7 @@ rows: - type: card config: title: Audit log - description: Learn about {{site.ai_gateway}} logging capabilities + description: Track entity-level policy decisions and AI, MCP, and A2A request activity. icon: /assets/icons/audit.svg cta: url: /ai-gateway/ai-audit-log-reference/ @@ -395,15 +439,15 @@ rows: blocks: - type: text text: | - Kong's {{site.ai_gateway}} Universal API, delivered through the [AI Proxy](/plugins/ai-proxy/) and [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugins, simplifies AI model integration by providing a single, standardized interface for interacting with models across multiple providers. + Kong's {{site.ai_gateway}} Universal API is configured through entity resources, primarily [AI Models](/ai-gateway/entities/ai-model/) and [AI Providers](/ai-gateway/entities/ai-provider/), to provide a single, standardized interface for interacting with models across multiple providers. - - [**Easy to use**](/plugins/ai-proxy/examples/openai-chat-route/): Configure once and access any AI model with minimal integration effort. + - [**Easy to use**](/ai-gateway/entities/ai-model/): Define one [AI Model](/ai-gateway/entities/ai-model/) and expose a stable client-facing endpoint. - - [**Load balancing**](/plugins/ai-proxy-advanced/#load-balancing): Automatically distribute AI requests across multiple models or providers for optimal performance and cost efficiency. + - [**Load balancing**](/ai-gateway/load-balancing/): Distribute requests across target models for performance and cost efficiency. 
- - [**Retry and fallback**](/plugins/ai-proxy-advanced/#retry-and-fallback): Optimize AI requests based on model performance, cost, or other factors. + - [**Retry and fallback**](/ai-gateway/load-balancing/#retry-and-fallback): Route requests based on performance, cost, or availability. - - [**Cross-plugin integration**](/how-to/visualize-ai-gateway-metrics-with-kibana/): Leverage AI in non-AI API workflows through other Kong Gateway plugins. + - [**Policy integration**](/ai-gateway/entities/ai-policy/): Attach [AI Policies](/ai-gateway/entities/ai-policy/) for auth, guardrails, and transformations. - blocks: - type: image @@ -412,13 +456,23 @@ rows: alt_text: Overview of AI gateway - columns: - blocks: - - type: plugin + - type: card config: - slug: ai-proxy + title: AI Model reference + description: Use an AI Model to define a client-facing AI endpoint with capabilities, formats, and routing behavior. + icon: /assets/icons/model.svg + cta: + url: /ai-gateway/entities/ai-model/ + align: end - blocks: - - type: plugin + - type: card config: - slug: ai-proxy-advanced + title: AI Provider reference + description: Use an AI Provider to configure upstream LLM connectivity and authentication, then reuse it across AI Models. + icon: /assets/icons/provider.svg + cta: + url: /ai-gateway/entities/ai-provider/ + align: end - header: type: h2 @@ -439,14 +493,14 @@ rows: blocks: - type: text text: | - {{site.ai_gateway}} helps mitigate these challenges by offering a suite of plugins that extend beyond basic AI traffic management. + {{site.ai_gateway}} helps mitigate these challenges by offering policy-managed capabilities attached to [AI Models](/ai-gateway/entities/ai-model/), [AI Agents](/ai-gateway/entities/ai-agent/), [AI MCP Servers](/ai-gateway/entities/ai-mcp-server/), [AI Consumers](/ai-gateway/entities/ai-consumer/), and [AI Consumer Groups](/ai-gateway/entities/ai-consumer-group/). 
- [**Data governance**](./#data-governance): Control how sensitive information is handled and shared with AI models. - [**Prompt engineering**](./#prompt-engineering): Customize and optimize prompts to deliver consistent, high-quality AI outputs. - [**Guardrails and content safety**](./#guardrails-and-content-safety): Enforce policies to prevent inappropriate, unsafe, or non-compliant responses. - - [**Automated RAG injection**](./#automated-rag): Seamlessly inject relevant, vetted data into AI prompts without manual RAG implementations. + - [**Automated RAG injection**](./#automated-rag): Inject relevant, vetted data into AI prompts without manual RAG implementations. - [**Load balancing**](./#load-balancing): Distribute AI traffic efficiently across multiple model endpoints to ensure performance and reliability. - - [**LLM cost control**](./#llm-cost-control): Use the AI Compressor, RAG Injector, and Prompt Decorator to compress and structure prompts efficiently. Combine with AI Proxy Advanced to route requests across OpenAI models by semantic similarity—optimizing for cost and performance. + - [**LLM cost control**](./#llm-cost-control): Use policies to compress and structure prompts efficiently. Combine with AI Proxy Advanced to route requests across OpenAI models by semantic similarity—optimizing for cost and performance. - header: type: h3 text: "Data governance" @@ -474,7 +528,7 @@ rows: description: | AI systems are built around prompts, and manipulating those prompts is important for successful adoption of the technologies. Prompt engineering is the methodology of manipulating the linguistic inputs that guide the AI system. - {{site.ai_gateway}} supports a set of plugins that allow you to create a simplified and enhanced experience by setting default prompts or manipulating prompts from clients as they pass through the gateway.
+ {{site.ai_gateway}} supports policy-managed prompt capabilities that allow you to set defaults and manipulate prompts as they pass through [AI Model](/ai-gateway/entities/ai-model/) or [AI Agent](/ai-gateway/entities/ai-agent/) traffic. columns: - blocks: - type: plugin @@ -489,7 +543,7 @@ rows: type: h3 text: "Guardrails and content safety" description: | - As a platform owner, you may need to moderate all user request content against reputable services to comply with specific sensitive categories when proxying Large Language Model (LLM) traffic. + As a platform owner, you may need to moderate all user request content against reputable services to comply with specific sensitive categories when proxying Large Language Model (LLM) traffic. {{site.ai_gateway}} provides built-in capabilities to handle content moderation and ensure content safety, that help you enforce compliance and protect your users across AI-powered applications. column_count: 3 columns: @@ -523,15 +577,6 @@ rows: config: slug: ai-custom-guardrail icon: ai-custom-guardrail.png - - blocks: - - type: card - config: - title: Amazon Bedrock guardrails - description: Include your Amazon Bedrock guardrails configuration in AI Proxy requests - icon: /assets/icons/bedrock.svg - cta: - url: /plugins/ai-proxy/#supported-native-llm-formats - align: end - header: type: h3 @@ -539,8 +584,8 @@ rows: description: | {{site.ai_gateway}} allows you to use AI technology to augment other API traffic. One example is routing API responses through an AI language translation prompt before returning it to the client. - {{site.ai_gateway}} provides two plugins that can be used in conjunction with other upstream API services to weave AI capabilities into API request processing. - These plugins can be configured independently of the AI Proxy plugin. + {{site.ai_gateway}} provides two policies that can be used in conjunction with other upstream API services to weave AI capabilities into API request processing. 
+ These policies can be configured independently of AI Proxy. columns: - blocks: - type: plugin @@ -563,12 +608,15 @@ rows: blocks: - type: text text: | - LLMs are only as reliable as the data they can access. When faced with incomplete information, they often produce confident yet incorrect responses known as “hallucinations.” + LLMs are only as reliable as the data they can access. When faced with incomplete information, they often produce confident yet incorrect responses known as “hallucinations.” These hallucinations occur when LLMs lack the necessary domain knowledge. To address this, developers use the **Retrieval-augmented Generation (RAG)** approach, which enriches models with relevant data pulled from vector databases. - While standard RAG workflows are resource-heavy, as they require teams to generate embeddings and manually curate them in vector databases, Kong's **AI RAG Injector** plugin automates this entire process. + While standard RAG workflows are resource-heavy, as they require teams to generate embeddings and manually curate them in vector databases, Kong's **AI RAG Injector** policy automates this entire process. Instead of embedding RAG logic into every application individually, platform teams can inject vetted data into prompts directly at the gateway layer without any manual interventions. + + - column_count: 2 + columns: - blocks: - type: plugin config: @@ -578,9 +626,9 @@ rows: type: h3 text: "Load balancing" description: | - {{site.ai_gateway}}'s load balancer routes requests across AI models to optimize for speed, cost, and reliability. - It supports algorithms like consistent hashing, lowest-latency, usage-based, round-robin, and semantic matching, with built-in retries and fallback for resilience {% new_in 3.10 %}. - + {{site.ai_gateway}}'s load balancer routes requests across AI models to optimize for speed, cost, and reliability. 
+ It supports algorithms like consistent hashing, lowest-latency, usage-based, round-robin, and semantic matching, with built-in retries and fallback for resilience. + The balancer dynamically selects models based on real-time performance and prompt relevance, and works across mixed environments including OpenAI, Mistral, and Llama models. columns: - blocks: @@ -605,8 +653,8 @@ rows: type: h3 text: "LLM cost control" description: | - The {{site.ai_gateway}} helps reduce LLM usage costs by giving you control over how prompts are built and routed. - You can compress and structure prompts efficiently using the AI Compressor, RAG Injector, and AI Prompt Decorator plugins. + The {{site.ai_gateway}} helps reduce LLM usage costs by giving you control over how prompts are built and routed. + You can compress and structure prompts efficiently using AI Compressor, RAG Injector, and AI Prompt Decorator policies. For further savings, you can use AI Proxy Advanced to route requests across OpenAI models based on semantic similarity. columns: - blocks: @@ -635,9 +683,9 @@ rows: type: h3 text: "Observability and metrics" description: | - {{site.ai_gateway}} provides multiple approaches to monitor LLM traffic and operations. - Track token usage, latency, and costs through audit logs and metrics exporters. - Instrument request flows with OpenTelemetry to trace prompts and responses across your infrastructure. + {{site.ai_gateway}} provides multiple approaches to monitor LLM traffic and operations. + Track token usage, latency, and costs through audit logs and metrics exporters. + Instrument request flows with OpenTelemetry to trace [AI Model](/ai-gateway/entities/ai-model/), [AI MCP Server](/ai-gateway/entities/ai-mcp-server/), and [AI Agent](/ai-gateway/entities/ai-agent/) traffic across your infrastructure. Use {{site.konnect_short_name}} Advanced Analytics for pre-built dashboards, or integrate with your existing observability stack. 
column_count: 3 columns: @@ -707,12 +755,12 @@ rows: - blocks: - type: faqs config: - - q: Is {{site.ai_gateway}} available for all deployment modes? + - q: Is {{site.ai_gateway}} {% new_in 2.0 %} available for all deployment modes? a: | - Yes, AI plugins are supported in all [deployment modes](/gateway/deployment-topologies/), including {{site.konnect_short_name}}, self-hosted traditional, hybrid, and DB-less, and on Kubernetes via the [{{site.kic_product_name}}](/kubernetes-ingress-controller/). + {{site.ai_gateway}} capabilities (AI, MCP, and A2A traffic management) are available across [deployment modes](/gateway/deployment-topologies/), including {{site.konnect_short_name}}, self-hosted traditional, hybrid, and DB-less, and on Kubernetes via the [{{site.kic_product_name}}](/kubernetes-ingress-controller/). - - q: Why should I use {{site.ai_gateway}} instead of adding the LLM's API behind {{site.base_gateway}}? + - q: Why should I use {{site.ai_gateway}} {% new_in 2.0 %} instead of adding the LLM's API behind {{site.base_gateway}}? a: | If you just add an LLM's API behind {{site.base_gateway}}, you can only interact at the API level with internal traffic. - With AI plugins, {{site.base_gateway}} can understand the prompts that are being sent through the gateway. - The plugins can inspect the body and provide more specific AI capabilities to your traffic. + With {{site.ai_gateway}} AI Policies and runtime components, {{site.base_gateway}} can understand the prompts that are being sent through the gateway. + AI Policies can inspect the body and provide more specific AI capabilities to your traffic. 
diff --git a/app/_landing_pages/ai-gateway/a2a.yaml b/app/_landing_pages/ai-gateway/a2a.yaml index 5c165879fa..44b723fcae 100644 --- a/app/_landing_pages/ai-gateway/a2a.yaml +++ b/app/_landing_pages/ai-gateway/a2a.yaml @@ -27,7 +27,7 @@ rows: config: | The [Agent-to-Agent (A2A)](https://a2aproject.github.io/A2A/) protocol defines how AI agents communicate with each other over HTTP using JSON-RPC and REST bindings. As agent-to-agent communication moves into production, teams need visibility into A2A traffic and control over how it flows. - {{site.ai_gateway}} can act as a transparent proxy for A2A traffic. The [AI A2A Proxy](/plugins/ai-a2a-proxy/) plugin auto-detects A2A requests, extracts task metadata, rewrites agent card URLs, and feeds structured metrics into the Konnect analytics pipeline and [OpenTelemetry](/plugins/opentelemetry/) tracing. + {{site.ai_gateway}} acts as a control and observability layer for A2A traffic, enabling you to route agent-to-agent requests, extract task metadata, rewrite agent card URLs, and feed structured metrics into the Konnect analytics pipeline. In {{site.ai_gateway}} {% new_in 2.0 %}, you configure A2A traffic using [AI Agents](/ai-gateway/entities/ai-agent/) and attach [AI Policies](/ai-gateway/entities/ai-policy/) for authentication, access control, and observability. - blocks: - type: image @@ -41,55 +41,52 @@ rows: config: header: type: h2 - text: "Proxy A2A Traffic" + text: "Proxy A2A traffic via {{site.ai_gateway}} {% new_in 2.0 %}" blocks: - type: text text: | - The AI A2A Proxy plugin records A2A protocol metadata so you can analyze how agent-to-agent requests are processed. + Create [AI Agent](/ai-gateway/entities/ai-agent/) entities to proxy your A2A endpoints through {{site.ai_gateway}} to unlock observability into agent communication. + - type: card + config: + icon: /assets/icons/linked-services.svg + title: AI Agent entity + description: Proxy A2A traffic using the AI Agent in {{site.ai_gateway}}. 
+ ctas: + - text: AI Agent reference + url: "/ai-gateway/entities/ai-agent/" + - text: AI Policy reference + url: "/ai-gateway/entities/ai-policy/" - blocks: - type: structured_text config: header: - type: h4 - text: "Secure A2A endpoints" + type: h2 + text: "Secure and govern A2A traffic" blocks: - type: text text: | - The AI A2A Proxy plugin handles A2A protocol concerns independently of authentication. Apply any {{site.base_gateway}} authentication plugin to the same service or route to secure your A2A endpoints. - - - columns: - - blocks: - - type: card - config: - icon: /assets/icons/ai.svg - title: Proxy and observe A2A traffic - description: | - Export A2A metrics and traces with the AI A2A Proxy plugin and OpenTelemetry. - ctas: - - text: AI A2A Proxy plugin overview - url: "/plugins/ai-a2a-proxy/" - - text: Proxy A2A agents through AI Gateway - url: "/how-to/proxy-a2a-agents/" - - blocks: + Secure access to your A2A agents by attaching [AI Policies](/ai-gateway/entities/ai-policy/) to your [AI Agent](/ai-gateway/entities/ai-agent/) entities for authentication and traffic control. - type: card config: icon: /assets/icons/lock.svg - title: Secure A2A endpoints - description: Apply authentication to A2A routes using standard gateway plugins. + title: Secure and govern with Policies + description: Secure A2A agents and control access with Policies. 
ctas: - - text: Secure A2A endpoints with OpenID Connect and Okta - url: "/how-to/secure-a2a-endpoints-with-oidc/" - - text: Secure A2A endpoints with Key Authentication - url: "/how-to/secure-a2a-endpoints/" + - text: OpenID Connect + url: "/plugins/openid-connect/" + - text: Rate Limiting + url: "/plugins/?category=traffic-control" + - text: Authentication plugins + url: "/plugins/?category=authentication" - header: type: h2 - text: "A2A traffic observability" + text: "Observe A2A traffic" description: | {{site.ai_gateway}} records A2A protocol traffic data so you can analyze how agent-to-agent requests are processed and resolved. - Audit logs capture task IDs, JSON-RPC method calls, payloads, latencies, and errors. - OpenTelemetry spans record task state, context IDs, TTFB, SSE event counts, and response sizes. - - Log plugins (File Log, HTTP Log, TCP Log, and others) consume the structured `ai.a2a` namespace emitted by the AI A2A Proxy plugin. + - Metrics track A2A-specific signals and performance indicators over time. column_count: 3 columns: - blocks: @@ -101,15 +98,6 @@ rows: cta: url: /ai-gateway/ai-audit-log-reference/#ai-a2a-proxy-logs align: end - - blocks: - - type: card - config: - title: Logging plugins - description: Send A2A traffic data to File Log, HTTP Log, TCP Log, and other destinations. - icon: /assets/icons/audit.svg - cta: - url: /plugins/?category=logging - align: end - blocks: - type: card config: @@ -138,38 +126,14 @@ rows: url: /observability/explorer/?tab=agentic-usage#metrics align: end - - header: - type: h2 - text: "Govern A2A traffic" - description: | - Use {{site.base_gateway}} plugins to control how A2A traffic flows through the gateway. - Rate limiting, traffic control, and request transformation plugins work with A2A routes the same way they work with any other proxied traffic. 
- column_count: 2 - columns: - - blocks: - - type: card - config: - title: Rate limit A2A traffic - description: Apply rate limiting to A2A routes using standard gateway plugins. - cta: - url: /how-to/rate-limit-a2a-traffic/ - align: end - - blocks: - - type: card - config: - title: Limit A2A request size - description: Use the Request Size Limiting plugin to restrict the size of A2A requests and responses - cta: - url: /how-to/limit-a2a-request-size/ - align: end - - header: - type: h2 - text: A2A how-to guides - columns: - - blocks: - - type: how_to_list - config: - tags: - - a2a - quantity: 5 - allow_empty: true \ No newline at end of file + # - header: + # type: h2 + # text: A2A how-to guides + # columns: + # - blocks: + # - type: how_to_list + # config: + # tags: + # - a2a + # quantity: 5 + # allow_empty: true diff --git a/app/_landing_pages/ai-gateway/mcp.yaml b/app/_landing_pages/ai-gateway/mcp.yaml new file mode 100644 index 0000000000..a3dc14e4b7 --- /dev/null +++ b/app/_landing_pages/ai-gateway/mcp.yaml @@ -0,0 +1,148 @@ +metadata: + title: "MCP Traffic Gateway" + content_type: landing_page + description: This page is an introduction to MCP Traffic Gateway capabilities in {{site.ai_gateway}}. + products: + - ai-gateway + - gateway + works_on: + - on-prem + - konnect + breadcrumbs: + - /ai-gateway/ + tags: + - ai + - mcp + + +rows: + - header: + type: h1 + text: "A trust and control layer for proxying traffic to MCP servers" + sub_text: Gain control and visibility over AI agent infrastructure with {{site.ai_gateway}}-driven MCP capabilities + + - header: + type: h2 + text: Bring MCP servers to production securely with {{site.ai_gateway}} + columns: + - blocks: + - type: text + config: | + AI agents are rapidly becoming core components of modern software, driving the need for structured, reliable interfaces to access tools and data. The Model Context Protocol (MCP) addresses this by enabling agents to reason, plan, and act across services. 
However, scaling MCP in remote, distributed environments introduces new operational challenges. + + {{site.ai_gateway}} enables teams to manage remote MCP traffic with enterprise-grade security, performance, authentication, context propagation, load balancing, and observability. In {{site.ai_gateway}} {% new_in 2.0 %}, you configure MCP traffic using [AI MCP Servers](/ai-gateway/entities/ai-mcp-server/) and attach [AI Policies](/ai-gateway/entities/ai-policy/) for authentication, access control, and observability. + + Learn how to: + - [Generate MCP servers from API specs](#generate-mcp-servers-from-api-specs-new-in-2-0) + - [Secure and govern MCP servers](#secure-and-govern-mcp-servers) + - [Observe MCP traffic](#observe-mcp-traffic) + - blocks: + - type: image + config: + url: /assets/images/gateway/mcp-architecture.svg + alt_text: Overview of AI gateway + + - columns: + - blocks: + - type: structured_text + config: + header: + type: h2 + text: "Generate MCP servers from API specs {% new_in 2.0 %}" + blocks: + - type: text + text: | + Turn any API into an MCP server using the [AI MCP Server](/ai-gateway/entities/ai-mcp-server/) entity. This approach does not require an LLM and provides full control over production workloads. + + The AI MCP Server entity: + - Converts API schemas into MCP-compatible tool definitions + - Aggregates multiple APIs into a single MCP server endpoint + - Supports serverless deployments for dynamic tool generation + - Integrates with AI assistants like Claude Desktop and other MCP clients + - type: card + config: + icon: /assets/icons/linked-services.svg + title: AI MCP Server entity + description: Generate an AI MCP Server from an API spec to expose tools and services over MCP in {{site.ai_gateway}}.
+ ctas: + - text: AI MCP Server reference + url: "/ai-gateway/entities/ai-mcp-server/" + - text: AI Policy reference + url: "/ai-gateway/entities/ai-policy/" + - blocks: + - type: structured_text + config: + header: + type: h2 + text: "Secure and govern MCP servers" + blocks: + - type: text + text: | + Attach [AI Policies](/ai-gateway/entities/ai-policy/) to your [AI MCP Server](/ai-gateway/entities/ai-mcp-server/) entities to apply security, governance, and observability controls across your MCP infrastructure. + + Use AI Policies and Kong Gateway plugins to: + - Secure access with the MCP OAuth2 policy or other authentication methods + - Monitor MCP traffic using AI metrics and AI audit logs + - Enforce access controls for MCP tool usage + - Govern usage with rate limiting and traffic control plugins + - type: card + config: + icon: /assets/icons/lock.svg + title: Security and governance with Policies + description: Secure MCP servers and govern traffic with AI Policies. + ctas: + - text: MCP OAuth2 policy + url: "/ai-gateway/entities/ai-policy/" + - text: Rate Limiting + url: "/plugins/rate-limiting/" + - text: Observability + url: "/ai-gateway/ai-audit-log-reference/#ai-mcp-logs" + + - header: + type: h2 + text: "Observe MCP traffic" + columns: + - blocks: + - type: structured_text + config: + blocks: + - type: text + text: | + {{site.ai_gateway}} records detailed Model Context Protocol (MCP) traffic data so you can analyze how requests are processed and resolved. + - Logs capture session IDs, JSON-RPC method calls, payloads, latencies, and errors. + - Metrics track latency, response sizes, and error counts over time, giving you a complete view of MCP server performance and behavior. + - columns: + - blocks: + - type: card + config: + title: MCP traffic audit log + description: Learn about {{site.ai_gateway}} logging capabilities for MCP traffic. 
+ cta: + url: /ai-gateway/ai-audit-log-reference/#ai-mcp-logs + align: end + - blocks: + - type: card + config: + title: MCP traffic metrics + description: Expose and visualize LLM metrics for MCP traffic. + cta: + url: /ai-gateway/monitor-ai-llm-metrics/#mcp-traffic-metrics + align: end + + - header: + type: h2 + text: "MCP Registry (tech preview)" + columns: + - blocks: + - type: structured_text + config: + blocks: + - type: text + text: | + You can catalog your MCP servers in {{site.konnect_short_name}} {{site.konnect_catalog}}. + This provides an internal catalog in {{site.konnect_short_name}} of your MCP servers. + - type: button + config: + text: "Enable MCP Registry in {{site.konnect_short_name}} Labs" + url: /catalog/mcp-registry/ + diff --git a/app/_redirects b/app/_redirects index 0ce045e25b..e4aba520dd 100644 --- a/app/_redirects +++ b/app/_redirects @@ -367,3 +367,7 @@ # Spec renames /api/konnect/api-builder/v3/ /api/konnect/api-catalog/v3/ 301 /api/konnect/api-builder/ /api/konnect/api-catalog/ 301 + +# MCP landing page + +/mcp/ /ai-gateway/mcp/ \ No newline at end of file diff --git a/app/ai-gateway/load-balancing.md b/app/ai-gateway/load-balancing.md index a0ea831146..69642d2f2a 100644 --- a/app/ai-gateway/load-balancing.md +++ b/app/ai-gateway/load-balancing.md @@ -33,13 +33,13 @@ related_resources: {{site.ai_gateway}} provides load balancing capabilities to distribute requests across multiple LLM models. You can use these features to improve fault tolerance, optimize resource utilization, and balance traffic across your AI systems. -In {{site.ai_gateway}} 2.0.0 and later, load balancing is configured on the [Model entity](/ai-gateway/entities/ai-model/) through `config.balancer` and `target_models`. +In {{site.ai_gateway}} {% new_in 2.0 %}, load balancing is configured on the [Model entity](/ai-gateway/entities/ai-model/) through `config.balancer` and `target_models`. 
diff --git a/app/assets/icons/a2a-quickstart.svg b/app/assets/icons/a2a-quickstart.svg new file mode 100644 index 0000000000..3cef1cd005 --- /dev/null +++ b/app/assets/icons/a2a-quickstart.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/app/assets/icons/entity.svg b/app/assets/icons/entity.svg new file mode 100644 index 0000000000..a6f51d0d0b --- /dev/null +++ b/app/assets/icons/entity.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/app/assets/icons/llm-quickstart.svg b/app/assets/icons/llm-quickstart.svg new file mode 100644 index 0000000000..26f1b38f79 --- /dev/null +++ b/app/assets/icons/llm-quickstart.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/app/assets/icons/mcp-quickstart.svg b/app/assets/icons/mcp-quickstart.svg new file mode 100644 index 0000000000..bf1fb75e20 --- /dev/null +++ b/app/assets/icons/mcp-quickstart.svg @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/app/assets/icons/model.svg b/app/assets/icons/model.svg new file mode 100644 index 0000000000..7761caf3dc --- /dev/null +++ b/app/assets/icons/model.svg @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/app/assets/icons/provider.svg b/app/assets/icons/provider.svg new file mode 100644 index 0000000000..1896f114b3 --- /dev/null +++ b/app/assets/icons/provider.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/app/assets/images/ai-gateway/a2a.svg b/app/assets/images/ai-gateway/a2a.svg index 53816940cd..bbfa04e351 100644 --- a/app/assets/images/ai-gateway/a2a.svg +++ b/app/assets/images/ai-gateway/a2a.svg @@ -1,136 +1,85 @@ - - - - - - - - - - -Kong AI Gateway - - - -AI A2A Proxy - - -Protocol detection - - -URL rewriting - - -OTel tracing - - -SSE streaming - - -Task extraction - - -Analytics pipeline - - - - -JSON-RPC - - -REST - - -SSE - - -A2A CLIENTS - - -A2A client -Orchestration agent - - -A2A client -Task 
manager - - -A2A client -Monitoring dashboard - - -UPSTREAM AGENTS - - -Research agent -Search, summarize, cite - - -Code agent -Generate, review, deploy - - -Data agent -Query, transform, report - - - - - - - - - - - - - - -Task tracking - - - - -OTel spans - - - - -Konnect analytics - - - - -Agent-to-Agent protocol - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + +Agent-to-Agent protocol + +A2A CLIENTS + + + +A2A client +Orchestration agent + + + +A2A client +Task manager + + + +A2A client +Monitoring dashboard +UPSTREAM AGENTS + + + +Research agent +Search, summarize, cite + + + +Code agent +Generate, review, deploy + + + +Data agent +Query, transform, report + + + +Kong AI Gateway + + +AI A2A Proxy + + +Protocol detection + + +URL rewriting + + +OTel tracing + + +SSE streaming + + +Task extraction + + +Analytics pipeline + + +JSON-RPC + +REST + +SSE + + + + + +Task tracking + + + +OTel spans + + + +Konnect analytics \ No newline at end of file diff --git a/app/assets/images/gateway/ai-gateway-overview.svg b/app/assets/images/gateway/ai-gateway-overview.svg index 485b3f7f07..6f6832f4bb 100644 --- a/app/assets/images/gateway/ai-gateway-overview.svg +++ b/app/assets/images/gateway/ai-gateway-overview.svg @@ -1,742 +1,239 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + +KONG AI GATEWAY +One gateway for LLM, MCP & A2A traffic. +APPS & AGENTS +AI GATEWAY +DESTINATIONS + + + + + +Web apps + + +Mobile + + +AI agents + + +IDE & copilots + + +Services + + +Workflows + + +KONG +AI Gateway +All Kong Policies + + + +AI Governance + + + +AI Observability + + + +AI Credentials + + + +AI Traffic Control + + + +AI Load Balancing + + + +AI Retries + + + +Universal API + + + +AI Prompt Guard + + + +AI Flow & Transformations + + + +AI Semantic Caching + + + +AI Semantic Prompt Guard + + + +AI Prompt Template + + + +AI Prompt Decorator + + + +AI Azure Content Safety + + + +AI Rate Limiting Advanced + + +LLM Providers +LLM · A2A + + +MCP Servers +MCP + + + +OpenAI + + + + + + + + + + + + +Anthropic + + + + + + + + + + + + + + + + + + + + + + + +Azure + + + +Bedrock + + + +Vertex + + + +GCP + + + +Mistral + + + +DeepSeek + + + +Qwen + + + + +DashScope + + + + +Alibaba Cloud + + + + +Cerebras + + + +Databricks + + + +Ollama + + + +vLLM + + + +GitHub + + + +Slack + + + + + + + + +Drive + + + + +Notion + + + +Jira + + + + Playwright Streamline Icon: https://streamlinehq.com + + + + + + + + +Playwright + +TRAFFIC TYPESLLMModel inference & completionsMCPTool & data context callsA2AAgent-to-agent exchange + 
\ No newline at end of file diff --git a/app/assets/images/gateway/mcp-architecture.svg b/app/assets/images/gateway/mcp-architecture.svg index 9d78ba5e38..33da01dd11 100644 --- a/app/assets/images/gateway/mcp-architecture.svg +++ b/app/assets/images/gateway/mcp-architecture.svg @@ -1,249 +1,103 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + +Context / +tool calls + +Prompts / +tool calls + +Tool call execution + + + +MCP Client + + + +Upstream APIs + + + +MCP Server + +LLM + + + +OpenAI + + + + + + + + + + + + +Anthropic + + + +Mistral + + + + + + + + + + + + + + + + + + + + + + + +Azure + + + +Bedrock + + + +Vertex + + + +GCP + + + +DeepSeek + + + +Ollama + + + + +Proxy + + +Proxy + +Kong AI Gateway + \ No newline at end of file diff --git a/app/assets/images/gateway/universal-api.svg b/app/assets/images/gateway/universal-api.svg index ea80bb9a73..a52ec4ed1c 100644 --- a/app/assets/images/gateway/universal-api.svg +++ b/app/assets/images/gateway/universal-api.svg @@ -1,168 +1,80 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + +Universal API + + + +AI PROVIDERS, MODELS & TOOLS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+Kong AI Gateway +One API for every model, tool & agent + \ No newline at end of file