diff --git a/cmd/thv/app/audit_trifecta.go b/cmd/thv/app/audit_trifecta.go
new file mode 100644
index 0000000000..0f87c76389
--- /dev/null
+++ b/cmd/thv/app/audit_trifecta.go
@@ -0,0 +1,385 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package app
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"slices"
+	"strings"
+	"text/tabwriter"
+
+	"github.com/spf13/cobra"
+
+	"github.com/stacklok/toolhive/pkg/config"
+	llmclient "github.com/stacklok/toolhive/pkg/llm/client"
+	"github.com/stacklok/toolhive/pkg/toxicflow"
+)
+
+// auditTrifectaCmd is hidden while the heuristics are being calibrated. It
+// audits a group (a set of MCP servers sharing one agent context) for
+// "lethal trifecta" risk: the co-location of private-data access, untrusted-
+// content exposure, and an exfiltration path.
+var auditTrifectaCmd = &cobra.Command{
+	Use:    "audit-trifecta [group]",
+	Short:  "Audit a group of MCP servers for lethal-trifecta risk (experimental)",
+	Hidden: true,
+	Long: `Audit a ToolHive group for "lethal trifecta" risk.
+
+The lethal trifecta is the co-location, within a single agent context, of:
+  - access to private data,
+  - exposure to untrusted content (a prompt-injection vector), and
+  - the ability to exfiltrate data externally.
+
+Because every server in a group shares the model's context, a toxic flow exists
+whenever the group contains all three. This command classifies each server from
+its permission profile and registry metadata and reports the group verdict.
+
+ToolHive can assess data access and egress with reasonable confidence (they come
+from the permission profile) but untrusted-content exposure poorly; expect an
+"indeterminate" verdict until you classify or override the relevant servers.
+
+Examples:
+  # Audit one group
+  thv audit-trifecta research
+
+  # Audit every group as JSON
+  thv audit-trifecta --all --format json
+
+  # Apply overrides for mis-classified servers
+  thv audit-trifecta research --overrides ./trifecta-overrides.json`,
+	Args:              cobra.MaximumNArgs(1),
+	RunE:              auditTrifectaCmdFunc,
+	ValidArgsFunction: completeTrifectaGroup,
+}
+
+// completeTrifectaGroup completes the group argument with known group names.
+func completeTrifectaGroup(cmd *cobra.Command, args []string, _ string) ([]string, cobra.ShellCompDirective) {
+	if len(args) != 0 {
+		return nil, cobra.ShellCompDirectiveNoFileComp
+	}
+	names, err := toxicflow.ListGroupNames(cmd.Context())
+	if err != nil {
+		return nil, cobra.ShellCompDirectiveNoFileComp
+	}
+	return names, cobra.ShellCompDirectiveNoFileComp
+}
+
+var (
+	auditTrifectaAll       bool
+	auditTrifectaFormat    string
+	auditTrifectaExplain   bool
+	auditTrifectaOverrides string
+	auditTrifectaLive      bool
+	auditTrifectaLLMModel  string
+	auditTrifectaLLMURL    string
+	auditTrifectaLLMKey    string
+	auditTrifectaLLMProxy  bool
+)
+
+// trifectaLLMKeyEnv is the fallback source for the LLM API key so it need not
+// be passed on the command line.
+const trifectaLLMKeyEnv = "THV_AUDIT_LLM_API_KEY"
+
+func init() {
+	AddFormatFlag(auditTrifectaCmd, &auditTrifectaFormat, FormatJSON, FormatText)
+	auditTrifectaCmd.Flags().BoolVar(&auditTrifectaAll, "all", false, "Audit every group")
+	auditTrifectaCmd.Flags().BoolVar(&auditTrifectaExplain, "explain", false, "Show the evidence behind each finding")
+	auditTrifectaCmd.Flags().StringVar(&auditTrifectaOverrides, "overrides", "",
+		"Path to a JSON file of role overrides for mis-classified servers")
+	auditTrifectaCmd.Flags().BoolVar(&auditTrifectaLive, "live", false,
+		"Probe running servers for live tool annotations (openWorldHint)")
+	auditTrifectaCmd.Flags().StringVar(&auditTrifectaLLMModel, "llm-model", "",
+		"LLM model for untrusted-content inference (falls back to keyword search if unset)")
+	auditTrifectaCmd.Flags().StringVar(&auditTrifectaLLMURL, "llm-base-url", "",
+		"OpenAI-compatible base URL for the LLM (e.g. https://api.openai.com/v1)")
+	auditTrifectaCmd.Flags().StringVar(&auditTrifectaLLMKey, "llm-api-key", "",
+		"API key for the LLM (or set "+trifectaLLMKeyEnv+")")
+	auditTrifectaCmd.Flags().BoolVar(&auditTrifectaLLMProxy, "llm-proxy", false,
+		"Use the running `thv llm` proxy for inference (handles auth; needs --llm-model)")
+	auditTrifectaCmd.PreRunE = ValidateFormat(&auditTrifectaFormat, FormatJSON, FormatText)
+}
+
+// buildTrifectaInference selects the inference strategy: the `thv llm` proxy
+// when --llm-proxy is set, else an LLM when a model and base URL are supplied
+// (the API key is optional), else the offline keyword search. A partial LLM
+// config falls back to keywords with a warning so a missing setting never
+// silently disables inference the user asked for.
+func buildTrifectaInference() (toxicflow.SourceInference, error) {
+	// Prefer the running `thv llm` proxy when asked: it injects auth and
+	// forwards to the configured gateway, so no key or base URL is needed here.
+	if auditTrifectaLLMProxy {
+		return llmProxyInference()
+	}
+
+	key := auditTrifectaLLMKey
+	if key == "" {
+		key = os.Getenv(trifectaLLMKeyEnv)
+	}
+	model, baseURL := auditTrifectaLLMModel, auditTrifectaLLMURL
+
+	// An explicit LLM needs a model and base URL; the key is optional (for
+	// keyless local models). With nothing set, fall back to keyword search.
+	switch {
+	case model == "" && baseURL == "" && key == "":
+		return toxicflow.NewKeywordInference(), nil
+	case model == "" || baseURL == "":
+		fmt.Fprintln(os.Stderr,
+			"warning: incomplete LLM config (need --llm-model and --llm-base-url, or --llm-proxy); using keyword search")
+		return toxicflow.NewKeywordInference(), nil
+	}
+
+	c, err := llmclient.New(llmclient.Config{Model: model, BaseURL: baseURL, APIKey: key})
+	if err != nil {
+		return nil, fmt.Errorf("configure LLM: %w", err)
+	}
+	return toxicflow.NewLLMInference(c), nil
+}
+
+// llmProxyInference builds an inference backend that talks to the local
+// `thv llm` reverse proxy. The proxy must be running (`thv llm proxy`); it
+// injects the OIDC token and forwards to the configured gateway.
+func llmProxyInference() (toxicflow.SourceInference, error) {
+	if auditTrifectaLLMModel == "" {
+		return nil, fmt.Errorf("--llm-model is required with --llm-proxy")
+	}
+	if auditTrifectaLLMURL != "" || auditTrifectaLLMKey != "" {
+		fmt.Fprintln(os.Stderr, "warning: --llm-base-url/--llm-api-key are ignored with --llm-proxy")
+	}
+
+	llmCfg := config.NewDefaultProvider().GetConfig().LLM
+	if !llmCfg.IsConfigured() {
+		return nil, fmt.Errorf("no thv llm gateway configured: run \"thv llm config set\" and start the proxy with \"thv llm proxy\"")
+	}
+
+	baseURL := llmCfg.ProxyBaseURL()
+	fmt.Fprintf(os.Stderr, "using thv llm proxy at %s (model %s)\n", baseURL, auditTrifectaLLMModel)
+
+	c, err := llmclient.New(llmclient.Config{Model: auditTrifectaLLMModel, BaseURL: baseURL})
+	if err != nil {
+		return nil, fmt.Errorf("configure LLM proxy: %w", err)
+	}
+	return toxicflow.NewLLMInference(c), nil
+}
+
+func auditTrifectaCmdFunc(cmd *cobra.Command, args []string) error {
+	ctx := cmd.Context()
+
+	if !auditTrifectaAll && len(args) == 0 {
+		return fmt.Errorf("specify a group name or use --all")
+	}
+
+	overrides, err := loadTrifectaOverrides(auditTrifectaOverrides)
+	if err != nil {
+		return err
+	}
+
+	inference, err := buildTrifectaInference()
+	if err != nil {
+		return err
+	}
+
+	collector, err := toxicflow.NewCollector(ctx, inference, auditTrifectaLive)
+	if err != nil {
+		return fmt.Errorf("failed to set up audit: %w", err)
+	}
+
+	// Resolve the groups to audit. Validate a named group exists so a typo is
+	// reported as "not found" rather than silently auditing zero servers and
+	// reading as a reassuring "no toxic flow".
+	known, err := toxicflow.ListGroupNames(ctx)
+	if err != nil {
+		return err
+	}
+	groupNames := known
+	if !auditTrifectaAll {
+		if !slices.Contains(known, args[0]) {
+			return fmt.Errorf("group %q not found (run \"thv group list\")", args[0])
+		}
+		groupNames = []string{args[0]}
+	}
+
+	results := make([]toxicflow.GroupAssessment, 0, len(groupNames))
+	for _, g := range groupNames {
+		assessment, err := collector.AssessGroup(ctx, g, overrides)
+		if err != nil {
+			return fmt.Errorf("failed to audit group %q: %w", g, err)
+		}
+		results = append(results, assessment)
+	}
+
+	warnUnknownOverrideTargets(overrides, results)
+
+	if auditTrifectaFormat == FormatJSON {
+		return printTrifectaJSON(results)
+	}
+	printTrifectaText(results)
+	return nil
+}
+
+// loadTrifectaOverrides reads a JSON array of overrides from path, or returns
+// nil when no path is given.
+func loadTrifectaOverrides(path string) ([]toxicflow.Override, error) {
+	if path == "" {
+		return nil, nil
+	}
+	data, err := os.ReadFile(path) // #nosec G304 -- path is an operator-supplied CLI flag
+	if err != nil {
+		return nil, fmt.Errorf("failed to read overrides file: %w", err)
+	}
+	var overrides []toxicflow.Override
+	if err := json.Unmarshal(data, &overrides); err != nil {
+		return nil, fmt.Errorf("failed to parse overrides file: %w", err)
+	}
+	for _, o := range overrides {
+		if err := toxicflow.ValidateOverride(o); err != nil {
+			return nil, err
+		}
+	}
+	return overrides, nil
+}
+
+// warnUnknownOverrideTargets prints a warning for any override that names a
+// server not present in the audited groups, so a typo does not silently
+// fail open.
+func warnUnknownOverrideTargets(overrides []toxicflow.Override, results []toxicflow.GroupAssessment) {
+	seen := map[string]bool{}
+	for _, a := range results {
+		for _, s := range a.Servers {
+			seen[s.Name] = true
+		}
+	}
+	for _, o := range overrides {
+		if o.Server != "" && !seen[o.Server] {
+			fmt.Fprintf(os.Stderr, "warning: override targets server %q, which is not in the audited group(s)\n", o.Server)
+		}
+	}
+}
+
+func printTrifectaJSON(results []toxicflow.GroupAssessment) error {
+	if results == nil {
+		results = []toxicflow.GroupAssessment{}
+	}
+	enc := json.NewEncoder(os.Stdout)
+	enc.SetIndent("", "  ")
+	return enc.Encode(results)
+}
+
+func printTrifectaText(results []toxicflow.GroupAssessment) {
+	if len(results) == 0 {
+		fmt.Println("No groups to audit.")
+		return
+	}
+	for i, a := range results {
+		if i > 0 {
+			fmt.Println()
+		}
+		printGroupAssessment(a)
+	}
+}
+
+func printGroupAssessment(a toxicflow.GroupAssessment) {
+	fmt.Printf("Group: %s   verdict: %s\n", a.Group, strings.ToUpper(string(a.Verdict)))
+
+	if len(a.Servers) == 0 {
+		fmt.Println("  (no servers in group)")
+		return
+	}
+
+	overridden := false
+	w := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
+	_, _ = fmt.Fprintln(w, "  SERVER\tDATA\tSOURCE\tSINK")
+	for _, s := range a.Servers {
+		_, _ = fmt.Fprintf(w, "  %s\t%s\t%s\t%s\n",
+			s.Name,
+			cell(s.Finding(toxicflow.RoleData)),
+			cell(s.Finding(toxicflow.RoleSource)),
+			cell(s.Finding(toxicflow.RoleSink)))
+		overridden = overridden || anyOverridden(s)
+	}
+	_ = w.Flush()
+
+	if overridden {
+		fmt.Println("  * value set by override — re-run with --explain for the reason")
+	}
+
+	if auditTrifectaExplain {
+		printTrifectaEvidence(a)
+	}
+
+	fmt.Printf("  Private data    : %s\n", joinOrNone(a.DataHolders))
+	fmt.Printf("  Untrusted intake: %s\n", joinOrNone(a.Sources))
+	fmt.Printf("  Exfil sink      : %s\n", joinOrNone(a.Sinks))
+	if len(a.Unclassified) > 0 {
+		fmt.Printf("  Unclassified    : %s (untrusted-content exposure unknown)\n",
+			strings.Join(a.Unclassified, ", "))
+	}
+
+	fmt.Printf("\n  %s\n", trifectaVerdictMessage(a))
+}
+
+func printTrifectaEvidence(a toxicflow.GroupAssessment) {
+	fmt.Println("  evidence:")
+	for _, s := range a.Servers {
+		for _, role := range toxicflow.AllRoles {
+			f := s.Finding(role)
+			for _, ev := range f.Evidence {
+				fmt.Printf("    %s [%s]: %s\n", s.Name, role, ev)
+			}
+		}
+	}
+}
+
+// trifectaVerdictMessage returns an actionable one-liner for the verdict.
+func trifectaVerdictMessage(a toxicflow.GroupAssessment) string {
+	switch a.Verdict {
+	case toxicflow.VerdictPresent:
+		if len(a.SelfContainedFlow) > 0 {
+			return fmt.Sprintf("WARNING: Toxic flow present. %s hold(s) all three roles alone — "+
+				"a prompt injection there could read private data and exfiltrate it. "+
+				"Cheapest fix: tighten that server's permission profile (restrict egress, "+
+				"drop mounts), no regrouping needed.", strings.Join(a.SelfContainedFlow, ", "))
+		}
+		return "WARNING: Toxic flow present. A prompt injection via an untrusted-content " +
+			"server could read private data and exfiltrate it. Split the group so the " +
+			"untrusted-content server no longer shares a context with the data/exfil servers."
+	case toxicflow.VerdictPossible:
+		return "WARNING: Possible toxic flow. All three roles are present, some at low " +
+			"confidence. Review the contributing servers above."
+	case toxicflow.VerdictIndeterminate:
+		return "REVIEW: Indeterminate. Data and exfil paths exist, but untrusted-content " +
+			"exposure could not be determined. Classify or override the unclassified " +
+			"servers to resolve."
+	case toxicflow.VerdictNone:
+		return "OK: No toxic flow. At least one leg is confidently absent."
+	default:
+		return ""
+	}
+}
+
+// cell renders a finding's confidence for the table, marking overrides with a *.
+func cell(f toxicflow.RoleFinding) string {
+	if f.Overridden {
+		return string(f.Confidence) + "*"
+	}
+	return string(f.Confidence)
+}
+
+// anyOverridden reports whether any of the server's findings was overridden.
+func anyOverridden(s toxicflow.ServerAssessment) bool {
+	for _, role := range toxicflow.AllRoles {
+		if s.Finding(role).Overridden {
+			return true
+		}
+	}
+	return false
+}
+
+func joinOrNone(items []string) string {
+	if len(items) == 0 {
+		return "(none)"
+	}
+	return strings.Join(items, ", ")
+}
diff --git a/cmd/thv/app/commands.go b/cmd/thv/app/commands.go
index c575c00de0..03e1857f49 100644
--- a/cmd/thv/app/commands.go
+++ b/cmd/thv/app/commands.go
@@ -76,6 +76,7 @@ func NewRootCmd(enableUpdates bool) *cobra.Command {
 	rootCmd.AddCommand(skillCmd)
 	rootCmd.AddCommand(statusCmd)
 	rootCmd.AddCommand(tuiCmd)
+	rootCmd.AddCommand(auditTrifectaCmd)
 
 	// Silence printing the usage on error
 	rootCmd.SilenceUsage = true
diff --git a/pkg/llm/client/client.go b/pkg/llm/client/client.go
new file mode 100644
index 0000000000..ea6d47a2cf
--- /dev/null
+++ b/pkg/llm/client/client.go
@@ -0,0 +1,129 @@
+// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package client is a minimal, dependency-free client for OpenAI-compatible
+// chat-completion endpoints. It targets the widely-implemented
+// POST /chat/completions shape, so it works with OpenAI, most LLM gateways,
+// and local servers (Ollama, vLLM, llama.cpp) by varying the base URL — which
+// also lets callers keep traffic on-host when sending data to a model is a
+// concern.
+package client
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// Config configures a Client. Model and BaseURL are required; APIKey is
+// optional so the client can target an auth-injecting proxy (e.g. the
+// `thv llm` localhost proxy) or a keyless local model.
+type Config struct {
+	// Model is the model identifier (e.g. "gpt-4o-mini").
+	Model string
+	// BaseURL is the API root (e.g. "https://api.openai.com/v1"); the client
+	// appends "/chat/completions".
+	BaseURL string
+	// APIKey, when non-empty, is sent as a Bearer token. Leave empty when the
+	// endpoint injects auth itself (a proxy) or needs none (a local model).
+	APIKey string
+	// HTTPClient is optional; a 30s-timeout client is used when nil.
+	HTTPClient *http.Client
+}
+
+// Client calls an OpenAI-compatible chat-completions endpoint.
+type Client struct {
+	model   string
+	baseURL string
+	apiKey  string
+	http    *http.Client
+}
+
+// New validates the config and returns a Client.
+func New(cfg Config) (*Client, error) {
+	if cfg.Model == "" {
+		return nil, errors.New("llm client: model is required")
+	}
+	if cfg.BaseURL == "" {
+		return nil, errors.New("llm client: base URL is required")
+	}
+	hc := cfg.HTTPClient
+	if hc == nil {
+		hc = &http.Client{Timeout: 30 * time.Second}
+	}
+	return &Client{
+		model:   cfg.Model,
+		baseURL: strings.TrimRight(cfg.BaseURL, "/"),
+		apiKey:  cfg.APIKey,
+		http:    hc,
+	}, nil
+}
+
+type chatMessage struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+}
+
+type chatRequest struct {
+	Model       string        `json:"model"`
+	Messages    []chatMessage `json:"messages"`
+	Temperature float64       `json:"temperature"`
+}
+
+type chatResponse struct {
+	Choices []struct {
+		Message chatMessage `json:"message"`
+	} `json:"choices"`
+}
+
+// Complete sends a single-turn (system + user) chat completion and returns the
+// assistant message content. Temperature is fixed at 0 for reproducibility.
+func (c *Client) Complete(ctx context.Context, system, user string) (string, error) {
+	payload, err := json.Marshal(chatRequest{
+		Model: c.model,
+		Messages: []chatMessage{
+			{Role: "system", Content: system},
+			{Role: "user", Content: user},
+		},
+		Temperature: 0,
+	})
+	if err != nil {
+		return "", fmt.Errorf("marshal request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/chat/completions", bytes.NewReader(payload))
+	if err != nil {
+		return "", fmt.Errorf("build request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	if c.apiKey != "" {
+		req.Header.Set("Authorization", "Bearer "+c.apiKey)
+	}
+
+	resp, err := c.http.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("call llm: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
+		_, _ = io.Copy(io.Discard, resp.Body) // drain remainder so the connection can be reused
+		return "", fmt.Errorf("llm returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+
+	var parsed chatResponse
+	if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil {
+		return "", fmt.Errorf("decode response: %w", err)
+	}
+	if len(parsed.Choices) == 0 {
+		return "", errors.New("llm returned no choices")
+	}
+	return parsed.Choices[0].Message.Content, nil
+}
diff --git a/pkg/llm/client/client_test.go b/pkg/llm/client/client_test.go
new file mode 100644
index 0000000000..33759ca5cd
--- /dev/null
+++ b/pkg/llm/client/client_test.go
@@ -0,0 +1,101 @@
+// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package client
+
+import (
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestNewValidation(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name    string
+		cfg     Config
+		wantErr bool
+	}{
+		{"valid", Config{Model: "m", BaseURL: "https://x/v1", APIKey: "k"}, false},
+		{"key is optional (proxy/local model)", Config{Model: "m", BaseURL: "https://x/v1"}, false},
+		{"missing model", Config{BaseURL: "https://x/v1", APIKey: "k"}, true},
+		{"missing base url", Config{Model: "m", APIKey: "k"}, true},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			_, err := New(tt.cfg)
+			if tt.wantErr {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+		})
+	}
+}
+
+func TestComplete(t *testing.T) {
+	t.Parallel()
+
+	var gotPath, gotAuth, gotModel string
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotPath = r.URL.Path
+		gotAuth = r.Header.Get("Authorization")
+		body, _ := io.ReadAll(r.Body)
+		var req map[string]any
+		_ = json.Unmarshal(body, &req)
+		gotModel, _ = req["model"].(string)
+		_, _ = io.WriteString(w, `{"choices":[{"message":{"role":"assistant","content":"hello world"}}]}`)
+	}))
+	t.Cleanup(srv.Close)
+
+	c, err := New(Config{Model: "gpt-test", BaseURL: srv.URL, APIKey: "secret", HTTPClient: srv.Client()})
+	require.NoError(t, err)
+
+	out, err := c.Complete(t.Context(), "system", "user")
+	require.NoError(t, err)
+	assert.Equal(t, "hello world", out)
+	assert.Equal(t, "/chat/completions", gotPath)
+	assert.Equal(t, "Bearer secret", gotAuth)
+	assert.Equal(t, "gpt-test", gotModel)
+}
+
+func TestCompleteNoKeyOmitsAuthHeader(t *testing.T) {
+	t.Parallel()
+
+	var hadAuth bool
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		_, hadAuth = r.Header["Authorization"]
+		_, _ = io.WriteString(w, `{"choices":[{"message":{"content":"ok"}}]}`)
+	}))
+	t.Cleanup(srv.Close)
+
+	c, err := New(Config{Model: "m", BaseURL: srv.URL, HTTPClient: srv.Client()})
+	require.NoError(t, err)
+
+	_, err = c.Complete(t.Context(), "s", "u")
+	require.NoError(t, err)
+	assert.False(t, hadAuth, "no Authorization header should be sent without an API key")
+}
+
+func TestCompleteErrorStatus(t *testing.T) {
+	t.Parallel()
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+		_, _ = io.WriteString(w, "boom")
+	}))
+	t.Cleanup(srv.Close)
+
+	c, err := New(Config{Model: "m", BaseURL: srv.URL, APIKey: "k", HTTPClient: srv.Client()})
+	require.NoError(t, err)
+
+	_, err = c.Complete(t.Context(), "s", "u")
+	require.Error(t, err)
+}
diff --git a/pkg/llm/config.go b/pkg/llm/config.go
index 9c2a0fb810..2f9f03af06 100644
--- a/pkg/llm/config.go
+++ b/pkg/llm/config.go
@@ -119,3 +119,10 @@ func (c *Config) EffectiveProxyPort() int {
 	}
 	return DefaultProxyListenPort
 }
+
+// ProxyBaseURL returns the OpenAI-compatible base URL of the localhost proxy
+// (e.g. "http://localhost:14000/v1"). Callers append the API path. This is the
+// single source of truth for the proxy's loopback base URL.
+func (c *Config) ProxyBaseURL() string {
+	return fmt.Sprintf("http://localhost:%d/v1", c.EffectiveProxyPort())
+}
diff --git a/pkg/llm/setup.go b/pkg/llm/setup.go
index aa2d9db018..b1aa9a2100 100644
--- a/pkg/llm/setup.go
+++ b/pkg/llm/setup.go
@@ -82,7 +82,7 @@ func Setup(
 		return err
 	}
 
-	proxyBaseURL := fmt.Sprintf("http://localhost:%d/v1", llmCfg.EffectiveProxyPort())
+	proxyBaseURL := llmCfg.ProxyBaseURL()
 
 	// Detect tools before login so we skip the interactive browser flow when
 	// there is nothing to configure. Login still runs before any files are
diff --git a/pkg/toxicflow/analyze.go b/pkg/toxicflow/analyze.go
new file mode 100644
index 0000000000..298ba1d37f
--- /dev/null
+++ b/pkg/toxicflow/analyze.go
@@ -0,0 +1,88 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+// AnalyzeGroup folds per-server assessments into a group verdict. Because every
+// server in a group shares the one agent context, a toxic flow exists whenever
+// the group contains all three roles; the verdict reflects the weakest of the
+// three (the flow is only as strong as its least-confident leg). It is a pure
+// function.
+func AnalyzeGroup(group string, assessments []ServerAssessment) GroupAssessment {
+	ga := GroupAssessment{Group: group, Servers: assessments}
+
+	best := map[Role]Confidence{
+		RoleData:   ConfNone,
+		RoleSource: ConfNone,
+		RoleSink:   ConfNone,
+	}
+
+	for _, s := range assessments {
+		for _, role := range AllRoles {
+			f := s.Finding(role)
+			if f.Confidence.rank() > best[role].rank() {
+				best[role] = f.Confidence
+			}
+			if f.Confidence.atOrAbove(ConfPossible) {
+				switch role {
+				case RoleData:
+					ga.DataHolders = append(ga.DataHolders, s.Name)
+				case RoleSource:
+					ga.Sources = append(ga.Sources, s.Name)
+				case RoleSink:
+					ga.Sinks = append(ga.Sinks, s.Name)
+				}
+			}
+		}
+		if hasUnknownRole(s) {
+			ga.Unclassified = append(ga.Unclassified, s.Name)
+		}
+		if holdsAllRoles(s) {
+			ga.SelfContainedFlow = append(ga.SelfContainedFlow, s.Name)
+		}
+	}
+
+	ga.Verdict = verdict(best[RoleData], best[RoleSource], best[RoleSink])
+	return ga
+}
+
+// verdict derives the group conclusion from the strongest confidence found for
+// each role. The flow is gated by its weakest leg:
+//   - all legs likely        -> present
+//   - all legs >= possible    -> possible
+//   - weakest leg unknown     -> indeterminate (cannot confirm or rule out)
+//   - any leg confidently none -> none (flow broken)
+func verdict(data, source, sink Confidence) Verdict {
+	weakest := min(data.rank(), source.rank(), sink.rank())
+	switch {
+	case weakest >= ConfLikely.rank():
+		return VerdictPresent
+	case weakest >= ConfPossible.rank():
+		return VerdictPossible
+	case weakest >= ConfUnknown.rank():
+		return VerdictIndeterminate
+	default:
+		return VerdictNone
+	}
+}
+
+// hasUnknownRole reports whether any of the server's roles could not be assessed.
+func hasUnknownRole(s ServerAssessment) bool {
+	for _, role := range AllRoles {
+		if s.Finding(role).Confidence == ConfUnknown {
+			return true
+		}
+	}
+	return false
+}
+
+// holdsAllRoles reports whether a single server holds all three roles at
+// possible-or-likely confidence, forming a toxic flow on its own.
+func holdsAllRoles(s ServerAssessment) bool {
+	for _, role := range AllRoles {
+		if !s.Finding(role).Confidence.atOrAbove(ConfPossible) {
+			return false
+		}
+	}
+	return true
+}
diff --git a/pkg/toxicflow/analyze_test.go b/pkg/toxicflow/analyze_test.go
new file mode 100644
index 0000000000..54a7278e8c
--- /dev/null
+++ b/pkg/toxicflow/analyze_test.go
@@ -0,0 +1,127 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// sa builds a server assessment with explicit confidences for each role.
+func sa(name string, data, source, sink Confidence) ServerAssessment {
+	return ServerAssessment{Name: name, Findings: map[Role]RoleFinding{
+		RoleData:   {Role: RoleData, Confidence: data},
+		RoleSource: {Role: RoleSource, Confidence: source},
+		RoleSink:   {Role: RoleSink, Confidence: sink},
+	}}
+}
+
+func TestAnalyzeGroupVerdict(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name        string
+		assessments []ServerAssessment
+		want        Verdict
+	}{
+		{
+			name:        "all roles likely on one server is present",
+			assessments: []ServerAssessment{sa("all", ConfLikely, ConfLikely, ConfLikely)},
+			want:        VerdictPresent,
+		},
+		{
+			name: "roles split across servers still forms present",
+			assessments: []ServerAssessment{
+				sa("data-and-sink", ConfLikely, ConfNone, ConfLikely),
+				sa("source", ConfNone, ConfLikely, ConfNone),
+			},
+			want: VerdictPresent,
+		},
+		{
+			name: "weakest leg only possible yields possible",
+			assessments: []ServerAssessment{
+				sa("data-sink", ConfLikely, ConfNone, ConfLikely),
+				sa("weak-source", ConfNone, ConfPossible, ConfNone),
+			},
+			want: VerdictPossible,
+		},
+		{
+			name: "unknown source with data and sink yields indeterminate",
+			assessments: []ServerAssessment{
+				sa("github", ConfLikely, ConfUnknown, ConfLikely),
+			},
+			want: VerdictIndeterminate,
+		},
+		{
+			name: "confident absence of a leg yields none",
+			assessments: []ServerAssessment{
+				sa("data-sink", ConfLikely, ConfNone, ConfLikely),
+				sa("inert", ConfNone, ConfNone, ConfNone),
+			},
+			want: VerdictNone,
+		},
+		{
+			name:        "empty group is none",
+			assessments: nil,
+			want:        VerdictNone,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			got := AnalyzeGroup("g", tt.assessments)
+			assert.Equal(t, tt.want, got.Verdict)
+		})
+	}
+}
+
+func TestAnalyzeGroupContributors(t *testing.T) {
+	t.Parallel()
+
+	got := AnalyzeGroup("research", []ServerAssessment{
+		sa("notes", ConfLikely, ConfNone, ConfNone),       // data only
+		sa("web-fetch", ConfNone, ConfLikely, ConfLikely), // source + sink
+		sa("calc", ConfNone, ConfNone, ConfNone),          // nothing
+	})
+
+	assert.Equal(t, []string{"notes"}, got.DataHolders)
+	assert.Equal(t, []string{"web-fetch"}, got.Sources)
+	assert.Equal(t, []string{"web-fetch"}, got.Sinks)
+	assert.Empty(t, got.Unclassified)
+}
+
+func TestAnalyzeGroupSelfContainedFlow(t *testing.T) {
+	t.Parallel()
+
+	// One server holds all three roles: a self-contained flow.
+	self := AnalyzeGroup("solo", []ServerAssessment{
+		sa("kitchen-sink", ConfLikely, ConfLikely, ConfLikely),
+		sa("inert", ConfNone, ConfNone, ConfNone),
+	})
+	assert.Equal(t, VerdictPresent, self.Verdict)
+	assert.Equal(t, []string{"kitchen-sink"}, self.SelfContainedFlow)
+
+	// Flow spread across three single-role servers: no self-contained flow.
+	spread := AnalyzeGroup("spread", []ServerAssessment{
+		sa("d", ConfLikely, ConfNone, ConfNone),
+		sa("s", ConfNone, ConfLikely, ConfNone),
+		sa("k", ConfNone, ConfNone, ConfLikely),
+	})
+	assert.Equal(t, VerdictPresent, spread.Verdict)
+	assert.Empty(t, spread.SelfContainedFlow)
+}
+
+func TestAnalyzeGroupUnclassified(t *testing.T) {
+	t.Parallel()
+
+	got := AnalyzeGroup("mixed", []ServerAssessment{
+		sa("github", ConfLikely, ConfUnknown, ConfLikely),
+		sa("notes", ConfLikely, ConfNone, ConfNone),
+	})
+
+	assert.Equal(t, VerdictIndeterminate, got.Verdict)
+	assert.Equal(t, []string{"github"}, got.Unclassified)
+}
diff --git a/pkg/toxicflow/classify.go b/pkg/toxicflow/classify.go
new file mode 100644
index 0000000000..8e261dc23e
--- /dev/null
+++ b/pkg/toxicflow/classify.go
@@ -0,0 +1,327 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+import (
+	"fmt"
+	"maps"
+	"slices"
+	"strings"
+
+	registrytypes "github.com/stacklok/toolhive-core/registry/types"
+	"github.com/stacklok/toolhive/pkg/authz/authorizers"
+	"github.com/stacklok/toolhive/pkg/runner"
+)
+
+// ClassifyInput carries everything Classify needs for one server. All fields
+// are optional: with no inputs the data and sink roles resolve to "unknown".
+type ClassifyInput struct {
+	// Name is the workload name.
+	Name string
+	// Config is the saved RunConfig; nil when it could not be loaded.
+	Config *runner.RunConfig
+	// Metadata is the server's registry metadata; nil when unavailable.
+	Metadata registrytypes.ServerMetadata
+	// Annotations maps tool name to MCP annotations harvested from a live
+	// tools/list (used by --live); nil for static assessment.
+	Annotations map[string]*authorizers.ToolAnnotations
+	// Hints are raise-only suggestions from a SourceInference (keyword or LLM),
+	// folded after the profile-derived baseline and before overrides.
+	Hints []Hint
+	// Overrides force role confidence for this server.
+	Overrides []Override
+}
+
+// signal is a single piece of evidence supporting a role at some confidence.
+type signal struct {
+	conf   Confidence
+	reason string
+}
+
+// untrustedTagKeywords mark registry tags that suggest a server ingests
+// content the caller does not control (a prompt-injection vector).
+var untrustedTagKeywords = []string{
+	"web", "fetch", "browser", "search", "email", "rss",
+	"scrape", "crawl", "http", "social", "news", "feed",
+}
+
+// untrustedToolKeywords mark tool-name fragments suggesting the same.
+var untrustedToolKeywords = []string{
+	"fetch", "browse", "search", "crawl", "scrape", "navigate",
+	"read_url", "get_url", "read_email", "get_issue", "get_page",
+	"download", "url",
+}
+
+// untrustedTextKeywords are whole words in a server's registry description or
+// overview that suggest untrusted-content ingestion. Matched on word
+// boundaries (not substrings) and only ever raise the source role to
+// "possible" — free-text is non-authoritative.
+var untrustedTextKeywords = map[string]struct{}{
+	"web": {}, "internet": {}, "url": {}, "urls": {}, "fetch": {},
+	"fetches": {}, "fetching": {}, "browse": {}, "browsing": {}, "browser": {},
+	"scrape": {}, "scraping": {}, "crawl": {}, "crawling": {}, "rss": {},
+	"email": {}, "emails": {}, "website": {}, "websites": {}, "search": {},
+}
+
+// Classify assesses a single server's role in a potential toxic flow. It is a
+// pure function: given the same inputs it always produces the same assessment.
+func Classify(in ClassifyInput) ServerAssessment {
+	findings := map[Role]RoleFinding{
+		RoleData:   classifyData(in),
+		RoleSource: classifySource(in),
+		RoleSink:   classifySink(in),
+	}
+	// Inference hints fold raise-only (they can only strengthen a leg); explicit
+	// overrides are authoritative and applied last so they can also weaken one.
+	applyHints(findings, in.Hints)
+	applyOverrides(findings, in.Name, in.Overrides)
+	return ServerAssessment{Name: in.Name, Findings: findings}
+}
+
+// classifyData looks for access to private or sensitive data. The permission
+// profile (filesystem reads, injected secrets) is the authoritative signal, so
+// a confident "none" requires the run config: without it the role is "unknown"
+// rather than a misleading absence. Remoteness is deliberately NOT a data
+// signal — it is egress (a sink signal); treating it as data too would
+// double-count remote servers into two legs.
+func classifyData(in ClassifyInput) RoleFinding {
+	var sigs []signal
+
+	if in.Config != nil {
+		if p := in.Config.PermissionProfile; p != nil && len(p.Read) > 0 {
+			sigs = append(sigs, signal{ConfLikely,
+				fmt.Sprintf("filesystem read access (%d mount(s))", len(p.Read))})
+		}
+		if len(in.Config.Secrets) > 0 {
+			sigs = append(sigs, signal{ConfLikely,
+				fmt.Sprintf("%d secret(s) injected", len(in.Config.Secrets))})
+		}
+	}
+
+	if in.Metadata != nil {
+		for _, ev := range in.Metadata.GetEnvVars() {
+			if ev != nil && ev.Secret {
+				sigs = append(sigs, signal{ConfLikely,
+					fmt.Sprintf("requires secret env var %q", ev.Name)})
+			}
+		}
+	}
+
+	if len(sigs) > 0 {
+		return maxFinding(RoleData, sigs)
+	}
+	// Mounts and injected secrets live on the run config; only with it can we
+	// assert a confident absence. Metadata alone is not enough.
+	if in.Config != nil {
+		return RoleFinding{Role: RoleData, Confidence: ConfNone}
+	}
+	return RoleFinding{Role: RoleData, Confidence: ConfUnknown}
+}
+
+// classifySink looks for the ability to communicate externally. ToolHive's
+// runtime is open-egress-by-default: a nil permission profile, nil network
+// policy, or nil outbound policy all resolve to unrestricted egress at runtime
+// (see pkg/container/docker/squid.go and the default network profile). The
+// sink role therefore reaches a confident "none" ONLY on positive evidence of
+// closed egress; any unspecified policy is treated as open, never as safe.
+func classifySink(in ClassifyInput) RoleFinding {
+	// Without a run config, egress is unknowable from here.
+	if in.Config == nil {
+		return RoleFinding{Role: RoleSink, Confidence: ConfUnknown}
+	}
+
+	var sigs []signal
+	if in.Config.RemoteURL != "" {
+		sigs = append(sigs, signal{ConfLikely,
+			"remote server sends requests to an external endpoint"})
+	}
+
+	switch p := in.Config.PermissionProfile; p {
+	case nil:
+		sigs = append(sigs, signal{ConfPossible, "no permission profile; runtime egress is open by default"})
+	default:
+		if p.Privileged {
+			sigs = append(sigs, signal{ConfLikely, "privileged container (network controls bypassable)"})
+		}
+		switch n := p.Network; n {
+		case nil:
+			sigs = append(sigs, signal{ConfPossible, "network policy unspecified; runtime egress is open by default"})
+		default:
+			if n.Mode == "host" {
+				sigs = append(sigs, signal{ConfLikely, "host network mode (egress not controlled)"})
+			}
+			switch o := n.Outbound; {
+			case o == nil:
+				sigs = append(sigs, signal{ConfPossible, "outbound policy unspecified; runtime egress is open by default"})
+			case o.InsecureAllowAll:
+				sigs = append(sigs, signal{ConfLikely, "unrestricted outbound network access"})
+			case len(o.AllowHost) > 0 || len(o.AllowPort) > 0:
+				sigs = append(sigs, signal{ConfPossible,
+					fmt.Sprintf("restricted outbound access (%d host(s), %d port(s))", len(o.AllowHost), len(o.AllowPort))})
+			}
+			// Outbound present with no allow rules and not insecure-allow-all
+			// is the only positive evidence of closed egress: no signal added.
+		}
+	}
+
+	if len(sigs) == 0 {
+		return RoleFinding{Role: RoleSink, Confidence: ConfNone,
+			Evidence: []string{"outbound network access denied by permission profile"}}
+	}
+	return maxFinding(RoleSink, sigs)
+}
+
+// classifySource looks for exposure to untrusted content from the one signal it
+// can trust as evidence of presence: a live tools/list openWorldHint. Tag,
+// tool, and description heuristics and LLM judgements are non-authoritative and
+// arrive separately as raise-only Hints (see SourceInference / applyHints).
+//
+// Source classification is raise-only: it never returns a confident "none".
+// Untrusted-content exposure cannot be ruled out from a server's own tool list,
+// because annotations are advisory and a compromised server can simply
+// under-report openWorldHint to hide. Only an explicit operator override may
+// assert source "none".
+func classifySource(in ClassifyInput) RoleFinding {
+	var sigs []signal
+
+	// Iterate annotations in sorted key order so evidence is deterministic.
+	for _, name := range slices.Sorted(maps.Keys(in.Annotations)) {
+		a := in.Annotations[name]
+		if a != nil && a.OpenWorldHint != nil && *a.OpenWorldHint {
+			sigs = append(sigs, signal{ConfLikely, fmt.Sprintf("tool %q has openWorldHint", name)})
+		}
+	}
+
+	if len(sigs) > 0 {
+		return maxFinding(RoleSource, sigs)
+	}
+	return RoleFinding{Role: RoleSource, Confidence: ConfUnknown}
+}
+
+// maxFinding folds a non-empty signal set into a finding, taking the strongest
+// confidence and collecting every reason as evidence. Each classifier decides
+// its own "no signals" semantics (confident none vs unknown) before calling
+// this, because that decision differs per role.
+func maxFinding(role Role, sigs []signal) RoleFinding {
+	best := ConfNone
+	ev := make([]string, 0, len(sigs))
+	for _, s := range sigs {
+		ev = append(ev, s.reason)
+		if s.conf.rank() > best.rank() {
+			best = s.conf
+		}
+	}
+	return RoleFinding{Role: role, Confidence: best, Evidence: ev}
+}
+
+// applyHints folds raise-only inference hints into the findings. A hint takes
+// effect only when it would strengthen the role's confidence, so hints can
+// never weaken a leg or manufacture a confident "none". The hint reason is
+// prepended to the evidence.
+func applyHints(findings map[Role]RoleFinding, hints []Hint) {
+	for _, h := range hints {
+		if !isRole(h.Role) {
+			continue
+		}
+		// Clamp at the choke point: inference backends (keyword or LLM) are
+		// non-authoritative, so no hint may push a leg past "possible" — only
+		// the structured openWorldHint signal reaches "likely". This enforces
+		// the cap structurally rather than trusting each backend to honor it.
+		conf := h.Confidence
+		if conf.rank() > ConfPossible.rank() {
+			conf = ConfPossible
+		}
+		prev := findings[h.Role]
+		// Raise-only: a hint applies only when it strictly strengthens the leg.
+		// Hints run before overrides (see Classify), so prev is never overridden.
+		if conf.rank() <= prev.Confidence.rank() {
+			continue
+		}
+		findings[h.Role] = RoleFinding{
+			Role:       h.Role,
+			Confidence: conf,
+			Evidence:   append([]string{h.Reason}, prev.Evidence...),
+		}
+	}
+}
+
+// applyOverrides replaces findings for the named server with explicit overrides,
+// recording the reason as top-priority evidence.
+func applyOverrides(findings map[Role]RoleFinding, server string, overrides []Override) {
+	for _, ov := range overrides {
+		if ov.Server != "" && ov.Server != server {
+			continue
+		}
+		if !isRole(ov.Role) {
+			continue
+		}
+		reason := ov.Reason
+		if reason == "" {
+			reason = "(no reason given)"
+		}
+		prev := findings[ov.Role]
+		ev := append([]string{fmt.Sprintf("overridden to %s: %s", ov.Confidence, reason)}, prev.Evidence...)
+		findings[ov.Role] = RoleFinding{
+			Role:       ov.Role,
+			Confidence: ov.Confidence,
+			Evidence:   ev,
+			Overridden: true,
+		}
+	}
+}
+
+// matchKeyword returns the first value containing any keyword (case-insensitive),
+// or "" if none match.
+func matchKeyword(values, keywords []string) string {
+	for _, v := range values {
+		lower := strings.ToLower(v)
+		for _, kw := range keywords {
+			if strings.Contains(lower, kw) {
+				return v
+			}
+		}
+	}
+	return ""
+}
+
+// matchWord returns the first whole word in text that is in the keyword set
+// (case-insensitive), or "" if none match. Word-boundary matching avoids
+// substring false positives (e.g. "web" inside "cobweb").
+func matchWord(text string, keywords map[string]struct{}) string {
+	for _, field := range strings.Fields(strings.ToLower(text)) {
+		word := strings.Trim(field, ".,;:!?()[]{}\"'`")
+		if _, ok := keywords[word]; ok {
+			return word
+		}
+	}
+	return ""
+}
+
+// isRole reports whether r is one of the three recognized roles.
+func isRole(r Role) bool {
+	return r == RoleData || r == RoleSource || r == RoleSink
+}
+
+// isConfidence reports whether c is one of the four recognized levels.
+func isConfidence(c Confidence) bool {
+	return c == ConfNone || c == ConfUnknown || c == ConfPossible || c == ConfLikely
+}
+
+// ValidateOverride checks that an override is well-formed. A parse success only
+// means valid JSON; an unknown confidence would otherwise map to ConfNone and
+// silently zero a leg (the most dangerous direction for a security advisory),
+// so reject unknown values loudly. A reason is required for auditability.
+func ValidateOverride(o Override) error {
+	if !isRole(o.Role) {
+		return fmt.Errorf("override for server %q: invalid role %q (want data, source, or sink)", o.Server, o.Role)
+	}
+	if !isConfidence(o.Confidence) {
+		return fmt.Errorf("override for server %q: invalid confidence %q (want none, unknown, possible, or likely)",
+			o.Server, o.Confidence)
+	}
+	if strings.TrimSpace(o.Reason) == "" {
+		return fmt.Errorf("override for server %q role %q: a reason is required", o.Server, o.Role)
+	}
+	return nil
+}
diff --git a/pkg/toxicflow/classify_test.go b/pkg/toxicflow/classify_test.go
new file mode 100644
index 0000000000..9e415380e4
--- /dev/null
+++ b/pkg/toxicflow/classify_test.go
@@ -0,0 +1,306 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/stacklok/toolhive-core/permissions"
+	registrytypes "github.com/stacklok/toolhive-core/registry/types"
+	"github.com/stacklok/toolhive/pkg/authz/authorizers"
+	"github.com/stacklok/toolhive/pkg/runner"
+)
+
+func boolPtr(b bool) *bool { return &b }
+
+// noEgressProfile is the equivalent of the built-in "none" profile: a profile
+// that was inspected and grants no outbound access.
+func noEgressProfile() *permissions.Profile {
+	return &permissions.Profile{
+		Network: &permissions.NetworkPermissions{
+			Outbound: &permissions.OutboundNetworkPermissions{},
+		},
+	}
+}
+
+func imageMeta(tags, tools []string) registrytypes.ServerMetadata {
+	return &registrytypes.ImageMetadata{
+		BaseServerMetadata: registrytypes.BaseServerMetadata{
+			Tags:  tags,
+			Tools: tools,
+		},
+	}
+}
+
+func TestClassify(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name    string
+		input   ClassifyInput
+		want    map[Role]Confidence
+		wantOvr map[Role]bool // roles expected to be marked Overridden
+	}{
+		{
+			name: "filesystem read mount marks data likely",
+			input: ClassifyInput{
+				Name: "fs",
+				Config: &runner.RunConfig{PermissionProfile: &permissions.Profile{
+					Read:    []permissions.MountDeclaration{"/home/user/notes:/notes"},
+					Network: &permissions.NetworkPermissions{Outbound: &permissions.OutboundNetworkPermissions{}},
+				}},
+			},
+			want: map[Role]Confidence{RoleData: ConfLikely, RoleSink: ConfNone, RoleSource: ConfUnknown},
+		},
+		{
+			name: "injected secrets mark data likely",
+			input: ClassifyInput{
+				Name:   "svc",
+				Config: &runner.RunConfig{Secrets: []string{"api-key,target=API_KEY"}, PermissionProfile: noEgressProfile()},
+			},
+			want: map[Role]Confidence{RoleData: ConfLikely, RoleSink: ConfNone, RoleSource: ConfUnknown},
+		},
+		{
+			name: "no-egress profile yields confident none for data and sink",
+			input: ClassifyInput{
+				Name:   "inert",
+				Config: &runner.RunConfig{PermissionProfile: noEgressProfile()},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfNone, RoleSource: ConfUnknown},
+		},
+		{
+			name: "unrestricted egress marks sink likely",
+			input: ClassifyInput{
+				Name: "open",
+				Config: &runner.RunConfig{PermissionProfile: &permissions.Profile{
+					Network: &permissions.NetworkPermissions{Outbound: &permissions.OutboundNetworkPermissions{InsecureAllowAll: true}},
+				}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfLikely, RoleSource: ConfUnknown},
+		},
+		{
+			name: "host-restricted egress marks sink possible",
+			input: ClassifyInput{
+				Name: "scoped",
+				Config: &runner.RunConfig{PermissionProfile: &permissions.Profile{
+					Network: &permissions.NetworkPermissions{Outbound: &permissions.OutboundNetworkPermissions{AllowHost: []string{"api.example.com"}}},
+				}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfPossible, RoleSource: ConfUnknown},
+		},
+		{
+			name: "host network mode marks sink likely",
+			input: ClassifyInput{
+				Name: "hostnet",
+				Config: &runner.RunConfig{PermissionProfile: &permissions.Profile{
+					Network: &permissions.NetworkPermissions{Mode: "host", Outbound: &permissions.OutboundNetworkPermissions{}},
+				}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfLikely, RoleSource: ConfUnknown},
+		},
+		{
+			name: "privileged container marks sink likely",
+			input: ClassifyInput{
+				Name: "priv",
+				Config: &runner.RunConfig{PermissionProfile: &permissions.Profile{
+					Privileged: true,
+					Network:    &permissions.NetworkPermissions{Outbound: &permissions.OutboundNetworkPermissions{}},
+				}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfLikely, RoleSource: ConfUnknown},
+		},
+		{
+			name: "remote url marks sink likely but not data (no double-count)",
+			input: ClassifyInput{
+				Name:   "remote",
+				Config: &runner.RunConfig{RemoteURL: "https://mcp.example.com/sse", PermissionProfile: noEgressProfile()},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfLikely, RoleSource: ConfUnknown},
+		},
+		{
+			name: "nil network policy reads sink possible (open egress by default)",
+			input: ClassifyInput{
+				Name:   "nonet",
+				Config: &runner.RunConfig{PermissionProfile: &permissions.Profile{}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfPossible, RoleSource: ConfUnknown},
+		},
+		{
+			name: "nil permission profile reads sink possible (open egress by default)",
+			input: ClassifyInput{
+				Name:   "noprofile",
+				Config: &runner.RunConfig{},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfPossible, RoleSource: ConfUnknown},
+		},
+		{
+			name: "nil outbound policy reads sink possible (open egress by default)",
+			input: ClassifyInput{
+				Name: "nooutbound",
+				Config: &runner.RunConfig{PermissionProfile: &permissions.Profile{
+					Network: &permissions.NetworkPermissions{},
+				}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfPossible, RoleSource: ConfUnknown},
+		},
+		{
+			name: "metadata without a run config leaves data and sink unknown",
+			input: ClassifyInput{
+				Name:     "noconfig",
+				Metadata: imageMeta(nil, nil),
+			},
+			want: map[Role]Confidence{RoleData: ConfUnknown, RoleSink: ConfUnknown, RoleSource: ConfUnknown},
+		},
+		{
+			name: "source hint raises source to possible",
+			input: ClassifyInput{
+				Name:   "fetcher",
+				Config: &runner.RunConfig{PermissionProfile: noEgressProfile()},
+				Hints:  []Hint{{Role: RoleSource, Confidence: ConfPossible, Reason: "tag \"web\""}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfNone, RoleSource: ConfPossible},
+		},
+		{
+			name: "data hint raises remote data from none to possible",
+			input: ClassifyInput{
+				Name:   "remote",
+				Config: &runner.RunConfig{RemoteURL: "https://mcp.example.com/sse", PermissionProfile: noEgressProfile()},
+				Hints:  []Hint{{Role: RoleData, Confidence: ConfPossible, Reason: "LLM: holds account data"}},
+			},
+			want: map[Role]Confidence{RoleData: ConfPossible, RoleSink: ConfLikely, RoleSource: ConfUnknown},
+		},
+		{
+			name: "hint stronger than possible is clamped to possible (cap enforced at the seam)",
+			input: ClassifyInput{
+				Name:   "fetcher",
+				Config: &runner.RunConfig{PermissionProfile: noEgressProfile()},
+				Hints:  []Hint{{Role: RoleSource, Confidence: ConfLikely, Reason: "overconfident backend"}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfNone, RoleSource: ConfPossible},
+		},
+		{
+			name: "hint never lowers a stronger finding (raise-only)",
+			input: ClassifyInput{
+				Name:        "browser",
+				Config:      &runner.RunConfig{PermissionProfile: noEgressProfile()},
+				Annotations: map[string]*authorizers.ToolAnnotations{"browse": {OpenWorldHint: boolPtr(true)}},
+				Hints:       []Hint{{Role: RoleSource, Confidence: ConfPossible, Reason: "weaker hint"}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfNone, RoleSource: ConfLikely},
+		},
+		{
+			name: "openWorldHint annotation marks source likely",
+			input: ClassifyInput{
+				Name:        "browser",
+				Config:      &runner.RunConfig{PermissionProfile: noEgressProfile()},
+				Annotations: map[string]*authorizers.ToolAnnotations{"browse": {OpenWorldHint: boolPtr(true)}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfNone, RoleSource: ConfLikely},
+		},
+		{
+			name: "annotations without openWorldHint leave source unknown (a server cannot self-declare safe)",
+			input: ClassifyInput{
+				Name:        "calc",
+				Config:      &runner.RunConfig{PermissionProfile: noEgressProfile()},
+				Annotations: map[string]*authorizers.ToolAnnotations{"add": {OpenWorldHint: boolPtr(false)}},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfNone, RoleSource: ConfUnknown},
+		},
+		{
+			name:  "no inputs leave every role unknown",
+			input: ClassifyInput{Name: "ghost"},
+			want:  map[Role]Confidence{RoleData: ConfUnknown, RoleSink: ConfUnknown, RoleSource: ConfUnknown},
+		},
+		{
+			name: "override downgrades source to none even against a raising hint",
+			input: ClassifyInput{
+				Name:   "intranet-fetch",
+				Config: &runner.RunConfig{PermissionProfile: noEgressProfile()},
+				Hints:  []Hint{{Role: RoleSource, Confidence: ConfPossible, Reason: "tag \"web\""}},
+				Overrides: []Override{
+					{Server: "intranet-fetch", Role: RoleSource, Confidence: ConfNone, Reason: "first-party intranet only"},
+				},
+			},
+			want:    map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfNone, RoleSource: ConfNone},
+			wantOvr: map[Role]bool{RoleSource: true},
+		},
+		{
+			name: "override targeting a different server is ignored",
+			input: ClassifyInput{
+				Name:   "fetcher",
+				Config: &runner.RunConfig{PermissionProfile: noEgressProfile()},
+				Hints:  []Hint{{Role: RoleSource, Confidence: ConfPossible, Reason: "tag \"web\""}},
+				Overrides: []Override{
+					{Server: "other", Role: RoleSource, Confidence: ConfNone, Reason: "not this one"},
+				},
+			},
+			want: map[Role]Confidence{RoleData: ConfNone, RoleSink: ConfNone, RoleSource: ConfPossible},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			got := Classify(tt.input)
+			require.Equal(t, tt.input.Name, got.Name)
+			for role, wantConf := range tt.want {
+				assert.Equalf(t, wantConf, got.Finding(role).Confidence,
+					"role %s confidence", role)
+			}
+			for role, wantOvr := range tt.wantOvr {
+				assert.Equalf(t, wantOvr, got.Finding(role).Overridden,
+					"role %s overridden", role)
+			}
+		})
+	}
+}
+
+func TestValidateOverride(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name    string
+		ovr     Override
+		wantErr bool
+	}{
+		{
+			name: "valid override",
+			ovr:  Override{Server: "fetch", Role: RoleSource, Confidence: ConfNone, Reason: "intranet only"},
+		},
+		{
+			name: "wildcard server is allowed",
+			ovr:  Override{Server: "", Role: RoleSink, Confidence: ConfNone, Reason: "all egress audited"},
+		},
+		{
+			name:    "invalid role is rejected",
+			ovr:     Override{Server: "x", Role: "egress", Confidence: ConfNone, Reason: "r"},
+			wantErr: true,
+		},
+		{
+			name:    "invalid confidence is rejected",
+			ovr:     Override{Server: "x", Role: RoleSink, Confidence: "high", Reason: "r"},
+			wantErr: true,
+		},
+		{
+			name:    "missing reason is rejected",
+			ovr:     Override{Server: "x", Role: RoleSink, Confidence: ConfNone, Reason: "  "},
+			wantErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			err := ValidateOverride(tt.ovr)
+			if tt.wantErr {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+		})
+	}
+}
diff --git a/pkg/toxicflow/collect.go b/pkg/toxicflow/collect.go
new file mode 100644
index 0000000000..7eca8f592c
--- /dev/null
+++ b/pkg/toxicflow/collect.go
@@ -0,0 +1,152 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+
+	registrytypes "github.com/stacklok/toolhive-core/registry/types"
+	"github.com/stacklok/toolhive/pkg/container/runtime"
+	"github.com/stacklok/toolhive/pkg/core"
+	"github.com/stacklok/toolhive/pkg/groups"
+	"github.com/stacklok/toolhive/pkg/registry"
+	"github.com/stacklok/toolhive/pkg/runner"
+	"github.com/stacklok/toolhive/pkg/workloads"
+)
+
+// Collector gathers the inputs a toxic-flow assessment needs (run configs and
+// registry metadata) for the workloads in a group and runs the classifier over
+// them. Registry lookups are best-effort: a missing or unreachable registry
+// degrades classification accuracy but does not fail the assessment.
+type Collector struct {
+	workloads workloads.Manager
+	registry  registry.Provider // nil when the registry is unavailable
+	inference SourceInference
+	live      bool
+}
+
+// NewCollector builds a Collector backed by the local workloads manager and the
+// default registry provider. inference selects the untrusted-content strategy
+// (keyword or LLM); a nil inference defaults to KeywordInference. When live is
+// true, running (actively-proxied) servers are probed for live tool
+// annotations.
+func NewCollector(ctx context.Context, inference SourceInference, live bool) (*Collector, error) {
+	wm, err := workloads.NewManager(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("create workloads manager: %w", err)
+	}
+	// The registry is best-effort: without it, metadata-derived hints are
+	// skipped but profile-derived data/sink roles are unaffected.
+	reg, err := registry.GetDefaultProvider()
+	if err != nil {
+		slog.Debug("toxicflow: registry provider unavailable, continuing without metadata", "error", err)
+		reg = nil
+	}
+	if inference == nil {
+		inference = NewKeywordInference()
+	}
+	return &Collector{workloads: wm, registry: reg, inference: inference, live: live}, nil
+}
+
+// AssessGroup classifies every workload in the named group and returns the
+// group verdict. The overrides apply across the group (each override may target
+// a specific server or all servers).
+func (c *Collector) AssessGroup(ctx context.Context, group string, overrides []Override) (GroupAssessment, error) {
+	all, err := c.workloads.ListWorkloads(ctx, true)
+	if err != nil {
+		return GroupAssessment{}, fmt.Errorf("list workloads: %w", err)
+	}
+	members, err := workloads.FilterByGroup(all, group)
+	if err != nil {
+		return GroupAssessment{}, fmt.Errorf("filter workloads by group %q: %w", group, err)
+	}
+
+	assessments := make([]ServerAssessment, 0, len(members))
+	for _, w := range members {
+		in := ClassifyInput{Name: w.Name, Overrides: overrides}
+
+		if rc, err := runner.LoadState(ctx, w.Name); err == nil {
+			in.Config = rc
+		} else {
+			// Without a run config the profile-derived roles fall back to
+			// "unknown"; this is surfaced in the verdict, not an error.
+			slog.Debug("toxicflow: could not load run config", "workload", w.Name, "error", err)
+		}
+
+		if c.registry != nil {
+			if meta, err := c.lookupMetadata(in.Config, w); err == nil {
+				in.Metadata = meta
+			} else {
+				slog.Debug("toxicflow: no registry metadata", "workload", w.Name, "error", err)
+			}
+		}
+
+		// Untrusted-content (and remote data) hints from the configured strategy.
+		if hints, err := c.inference.Infer(ctx, profileFor(w, in.Metadata)); err == nil {
+			in.Hints = hints
+		} else {
+			// Inference is best-effort: on failure the source role stays
+			// "unknown" (the safe direction), never a confident "none".
+			slog.Debug("toxicflow: inference failed", "workload", w.Name, "error", err)
+		}
+
+		// When we are actively proxying this server, probe it live for the
+		// authoritative openWorldHint signal.
+		if c.live && w.Status == runtime.WorkloadStatusRunning && w.URL != "" {
+			if ann, err := probeAnnotations(ctx, w.URL, w.ProxyMode); err == nil {
+				in.Annotations = ann
+			} else {
+				slog.Debug("toxicflow: live probe failed", "workload", w.Name, "error", err)
+			}
+		}
+
+		assessments = append(assessments, Classify(in))
+	}
+
+	return AnalyzeGroup(group, assessments), nil
+}
+
+// profileFor builds the public, non-sensitive profile passed to the inference
+// strategy. It deliberately carries no permission profile, secrets, or runtime
+// config — only catalog metadata an LLM backend may safely receive.
+func profileFor(w core.Workload, meta registrytypes.ServerMetadata) ServerProfile {
+	p := ServerProfile{Name: w.Name, Remote: w.Remote}
+	if meta != nil {
+		p.Description = meta.GetDescription()
+		p.Overview = meta.GetOverview()
+		p.Tags = meta.GetTags()
+		p.Tools = meta.GetTools()
+	}
+	return p
+}
+
+// lookupMetadata resolves registry metadata, preferring the registry server
+// name recorded in the run config and falling back to the workload name.
+func (c *Collector) lookupMetadata(rc *runner.RunConfig, w core.Workload) (registrytypes.ServerMetadata, error) {
+	name := w.Name
+	if rc != nil && rc.RegistryServerName != "" {
+		name = rc.RegistryServerName
+	}
+	return c.registry.GetServer(name)
+}
+
+// ListGroupNames returns the names of all known groups, for auditing every
+// group at once.
+func ListGroupNames(ctx context.Context) ([]string, error) {
+	gm, err := groups.NewManager()
+	if err != nil {
+		return nil, fmt.Errorf("create groups manager: %w", err)
+	}
+	gs, err := gm.List(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("list groups: %w", err)
+	}
+	names := make([]string, 0, len(gs))
+	for _, g := range gs {
+		names = append(names, g.Name)
+	}
+	return names, nil
+}
diff --git a/pkg/toxicflow/collect_test.go b/pkg/toxicflow/collect_test.go
new file mode 100644
index 0000000000..fa022591df
--- /dev/null
+++ b/pkg/toxicflow/collect_test.go
@@ -0,0 +1,69 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+import (
+	"testing"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/stretchr/testify/assert"
+
+	registrytypes "github.com/stacklok/toolhive-core/registry/types"
+	"github.com/stacklok/toolhive/pkg/core"
+)
+
+// TestProfileFor guards the security boundary: the profile sent to an inference
+// backend (potentially a remote LLM) must carry only public catalog metadata,
+// never anything derived from the run config, permission profile, or secrets.
+// ServerProfile having no such field is the structural guarantee; this test
+// pins the mapping and documents the intent.
+func TestProfileFor(t *testing.T) {
+	t.Parallel()
+
+	w := core.Workload{Name: "github", Remote: true}
+	meta := &registrytypes.ImageMetadata{BaseServerMetadata: registrytypes.BaseServerMetadata{
+		Description: "GitHub MCP server",
+		Overview:    "Access repositories and issues",
+		Tags:        []string{"git", "vcs"},
+		Tools:       []string{"get_issue", "create_pr"},
+	}}
+
+	got := profileFor(w, meta)
+
+	assert.Equal(t, ServerProfile{
+		Name:        "github",
+		Remote:      true,
+		Description: "GitHub MCP server",
+		Overview:    "Access repositories and issues",
+		Tags:        []string{"git", "vcs"},
+		Tools:       []string{"get_issue", "create_pr"},
+	}, got)
+}
+
+func TestProfileForNilMetadata(t *testing.T) {
+	t.Parallel()
+
+	got := profileFor(core.Workload{Name: "local", Remote: false}, nil)
+	assert.Equal(t, ServerProfile{Name: "local"}, got)
+}
+
+func TestMapAnnotations(t *testing.T) {
+	t.Parallel()
+
+	open := true
+	ro := true
+	tools := []mcp.Tool{
+		{Name: "fetch", Annotations: mcp.ToolAnnotation{OpenWorldHint: &open}},
+		{Name: "read", Annotations: mcp.ToolAnnotation{ReadOnlyHint: &ro}},
+	}
+
+	got := mapAnnotations(tools)
+
+	assert.Len(t, got, 2)
+	assert.NotNil(t, got["fetch"].OpenWorldHint)
+	assert.True(t, *got["fetch"].OpenWorldHint)
+	assert.NotNil(t, got["read"].ReadOnlyHint)
+	assert.True(t, *got["read"].ReadOnlyHint)
+	assert.Nil(t, got["read"].OpenWorldHint)
+}
diff --git a/pkg/toxicflow/inference.go b/pkg/toxicflow/inference.go
new file mode 100644
index 0000000000..a62cb86a11
--- /dev/null
+++ b/pkg/toxicflow/inference.go
@@ -0,0 +1,170 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+// ServerProfile is the public, non-sensitive view of a server passed to a
+// SourceInference. It deliberately excludes permission profiles, secrets, and
+// runtime config: an inference backend (especially a remote LLM) must never
+// receive sensitive data.
+type ServerProfile struct {
+	Name        string
+	Description string
+	Overview    string
+	Tags        []string
+	Tools       []string
+	Remote      bool
+}
+
+// Hint is a non-authoritative, raise-only suggestion from a SourceInference.
+// Hints can only raise a role's confidence (never lower it, never force a
+// confident "none"), so a wrong hint costs a spurious warning, not a missed
+// flow. Inference backends emit at most ConfPossible — the structured
+// openWorldHint is the only basis for a stronger source signal.
+type Hint struct {
+	Role       Role
+	Confidence Confidence
+	Reason     string
+}
+
+// SourceInference derives untrusted-content (and, where possible, private-data)
+// hints from a server's public metadata. Implementations must be safe to call
+// with partial profiles and must not return findings stronger than ConfPossible.
+type SourceInference interface {
+	Infer(ctx context.Context, p ServerProfile) ([]Hint, error)
+}
+
+// KeywordInference is the default, offline, deterministic strategy: it matches
+// tags, tool names, and description text against curated keyword lists.
+type KeywordInference struct{}
+
+// NewKeywordInference returns the keyword-based inference strategy.
+func NewKeywordInference() KeywordInference { return KeywordInference{} }
+
+// Infer implements SourceInference using keyword heuristics.
+func (KeywordInference) Infer(_ context.Context, p ServerProfile) ([]Hint, error) {
+	var hints []Hint
+	if tag := matchKeyword(p.Tags, untrustedTagKeywords); tag != "" {
+		hints = append(hints, Hint{RoleSource, ConfPossible,
+			fmt.Sprintf("tag %q suggests untrusted-content ingestion", tag)})
+	}
+	if tool := matchKeyword(p.Tools, untrustedToolKeywords); tool != "" {
+		hints = append(hints, Hint{RoleSource, ConfPossible,
+			fmt.Sprintf("tool %q suggests untrusted-content ingestion", tool)})
+	}
+	if word := matchWord(p.Description+" "+p.Overview, untrustedTextKeywords); word != "" {
+		hints = append(hints, Hint{RoleSource, ConfPossible,
+			fmt.Sprintf("description mentions %q (possible untrusted-content ingestion)", word)})
+	}
+	return hints, nil
+}
+
+// Completer is the minimal LLM contract LLMInference needs. It is satisfied by
+// pkg/llm/client.Client; defining it here keeps toxicflow free of an LLM-client
+// dependency and makes LLMInference unit-testable with a stub.
+type Completer interface {
+	Complete(ctx context.Context, system, user string) (string, error)
+}
+
+// LLMInference uses a language model to judge, from the public profile, whether
+// a server ingests untrusted content or exposes private data. Its output is
+// still capped at ConfPossible and folded raise-only, so a hallucination can
+// only over-warn — it can never produce a reassuring "no toxic flow".
+type LLMInference struct {
+	client Completer
+}
+
+// NewLLMInference returns an LLM-backed inference strategy.
+func NewLLMInference(c Completer) LLMInference { return LLMInference{client: c} }
+
+const sourceSystemPrompt = "You classify Model Context Protocol (MCP) servers for security review.\n" +
+	"Given a server's public metadata, decide two things:\n" +
+	"- untrusted_content: does the server ingest content an external party could influence " +
+	"(web pages, search results, emails, issue/PR bodies, arbitrary documents)? " +
+	"Internal-but-user-generated content (wikis, tickets, mailboxes) counts as untrusted.\n" +
+	"- private_data: does the server plausibly access private or sensitive data " +
+	"(a user's accounts, repositories, files, internal APIs)?\n" +
+	"Respond with ONLY a JSON object: " +
+	`{"untrusted_content": bool, "private_data": bool, "reason": "<one short sentence>"}.`
+
+type llmVerdict struct {
+	UntrustedContent bool   `json:"untrusted_content"`
+	PrivateData      bool   `json:"private_data"`
+	Reason           string `json:"reason"`
+}
+
+// Infer implements SourceInference by prompting the model and parsing its JSON.
+func (l LLMInference) Infer(ctx context.Context, p ServerProfile) ([]Hint, error) {
+	out, err := l.client.Complete(ctx, sourceSystemPrompt, buildProfilePrompt(p))
+	if err != nil {
+		return nil, fmt.Errorf("llm inference: %w", err)
+	}
+	verdict, err := parseLLMVerdict(out)
+	if err != nil {
+		return nil, err
+	}
+
+	var hints []Hint
+	if verdict.UntrustedContent {
+		hints = append(hints, Hint{RoleSource, ConfPossible, llmReason("untrusted content", verdict.Reason)})
+	}
+	if verdict.PrivateData {
+		hints = append(hints, Hint{RoleData, ConfPossible, llmReason("private data", verdict.Reason)})
+	}
+	return hints, nil
+}
+
+func buildProfilePrompt(p ServerProfile) string {
+	var b strings.Builder
+	fmt.Fprintf(&b, "name: %s\n", p.Name)
+	fmt.Fprintf(&b, "remote: %t\n", p.Remote)
+	if p.Description != "" {
+		fmt.Fprintf(&b, "description: %s\n", p.Description)
+	}
+	if p.Overview != "" {
+		fmt.Fprintf(&b, "overview: %s\n", p.Overview)
+	}
+	if len(p.Tags) > 0 {
+		fmt.Fprintf(&b, "tags: %s\n", strings.Join(p.Tags, ", "))
+	}
+	if len(p.Tools) > 0 {
+		fmt.Fprintf(&b, "tools: %s\n", strings.Join(p.Tools, ", "))
+	}
+	return b.String()
+}
+
+// parseLLMVerdict extracts the JSON object from the model output, tolerating
+// surrounding prose or code fences.
+func parseLLMVerdict(out string) (llmVerdict, error) {
+	start := strings.Index(out, "{")
+	end := strings.LastIndex(out, "}")
+	if start < 0 || end < start {
+		return llmVerdict{}, fmt.Errorf("llm response was not JSON: %q", truncate(out, 120))
+	}
+	var v llmVerdict
+	if err := json.Unmarshal([]byte(out[start:end+1]), &v); err != nil {
+		return llmVerdict{}, fmt.Errorf("parse llm response: %w", err)
+	}
+	return v, nil
+}
+
+func llmReason(kind, reason string) string {
+	if strings.TrimSpace(reason) == "" {
+		return "LLM judged this server to involve " + kind
+	}
+	return "LLM: " + reason
+}
+
+func truncate(s string, n int) string {
+	if len(s) <= n {
+		return s
+	}
+	return s[:n] + "..."
+}
diff --git a/pkg/toxicflow/inference_test.go b/pkg/toxicflow/inference_test.go
new file mode 100644
index 0000000000..4b8f9144ca
--- /dev/null
+++ b/pkg/toxicflow/inference_test.go
@@ -0,0 +1,122 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestKeywordInferenceInfer(t *testing.T) {
+	t.Parallel()
+
+	ki := NewKeywordInference()
+	tests := []struct {
+		name       string
+		profile    ServerProfile
+		wantSource bool
+	}{
+		{"web tag", ServerProfile{Tags: []string{"web", "utility"}}, true},
+		{"fetch tool name", ServerProfile{Tools: []string{"fetch_page"}}, true},
+		{"description mentions web", ServerProfile{Description: "Fetches web pages and returns their content"}, true},
+		{"innocuous description", ServerProfile{Description: "Performs arithmetic calculations locally"}, false},
+		{"substring is not a match", ServerProfile{Description: "manages a cobweb of dependencies"}, false},
+		{"empty profile", ServerProfile{Name: "x"}, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			hints, err := ki.Infer(t.Context(), tt.profile)
+			require.NoError(t, err)
+			assert.Equal(t, tt.wantSource, hasHint(hints, RoleSource))
+			for _, h := range hints {
+				assert.Equal(t, ConfPossible, h.Confidence, "keyword hints must be possible")
+			}
+		})
+	}
+}
+
+type stubCompleter struct {
+	out string
+	err error
+}
+
+func (s stubCompleter) Complete(_ context.Context, _, _ string) (string, error) {
+	return s.out, s.err
+}
+
+func TestLLMInferenceInfer(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name       string
+		out        string
+		cErr       error
+		wantSource bool
+		wantData   bool
+		wantErr    bool
+	}{
+		{
+			name:       "both legs true",
+			out:        `{"untrusted_content":true,"private_data":true,"reason":"github account"}`,
+			wantSource: true, wantData: true,
+		},
+		{
+			name:       "only untrusted content",
+			out:        `{"untrusted_content":true,"private_data":false,"reason":"web search"}`,
+			wantSource: true,
+		},
+		{
+			name: "neither leg",
+			out:  `{"untrusted_content":false,"private_data":false,"reason":"local calculator"}`,
+		},
+		{
+			name:       "json wrapped in prose and fences",
+			out:        "Sure:\n```json\n{\"untrusted_content\":true,\"private_data\":false}\n```",
+			wantSource: true,
+		},
+		{
+			name:    "non-json response is an error",
+			out:     "I cannot help with that",
+			wantErr: true,
+		},
+		{
+			name:    "completer error propagates",
+			cErr:    errors.New("network down"),
+			wantErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			li := NewLLMInference(stubCompleter{out: tt.out, err: tt.cErr})
+			hints, err := li.Infer(t.Context(), ServerProfile{Name: "x"})
+			if tt.wantErr {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			assert.Equal(t, tt.wantSource, hasHint(hints, RoleSource))
+			assert.Equal(t, tt.wantData, hasHint(hints, RoleData))
+			for _, h := range hints {
+				assert.Equal(t, ConfPossible, h.Confidence, "llm hints must be capped at possible")
+			}
+		})
+	}
+}
+
+func hasHint(hints []Hint, role Role) bool {
+	for _, h := range hints {
+		if h.Role == role {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/toxicflow/probe.go b/pkg/toxicflow/probe.go
new file mode 100644
index 0000000000..b418192c9e
--- /dev/null
+++ b/pkg/toxicflow/probe.go
@@ -0,0 +1,60 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package toxicflow
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/mark3labs/mcp-go/mcp"
+
+	"github.com/stacklok/toolhive/pkg/authz/authorizers"
+	mcpclient "github.com/stacklok/toolhive/pkg/mcp/client"
+)
+
+// probeTimeout bounds a single live tools/list probe so a slow or wedged
+// server cannot hang the audit.
+const probeTimeout = 10 * time.Second
+
+// probeAnnotations connects to a running server through the ToolHive proxy and
+// reads its live tool annotations (notably openWorldHint), which are the
+// strongest available signal for the source role. It is best-effort: callers
+// treat any error as "no live data" rather than failing the audit.
+func probeAnnotations(ctx context.Context, serverURL, transport string) (map[string]*authorizers.ToolAnnotations, error) {
+	ctx, cancel := context.WithTimeout(ctx, probeTimeout)
+	defer cancel()
+
+	if transport == "" {
+		transport = mcpclient.TransportAuto
+	}
+	c, err := mcpclient.Connect(ctx, serverURL, transport, "toolhive-trifecta-audit")
+	if err != nil {
+		return nil, fmt.Errorf("connect to %s: %w", serverURL, err)
+	}
+	defer func() { _ = c.Close() }()
+
+	res, err := c.ListTools(ctx, mcp.ListToolsRequest{})
+	if err != nil {
+		return nil, fmt.Errorf("list tools: %w", err)
+	}
+	return mapAnnotations(res.Tools), nil
+}
+
+// mapAnnotations converts MCP tool annotations into the ToolHive representation
+// classifySource consumes. Kept separate so the field-by-field copy is unit
+// testable without a live server.
+func mapAnnotations(tools []mcp.Tool) map[string]*authorizers.ToolAnnotations {
+	annotations := make(map[string]*authorizers.ToolAnnotations, len(tools))
+	for i := range tools {
+		a := tools[i].Annotations
+		annotations[tools[i].Name] = &authorizers.ToolAnnotations{
+			ReadOnlyHint:    a.ReadOnlyHint,
+			DestructiveHint: a.DestructiveHint,
+			IdempotentHint:  a.IdempotentHint,
+			OpenWorldHint:   a.OpenWorldHint,
+		}
+	}
+	return annotations
+}
diff --git a/pkg/toxicflow/types.go b/pkg/toxicflow/types.go
new file mode 100644
index 0000000000..9013522ff0
--- /dev/null
+++ b/pkg/toxicflow/types.go
@@ -0,0 +1,147 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package toxicflow assesses a ToolHive group (a set of MCP servers) for
+// "lethal trifecta" risk: the co-location, within a single agent context, of
+// access to private data, exposure to untrusted content, and the ability to
+// exfiltrate. Following information-flow terminology, the hazard is modelled as
+// a toxic flow from an untrusted-content source, through a private-data holder,
+// to an exfiltration sink. Because every server in a group shares the one model
+// context, a flow exists whenever the group contains all three roles.
+//
+// The package separates a pure classifier (Classify) and a pure analyzer
+// (AnalyzeGroup) from the impure data collection (Collector) so the heuristics
+// can be unit-tested without a running ToolHive.
+//
+// ToolHive observes permission profiles, network egress, and registry metadata
+// — not the model's reasoning. It therefore classifies the data and sink roles
+// with reasonable confidence (they derive from the permission profile) but the
+// source role poorly: untrusted-content ingestion has no first-class signal, so
+// the source role defaults to "unknown" rather than "none".
+package toxicflow
+
+// Role is the part a server can play in a toxic flow. A single server may hold
+// more than one role (e.g. a web-fetch server is both a source and a sink).
+type Role string
+
+const (
+	// RoleData marks a server with access to private or sensitive data.
+	RoleData Role = "data"
+	// RoleSource marks a server that ingests potentially untrusted content,
+	// i.e. a prompt-injection delivery vector.
+	RoleSource Role = "source"
+	// RoleSink marks a server able to communicate externally (exfiltrate).
+	RoleSink Role = "sink"
+)
+
+// AllRoles is the canonical role order used for deterministic output.
+var AllRoles = []Role{RoleData, RoleSource, RoleSink}
+
+// Confidence expresses how strongly the evidence supports a role. The ordering
+// none < unknown < possible < likely is significant: "unknown" means ToolHive
+// lacked the inputs to rule the role in or out, whereas "none" is a confident
+// absence (we inspected the relevant inputs and found nothing).
+type Confidence string
+
+const (
+	// ConfNone is a confident absence — the inputs were available and clear.
+	ConfNone Confidence = "none"
+	// ConfUnknown means the role could not be assessed from available inputs.
+	ConfUnknown Confidence = "unknown"
+	// ConfPossible means weak/inferred evidence supports the role.
+	ConfPossible Confidence = "possible"
+	// ConfLikely means strong evidence supports the role.
+	ConfLikely Confidence = "likely"
+)
+
+// rank returns the ordinal of a confidence level for comparison.
+func (c Confidence) rank() int {
+	switch c {
+	case ConfLikely:
+		return 3
+	case ConfPossible:
+		return 2
+	case ConfUnknown:
+		return 1
+	case ConfNone:
+		return 0
+	default:
+		return 0
+	}
+}
+
+// atOrAbove reports whether c is as strong as, or stronger than, other.
+func (c Confidence) atOrAbove(other Confidence) bool {
+	return c.rank() >= other.rank()
+}
+
+// RoleFinding is the assessment of a single role for a single server.
+type RoleFinding struct {
+	Role       Role       `json:"role"`
+	Confidence Confidence `json:"confidence"`
+	// Evidence holds human-readable reasons supporting the confidence level.
+	Evidence []string `json:"evidence,omitempty"`
+	// Overridden is true when an explicit override set this finding.
+	Overridden bool `json:"overridden,omitempty"`
+}
+
+// ServerAssessment is the per-server result of classification.
+type ServerAssessment struct {
+	Name     string               `json:"name"`
+	Findings map[Role]RoleFinding `json:"findings"`
+}
+
+// Finding returns the finding for a role, defaulting to a ConfNone finding when
+// absent so callers never have to nil-check.
+func (s ServerAssessment) Finding(role Role) RoleFinding {
+	if f, ok := s.Findings[role]; ok {
+		return f
+	}
+	return RoleFinding{Role: role, Confidence: ConfNone}
+}
+
+// Verdict is the group-level conclusion.
+type Verdict string
+
+const (
+	// VerdictNone means at least one role is confidently absent, so no toxic
+	// flow can form in this group.
+	VerdictNone Verdict = "none"
+	// VerdictPossible means all three roles are present at possible-or-likely
+	// confidence, but not all reach "likely".
+	VerdictPossible Verdict = "possible"
+	// VerdictPresent means all three roles are present at "likely" confidence.
+	VerdictPresent Verdict = "present"
+	// VerdictIndeterminate means a role cannot be confirmed or ruled out
+	// (stuck at "unknown"), so the group may or may not form a toxic flow.
+	VerdictIndeterminate Verdict = "indeterminate"
+)
+
+// GroupAssessment is the result of analyzing a whole group.
+type GroupAssessment struct {
+	Group   string             `json:"group"`
+	Verdict Verdict            `json:"verdict"`
+	Servers []ServerAssessment `json:"servers"`
+	// Sources, DataHolders and Sinks list the servers contributing each role
+	// at possible-or-likely confidence — the attack path at group granularity.
+	Sources     []string `json:"sources,omitempty"`
+	DataHolders []string `json:"data_holders,omitempty"`
+	Sinks       []string `json:"sinks,omitempty"`
+	// Unclassified lists servers whose source role could not be assessed; these
+	// are why a verdict may be indeterminate.
+	Unclassified []string `json:"unclassified,omitempty"`
+	// SelfContainedFlow names servers that hold all three roles at
+	// possible-or-likely confidence on their own. Such a server forms a toxic
+	// flow by itself; the cheapest fix is to tighten its permission profile,
+	// not to split the group.
+	SelfContainedFlow []string `json:"self_contained_flow,omitempty"`
+}
+
+// Override forces a role's confidence for a named server, e.g. to correct a
+// mis-classified common server. A reason is required for auditability.
+type Override struct {
+	Server     string     `json:"server"`
+	Role       Role       `json:"role"`
+	Confidence Confidence `json:"confidence"`
+	Reason     string     `json:"reason"`
+}
diff --git a/skills/audit-trifecta/SKILL.md b/skills/audit-trifecta/SKILL.md
new file mode 100644
index 0000000000..80ec9fce4c
--- /dev/null
+++ b/skills/audit-trifecta/SKILL.md
@@ -0,0 +1,177 @@
+---
+name: audit-trifecta
+description: >-
+  Audit a ToolHive group of MCP servers for "lethal trifecta" / toxic-flow risk
+  — the co-location of private-data access, untrusted-content exposure, and an
+  exfiltration path in one agent context. Use when checking whether a group is
+  safe from prompt-injection data exfiltration, reviewing MCP server combinations
+  for security, interpreting `thv audit-trifecta` output, or writing overrides for
+  mis-classified servers. NOT for running/managing servers (use toolhive-cli-user)
+  or Kubernetes operator audits.
+license: Apache-2.0
+metadata:
+  version: 0.1.0
+---
+
+# Audit a ToolHive Group for Lethal-Trifecta Risk
+
+## What this checks
+
+The **lethal trifecta** is the co-location, in a single agent context, of three
+capabilities. ToolHive models them as roles in a **toxic flow**:
+
+- **data** — access to private/sensitive data (filesystem reads, secrets, authed backends)
+- **source** — exposure to untrusted content, a prompt-injection vector (web fetch, email, issues)
+- **sink** — the ability to exfiltrate (network egress, remote endpoints)
+
+A ToolHive **group** is a set of MCP servers that share one agent's context, so a
+toxic flow exists whenever the group contains all three roles: an injection via a
+*source* server can read a *data* server's secrets and send them out a *sink*.
+
+ToolHive assesses **data** and **sink** confidently (they come from the permission
+profile) but **source** poorly — untrusted-content exposure has no first-class
+signal. Expect `indeterminate` verdicts until source servers are classified or
+overridden.
+
+## Prerequisites
+
+- `thv` built with the `audit-trifecta` subcommand (currently hidden/experimental).
+- At least one group with workloads. List groups with `thv group list`.
+
+## Instructions
+
+1. **Run the audit** for the group in question:
+   ```bash
+   thv audit-trifecta <group>
+   thv audit-trifecta --all              # every group
+   thv audit-trifecta <group> --explain  # show the evidence behind each finding
+   ```
+
+   Improve accuracy two ways (both optional, both safe — they can only *raise*
+   suspicion, never produce a false "no toxic flow"):
+   ```bash
+   # Probe running (actively-proxied) servers for live tool annotations
+   thv audit-trifecta <group> --live
+
+   # Use an LLM to judge untrusted-content/private-data from descriptions
+   # (falls back to keyword search if model/base-url are unset)
+   thv audit-trifecta <group> \
+     --llm-model gpt-4o-mini --llm-base-url https://api.openai.com/v1
+   # API key via --llm-api-key or the THV_AUDIT_LLM_API_KEY env var
+   # (key is optional — omit it for keyless local models)
+
+   # Or reuse a running `thv llm` proxy (it injects auth; no key needed)
+   thv audit-trifecta <group> --llm-proxy --llm-model <model>
+   ```
+   The `--llm-base-url` must speak the OpenAI-compatible `/v1/chat/completions`
+   API. `--llm-proxy` points at the local `thv llm` reverse proxy (must be
+   running via `thv llm proxy`), which handles gateway auth for you.
+   The LLM only ever receives **public** metadata (name, description, tags, tool
+   names) — never permission profiles, secrets, or config. Point `--llm-base-url`
+   at a local model to keep traffic on-host.
+
+2. **Read the verdict** (see [Verdicts](#verdicts)). The text output shows a
+   per-server role table, then the contributing servers per role and a one-line
+   recommendation.
+
+3. **For `present` or `possible`** — there is a real or likely toxic flow.
+   Recommend remediation in this priority order (see [Remediation](#remediation)):
+   split the group → restrict egress on data-holders → drop the source.
+
+4. **For `indeterminate`** — data and sink exist but source is unknown for the
+   listed `Unclassified` servers. Investigate each: does it ingest content the
+   user does not control (web pages, emails, issue/PR bodies, arbitrary files)?
+   - If **yes**, it is a source → the verdict becomes `present`/`possible`; remediate.
+   - If **no**, write an override setting its `source` to `none` (see
+     [Overrides](#overrides)). The test for "no" is strict: the server ingests
+     **no content any external party can influence**. "First-party" is not
+     sufficient — an internal wiki, issue tracker, or mailbox is first-party
+     *infrastructure* but routinely carries attacker-influenced *content*
+     (a comment, a PR body, a forwarded email). Those are still sources.
+
+5. **For `none`** — at least one leg is confidently absent; the group is safe from
+   this class of attack. State that plainly.
+
+6. **Re-run with `--overrides`** after writing any overrides to confirm the new
+   verdict:
+   ```bash
+   thv audit-trifecta <group> --overrides ./trifecta-overrides.json
+   ```
+
+## Verdicts
+
+| Verdict | Meaning | Action |
+|---|---|---|
+| `present` | All three roles at high confidence | Remediate now |
+| `possible` | All three present, some at low confidence | Review contributors, likely remediate |
+| `indeterminate` | data + sink present, source unknown | Classify the unclassified servers or override |
+| `none` | A leg is confidently absent | Safe — no action |
+
+## Remediation
+
+Lead with the cheapest effective fix:
+
+1. **Split the group** — move the source server (or the data server) into its own
+   group so they no longer share one agent context. This breaks the flow
+   structurally and is usually the right answer.
+2. **Restrict egress** — tighten the sink server's permission profile to an
+   allowlist (`network.outbound.allow_host`) or `isolate_network`, so even a
+   successful injection cannot reach an exfiltration destination.
+3. **Drop the source** — remove the untrusted-content server from the group if it
+   is not essential.
+
+Do not recommend "just be careful" — the trifecta is an architecture problem; the
+fix is to remove one leg from the shared context.
+
+## Overrides
+
+Overrides correct mis-classified servers. They are top-priority evidence, so use
+them deliberately and always with a real `reason` (it is the audit trail).
+
+Write a JSON array to a file (e.g. `trifecta-overrides.json`):
+
+```json
+[
+  {
+    "server": "intranet-docs",
+    "role": "source",
+    "confidence": "none",
+    "reason": "reads only the first-party internal wiki, not untrusted content"
+  },
+  {
+    "server": "weather",
+    "role": "sink",
+    "confidence": "none",
+    "reason": "egress restricted to api.weather.gov, not an exfiltration channel"
+  }
+]
+```
+
+Fields:
+- `server` — workload name. An empty string applies the override to **every**
+  server in the group — a blunt instrument; prefer naming a specific server.
+- `role` — `data`, `source`, or `sink`.
+- `confidence` — `none`, `unknown`, `possible`, or `likely`.
+- `reason` — required justification; surfaced in `--explain` and the audit trail.
+
+Overrides are validated on load: an unknown `role`/`confidence` or a missing
+`reason` fails with an error rather than silently degrading a finding. Overridden
+values are marked with `*` in the default output.
+
+**Only override what you have verified.** Setting `source: none` on a server that
+*does* ingest untrusted content silently re-opens the trifecta. When in doubt,
+investigate rather than override.
+
+## Error Handling
+
+- **"specify a group name or use --all"** — pass a group name or `--all`.
+- **Empty / `(no servers in group)`** — the group has no workloads; check
+  `thv group list` and `thv list --group <group>`.
+- **Everything `indeterminate`** — expected when servers are not in the registry
+  and have no tool annotations; classify sources via overrides, or wait for live
+  annotation probing.
+
+## See Also
+
+- `toolhive-cli-user` skill — running, grouping, and configuring MCP servers.
+- `thv audit-trifecta --help` — full flag reference.