diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index a1a0d3f8d..56bb9e13c 100755 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -3,6 +3,7 @@ ## Release v0.114.0 ### New Features and Improvements +* Detect the `AI_AGENT` environment variable (Vercel `@vercel/detect-agent` convention) as a secondary fallback for the AI agent reported in the user agent, consulted only when the agents.md `AGENT` variable is unset or empty. An unrecognized `AGENT` or `AI_AGENT` value is now passed through as-is (sanitized to the user agent allowlist and capped at 64 characters) instead of being reported as `unknown`. Mirrors [databricks/databricks-sdk-go#1683](https://github.com/databricks/databricks-sdk-go/pull/1683). ### Breaking Changes diff --git a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java index 0b8e56f6a..22ba18aea 100644 --- a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java +++ b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java @@ -249,12 +249,21 @@ private static class KnownAgent { } } - // The agents.md standard env var. When set to a value we don't specifically - // recognize, detection falls back to "unknown". + // The agents.md standard env var. Consulted first when no explicit matcher + // fires. private static final String AGENT_ENV_VAR = "AGENT"; + // The Vercel @vercel/detect-agent convention env var. Consulted only as a + // secondary fallback when AGENT is unset or empty. + private static final String AI_AGENT_ENV_VAR = "AI_AGENT"; + + // Maximum length of a passed-through fallback agent value. Longer values are + // truncated to keep the user agent header bounded. + private static final int MAX_AGENT_FALLBACK_LEN = 64; + // Canonical list of known AI coding agents. - // Keep this list in sync with databricks-sdk-go and databricks-sdk-py. + // Keep this list, and the AGENT/AI_AGENT fallback handling in + // agentEnvFallback, in sync with databricks-sdk-go and databricks-sdk-py. // Agents are listed alphabetically by product name. private static List listKnownAgents() { return Arrays.asList( @@ -294,9 +303,8 @@ private static List listKnownAgents() { // stacked when one agent invokes another as a subagent (e.g. Claude Code // spawning a Cursor CLI subprocess), so the child process inherits env // vars from multiple layers. - // - Zero agents matched: if the agents.md standard AGENT env var is set to - // a known product name, return that product name. If it is set to any - // other non-empty value, return "unknown". Otherwise return "". + // - Zero agents matched: fall back to the generic AGENT / AI_AGENT env + // vars (see agentEnvFallback). // // Because explicit matchers win over AGENT, e.g. AGENT=cursor + CLAUDECODE=1 // yields "claude-code", and AGENT=goose + CLAUDECODE=1 also yields @@ -317,23 +325,29 @@ private static String lookupAgentProvider(Environment env) { if (matches.size() > 1) { return "multiple"; } - return agentEnvFallback(env, agents); + return agentEnvFallback(env); } - // agentEnvFallback honors the agents.md AGENT= standard. - // Returns the value if it matches a known product name, "unknown" if AGENT - // is set to any other non-empty value, and "" if AGENT is unset or empty. - private static String agentEnvFallback(Environment env, List agents) { + // agentEnvFallback honors the agents.md AGENT= standard, with the + // Vercel @vercel/detect-agent AI_AGENT convention as a secondary fallback. + // AGENT takes precedence when both are non-empty. + // + // The raw value is passed through (no coercion to "unknown"), but sanitized + // to satisfy the user agent allowlist and capped at MAX_AGENT_FALLBACK_LEN + // characters. Returns "" when both AGENT and AI_AGENT are unset or empty. + private static String agentEnvFallback(Environment env) { String v = env.get(AGENT_ENV_VAR); + if (v == null || v.isEmpty()) { + v = env.get(AI_AGENT_ENV_VAR); + } if (v == null || v.isEmpty()) { return ""; } - for (KnownAgent a : agents) { - if (a.product.equals(v)) { - return v; - } + v = sanitize(v); + if (v.length() > MAX_AGENT_FALLBACK_LEN) { + v = v.substring(0, MAX_AGENT_FALLBACK_LEN); } - return "unknown"; + return v; } // Thread-safe lazy initialization of agent provider detection diff --git a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java index 0631a52f0..2a2a5cfe7 100644 --- a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java +++ b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java @@ -309,7 +309,7 @@ public void testAgentProviderAgentEnvAmp() { @Test public void testAgentProviderAgentEnvCursor() { // AGENT=cursor with no cursor-specific env var. Falls through to the - // AGENT fallback and matches "cursor" as a known product name. + // AGENT fallback and is passed through unchanged. setupAgentEnv( new HashMap() { { @@ -362,23 +362,142 @@ public void testAgentProviderAmpBothMatchers() { } @Test - public void testAgentProviderAgentEnvUnknown() { + public void testAgentProviderAgentEnvUnrecognizedPassthrough() { + // An unrecognized AGENT value is passed through as-is (no longer coerced + // to "unknown"), after sanitization. setupAgentEnv( new HashMap() { { put("AGENT", "someweirdthing"); } }); - Assertions.assertTrue(UserAgent.asString().contains("agent/unknown")); + Assertions.assertTrue(UserAgent.asString().contains("agent/someweirdthing")); + Assertions.assertFalse(UserAgent.asString().contains("agent/unknown")); + } + + @Test + public void testAgentProviderAgentEnvVersionedPassthrough() { + // A versioned variant whose characters are all in the allowlist + // ([0-9A-Za-z_.+-]) is passed through unchanged. + setupAgentEnv( + new HashMap() { + { + put("AGENT", "my-tool-1.2.3"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/my-tool-1.2.3")); + } + + @Test + public void testAgentProviderAgentEnvSanitized() { + // Characters outside the user agent allowlist [0-9A-Za-z_.+-] become + // hyphens. + setupAgentEnv( + new HashMap() { + { + put("AGENT", "weird agent!@#name"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/weird-agent---name")); + } + + @Test + public void testAgentProviderAgentEnvTruncated() { + // Values longer than 64 characters are truncated to 64. + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < 100; i++) { + sb.append("a"); + } + String longValue = sb.toString(); + setupAgentEnv( + new HashMap() { + { + put("AGENT", longValue); + } + }); + StringBuilder expected = new StringBuilder("agent/"); + for (int i = 0; i < 64; i++) { + expected.append("a"); + } + String userAgent = UserAgent.asString(); + Assertions.assertTrue(userAgent.contains(expected.toString())); + // Must not contain a 65th 'a' after the prefix. + Assertions.assertFalse(userAgent.contains(expected.toString() + "a")); } @Test public void testAgentProviderAgentEnvEmpty() { - // AGENT="" should not trigger the unknown fallback. + // AGENT="" should not trigger the fallback. + setupAgentEnv( + new HashMap() { + { + put("AGENT", ""); + } + }); + Assertions.assertFalse(UserAgent.asString().contains("agent/")); + } + + @Test + public void testAgentProviderAiAgentFallback() { + // AI_AGENT is consulted when AGENT is unset. + setupAgentEnv( + new HashMap() { + { + put("AI_AGENT", "vercel-agent"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/vercel-agent")); + } + + @Test + public void testAgentProviderAgentWinsOverAiAgent() { + // AGENT takes precedence over AI_AGENT when both are non-empty. + setupAgentEnv( + new HashMap() { + { + put("AGENT", "primary"); + put("AI_AGENT", "secondary"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/primary")); + Assertions.assertFalse(UserAgent.asString().contains("agent/secondary")); + } + + @Test + public void testAgentProviderEmptyAgentFallsBackToAiAgent() { + // AGENT="" falls back to AI_AGENT. + setupAgentEnv( + new HashMap() { + { + put("AGENT", ""); + put("AI_AGENT", "secondary"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/secondary")); + } + + @Test + public void testAgentProviderExplicitMatcherWinsOverAiAgent() { + // An explicit matcher wins over AI_AGENT. + setupAgentEnv( + new HashMap() { + { + put("AI_AGENT", "vercel-agent"); + put("CLAUDECODE", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/claude-code")); + Assertions.assertFalse(UserAgent.asString().contains("agent/vercel-agent")); + } + + @Test + public void testAgentProviderBothEmptyReturnsNone() { + // Both AGENT and AI_AGENT empty yields no agent segment. setupAgentEnv( new HashMap() { { put("AGENT", ""); + put("AI_AGENT", ""); } }); Assertions.assertFalse(UserAgent.asString().contains("agent/"));