Skip to content

Commit baa9ae1

Browse files
authored
Update and fix config (#69)
1 parent f49fd99 commit baa9ae1

4 files changed

Lines changed: 238 additions & 13 deletions

File tree

aieng-eval-agents/aieng/agent_evals/configs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ class Configs(BaseSettings):
9696
validation_alias=AliasChoices("OPENAI_API_KEY", "GEMINI_API_KEY", "GOOGLE_API_KEY"),
9797
description="API key for OpenAI-compatible API (accepts OPENAI_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY).",
9898
)
99+
google_api_key: SecretStr = Field(
100+
validation_alias=AliasChoices("GEMINI_API_KEY", "GOOGLE_API_KEY"),
101+
description="API key for Google/Gemini API (accepts GEMINI_API_KEY or GOOGLE_API_KEY).",
102+
)
99103
default_planner_model: str = Field(
100104
default="gemini-2.5-pro",
101105
description="Model name for planning/complex reasoning tasks.",

aieng-eval-agents/aieng/agent_evals/tools/search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ async def google_search(query: str, model: str | None = None) -> dict[str, Any]:
303303
model = config.default_worker_model
304304

305305
return await _google_search_async(
306-
query, model=model, temperature=config.default_temperature, api_key=config.openai_api_key.get_secret_value()
306+
query, model=model, temperature=config.default_temperature, api_key=config.google_api_key.get_secret_value()
307307
)
308308

309309

@@ -369,7 +369,7 @@ async def google_search(query: str) -> dict[str, Any]:
369369
- **error** (str): Error message (error case only)
370370
"""
371371
return await _google_search_async(
372-
query, model=model, temperature=temperature, api_key=config.openai_api_key.get_secret_value()
372+
query, model=model, temperature=temperature, api_key=config.google_api_key.get_secret_value()
373373
)
374374

375375
return FunctionTool(func=google_search)
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
"""Tests for Configs and DatabaseConfig configuration models."""
2+
3+
import os
4+
5+
import pytest
6+
from aieng.agent_evals.configs import Configs, DatabaseConfig
7+
from pydantic import SecretStr, ValidationError
8+
9+
10+
def make_configs() -> Configs:
11+
"""Create Configs without loading any .env file.
12+
13+
Wraps ``Configs(_env_file=None)`` to avoid a Pyright false-positive:
14+
pydantic-settings accepts ``_env_file`` as a special init override but it
15+
is absent from the generated type stubs.
16+
"""
17+
return Configs(_env_file=None) # type: ignore[call-arg]
18+
19+
20+
class TestDatabaseConfig:
21+
"""Tests for DatabaseConfig and its build_uri() method."""
22+
23+
def test_build_uri_sqlite(self):
24+
"""SQLite URI with only driver and database is valid."""
25+
config = DatabaseConfig(driver="sqlite", database="/tmp/test.db")
26+
assert config.build_uri() == "sqlite:////tmp/test.db"
27+
28+
def test_build_uri_postgresql_with_credentials(self):
29+
"""PostgreSQL URI includes host, port, username, and password."""
30+
config = DatabaseConfig(
31+
driver="postgresql",
32+
username="user",
33+
password=SecretStr("secret"),
34+
host="localhost",
35+
port=5432,
36+
database="mydb",
37+
)
38+
assert config.build_uri() == "postgresql://user:secret@localhost:5432/mydb"
39+
40+
def test_build_uri_includes_query_params(self):
41+
"""Query parameters appear in the rendered URI."""
42+
config = DatabaseConfig(driver="sqlite", database="/tmp/test.db", query={"mode": "ro"})
43+
assert "mode=ro" in config.build_uri()
44+
45+
def test_build_uri_escapes_special_password_chars(self):
46+
"""Special characters in the password are URL-encoded, not exposed verbatim."""
47+
config = DatabaseConfig(
48+
driver="postgresql",
49+
username="user",
50+
password=SecretStr("p@ss/word"),
51+
host="localhost",
52+
port=5432,
53+
database="db",
54+
)
55+
uri = config.build_uri()
56+
assert "p@ss/word" not in uri # must be percent-encoded
57+
assert "user" in uri
58+
59+
def test_optional_fields_default_to_none(self):
60+
"""username, host, password, port, and database all default to None."""
61+
config = DatabaseConfig(driver="sqlite")
62+
assert config.username is None
63+
assert config.host is None
64+
assert config.password is None
65+
assert config.port is None
66+
assert config.database is None
67+
68+
def test_query_defaults_to_empty_dict(self):
69+
"""Query field defaults to an empty dict."""
70+
assert DatabaseConfig(driver="sqlite").query == {}
71+
72+
73+
class TestConfigsDefaults:
74+
"""Tests for default field values in Configs."""
75+
76+
@pytest.fixture(autouse=True)
77+
def _required_env(self, monkeypatch):
78+
"""Run with a fully isolated environment containing only required fields."""
79+
monkeypatch.setattr(os, "environ", {"OPENAI_API_KEY": "test-openai-key", "GEMINI_API_KEY": "test-google-key"})
80+
81+
def test_default_worker_model(self):
82+
"""default_worker_model is gemini-2.5-flash."""
83+
assert make_configs().default_worker_model == "gemini-2.5-flash"
84+
85+
def test_default_planner_model(self):
86+
"""default_planner_model is gemini-2.5-pro."""
87+
assert make_configs().default_planner_model == "gemini-2.5-pro"
88+
89+
def test_default_evaluator_model(self):
90+
"""default_evaluator_model is gemini-2.5-pro."""
91+
assert make_configs().default_evaluator_model == "gemini-2.5-pro"
92+
93+
def test_default_temperature(self):
94+
"""default_temperature is 1.0."""
95+
assert make_configs().default_temperature == 1.0
96+
97+
def test_default_evaluator_temperature(self):
98+
"""default_evaluator_temperature is 0.0."""
99+
assert make_configs().default_evaluator_temperature == 0.0
100+
101+
def test_default_openai_base_url(self):
102+
"""openai_base_url defaults to the Gemini googleapis endpoint."""
103+
assert "googleapis.com" in make_configs().openai_base_url
104+
105+
def test_optional_fields_default_none(self):
106+
"""All optional service fields default to None."""
107+
config = make_configs()
108+
assert config.aml_db is None
109+
assert config.report_generation_db is None
110+
assert config.langfuse_public_key is None
111+
assert config.langfuse_secret_key is None
112+
assert config.e2b_api_key is None
113+
114+
115+
class TestGoogleApiKey:
116+
"""Tests for the google_api_key field and its env var aliases."""
117+
118+
@pytest.fixture(autouse=True)
119+
def _required_env(self, monkeypatch):
120+
"""Run with a clean environment: only OPENAI_API_KEY set, no Google keys."""
121+
monkeypatch.setattr(os, "environ", {"OPENAI_API_KEY": "test-openai-key"})
122+
123+
def test_loaded_from_gemini_api_key(self, monkeypatch):
124+
"""google_api_key is populated from GEMINI_API_KEY."""
125+
monkeypatch.setenv("GEMINI_API_KEY", "my-gemini-key")
126+
assert make_configs().google_api_key.get_secret_value() == "my-gemini-key"
127+
128+
def test_loaded_from_google_api_key(self, monkeypatch):
129+
"""google_api_key is populated from GOOGLE_API_KEY."""
130+
monkeypatch.setenv("GOOGLE_API_KEY", "my-google-key")
131+
assert make_configs().google_api_key.get_secret_value() == "my-google-key"
132+
133+
def test_gemini_api_key_takes_priority_over_google_api_key(self, monkeypatch):
134+
"""GEMINI_API_KEY takes priority over GOOGLE_API_KEY when both are set."""
135+
monkeypatch.setenv("GEMINI_API_KEY", "gemini-key")
136+
monkeypatch.setenv("GOOGLE_API_KEY", "google-key")
137+
config = make_configs()
138+
assert config.google_api_key.get_secret_value() == "gemini-key"
139+
140+
def test_secret_value_not_exposed_in_repr(self, monkeypatch):
141+
"""SecretStr does not leak the raw key in repr or str."""
142+
monkeypatch.setenv("GEMINI_API_KEY", "super-secret-key")
143+
key = make_configs().google_api_key
144+
assert "super-secret-key" not in repr(key)
145+
assert "super-secret-key" not in str(key)
146+
147+
148+
class TestOpenAiApiKeyAliases:
149+
"""Tests for openai_api_key env var aliases."""
150+
151+
@pytest.fixture(autouse=True)
152+
def _clear_google_env(self, monkeypatch):
153+
monkeypatch.setattr(os, "environ", {})
154+
155+
def test_loaded_from_openai_api_key(self, monkeypatch):
156+
"""openai_api_key is loaded from OPENAI_API_KEY when it is set."""
157+
monkeypatch.setenv("OPENAI_API_KEY", "my-openai-key")
158+
monkeypatch.setenv("GEMINI_API_KEY", "test-google-key")
159+
config = make_configs()
160+
assert config.openai_api_key.get_secret_value() == "my-openai-key"
161+
162+
def test_loaded_from_gemini_api_key(self, monkeypatch):
163+
"""openai_api_key falls back to GEMINI_API_KEY when OPENAI_API_KEY is absent."""
164+
monkeypatch.setenv("GEMINI_API_KEY", "my-gemini-key")
165+
config = make_configs()
166+
assert config.openai_api_key.get_secret_value() == "my-gemini-key"
167+
168+
def test_loaded_from_google_api_key(self, monkeypatch):
169+
"""openai_api_key falls back to GOOGLE_API_KEY as the last alias."""
170+
monkeypatch.setenv("GOOGLE_API_KEY", "my-google-key")
171+
config = make_configs()
172+
assert config.openai_api_key.get_secret_value() == "my-google-key"
173+
174+
def test_missing_raises_validation_error(self):
175+
"""Configs raises ValidationError when no API key env var is set."""
176+
with pytest.raises(ValidationError):
177+
make_configs()
178+
179+
180+
class TestConfigsValidators:
181+
"""Tests for Configs field validators."""
182+
183+
@pytest.fixture(autouse=True)
184+
def _required_env(self, monkeypatch):
185+
monkeypatch.setattr(os, "environ", {"OPENAI_API_KEY": "test-openai-key", "GEMINI_API_KEY": "test-google-key"})
186+
187+
def test_langfuse_secret_key_valid(self, monkeypatch):
188+
"""langfuse_secret_key accepts values starting with 'sk-lf-'."""
189+
monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-valid-secret")
190+
config = make_configs()
191+
assert config.langfuse_secret_key is not None
192+
assert config.langfuse_secret_key.get_secret_value() == "sk-lf-valid-secret"
193+
194+
def test_langfuse_secret_key_invalid_prefix_raises(self, monkeypatch):
195+
"""langfuse_secret_key rejects values not starting with 'sk-lf-'."""
196+
monkeypatch.setenv("LANGFUSE_SECRET_KEY", "invalid-secret")
197+
with pytest.raises(ValidationError, match="sk-lf-"):
198+
make_configs()
199+
200+
def test_langfuse_secret_key_none_is_allowed(self):
201+
"""langfuse_secret_key accepts None (key not configured)."""
202+
assert make_configs().langfuse_secret_key is None
203+
204+
def test_e2b_api_key_valid(self, monkeypatch):
205+
"""e2b_api_key accepts values starting with 'e2b_'."""
206+
monkeypatch.setenv("E2B_API_KEY", "e2b_valid_key")
207+
config = make_configs()
208+
assert config.e2b_api_key is not None
209+
assert config.e2b_api_key.get_secret_value() == "e2b_valid_key"
210+
211+
def test_e2b_api_key_invalid_prefix_raises(self, monkeypatch):
212+
"""e2b_api_key rejects values not starting with 'e2b_'."""
213+
monkeypatch.setenv("E2B_API_KEY", "invalid_key")
214+
with pytest.raises(ValidationError, match="e2b_"):
215+
make_configs()
216+
217+
def test_e2b_api_key_none_is_allowed(self):
218+
"""e2b_api_key accepts None (key not configured)."""
219+
assert make_configs().e2b_api_key is None
220+
221+
def test_langfuse_public_key_valid_pattern(self, monkeypatch):
222+
"""langfuse_public_key accepts values matching 'pk-lf-*'."""
223+
monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "pk-lf-abc123")
224+
config = make_configs()
225+
assert config.langfuse_public_key == "pk-lf-abc123"
226+
227+
def test_langfuse_public_key_invalid_pattern_raises(self, monkeypatch):
228+
"""langfuse_public_key rejects values not matching 'pk-lf-*'."""
229+
monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "invalid-key")
230+
with pytest.raises(ValidationError):
231+
make_configs()

implementations/basics/01_why_evals.ipynb

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -122,17 +122,7 @@
122122
"cell_type": "markdown",
123123
"id": "summary",
124124
"metadata": {},
125-
"source": [
126-
"## Summary\n",
127-
"\n",
128-
"1. **Compounding errors** — 10-step agents need task-level evaluation, not just per-answer accuracy\n",
129-
"2. **Four dimensions** — outcome, tool quality, reasoning, cost-performance\n",
130-
"3. **Three grader types** — code-based (fast, deterministic), model-based (flexible, nuanced), human (gold-standard, for calibration)\n",
131-
"4. **Capability benchmarks** — hard problems as hills to climb; progress is measured, not estimated\n",
132-
"5. **Closed loop** — failure patterns map to prompt, tool, behavior, and reasoning changes\n",
133-
"\n",
134-
"**Next:** In Notebook 02, we'll use the shared evaluation harness to run these ideas in code."
135-
]
125+
"source": "## Summary\n\n1. **Compounding errors** — 10-step agents need task-level evaluation, not just per-answer accuracy\n2. **Four dimensions** — outcome, tool quality, reasoning, cost-performance\n3. **Three grader types** — code-based (fast, deterministic), model-based (flexible, nuanced), human (gold-standard, for calibration)\n4. **Capability benchmarks** — hard problems as hills to climb; progress is measured, not estimated\n5. **Closed loop** — failure patterns map to prompt, tool, behavior, and reasoning changes\n\n**Next:** In Notebook 02, we'll use the shared evaluation harness to run these ideas in code.\n\n---\n\n**Further Reading:** [Evaluating Agents — Google ADK Documentation](https://google.github.io/adk-docs/evaluate/) — official guide covering built-in evaluation tools for ADK-based agents, including test-file formats, metrics, and running evaluations with the CLI."
136126
}
137127
],
138128
"metadata": {

0 commit comments

Comments
 (0)