Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cmd/ans-ra/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ func run(cfgPath string) error {
// DNS verifier.
var dnsVerifier = selectDNSVerifier(cfg)

logger.Info().
Str("tlPublicBaseURL", cfg.TLClient.PublicBaseURL).
Str("tlBaseURL", cfg.TLClient.BaseURL).
Msg("transparency log endpoints configured")

// Auth.
authProvider, err := buildAuth(ctx, cfg)
if err != nil {
Expand All @@ -166,7 +171,8 @@ func run(cfgPath string) error {
KeyID: signerKeyID,
RaID: cfg.Signer.RaID,
}).WithDNSVerifier(dnsVerifier).
WithServerCertificateAuthority(serverCA)
WithServerCertificateAuthority(serverCA).
WithTLPublicBaseURL(cfg.TLClient.PublicBaseURL)
Copy link
Copy Markdown

@kperry-godaddy kperry-godaddy May 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No startup log of effective PublicBaseURL.

The TL outbox-worker startup log at lines 276-279 emits tlBaseURL (internal) but not PublicBaseURL. At 3am when oncall is debugging "badges point to the wrong place," they cannot tell pre-PR-23 from post-PR-23-but-misconfigured without SSH-ing the box and reading YAML.

Suggested addition near config load:

if cfg.TLClient.PublicBaseURL == "" {
    logger.Warn().Msg("tl-client.public-base-url is unset; _ans-badge DNS records will fall back to agent URL")
} else {
    logger.Info().
        Str("tlPublicBaseURL", cfg.TLClient.PublicBaseURL).
        Str("tlBaseURL", cfg.TLClient.BaseURL).
        Msg("transparency log endpoints configured")
}

Collapses MTTR for badge-verification incidents from 15-30 minutes (config inspection) to a 30-second log scan.


// HTTP.
r := chi.NewRouter()
Expand Down
3 changes: 3 additions & 0 deletions config/ra-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ store:

tl-client:
base-url: "http://ans-tl:18081"
# public-base-url: the externally-reachable TL URL used in _ans-badge
# DNS TXT records. Required — must be an https:// URL.
public-base-url: "https://tl.example.org"
api-key: "tl-internal-key"
timeout: 10s

Expand Down
3 changes: 3 additions & 0 deletions config/ra-local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ store:

tl-client:
base-url: "http://localhost:18081"
# public-base-url: the externally-reachable TL URL used in _ans-badge
# DNS TXT records. Required — must be an https:// URL.
public-base-url: "https://localhost:18081"
api-key: "tl-internal-key"
timeout: 10s

Expand Down
34 changes: 34 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package config
import (
"errors"
"fmt"
"net/url"
"strings"
"time"

Expand Down Expand Up @@ -139,6 +140,10 @@ type StoreSQLite struct {
type TLClient struct {
// BaseURL is the TL's listen URL, e.g. "http://localhost:18081".
BaseURL string `koanf:"base-url"`
// PublicBaseURL is the TL's externally-reachable URL used in
// _ans-badge DNS TXT records. Required — must be an https:// URL
// with no query string, fragment, or userinfo.
PublicBaseURL string `koanf:"public-base-url"`
Copy link
Copy Markdown

@kperry-godaddy kperry-godaddy May 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doc claims defaulting that doesn't exist in code.

The comment above this field says "When empty, defaults to BaseURL," but RAConfig.Validate() (lines 309-352) has no such defaulting. Operators who pull this PR without setting public-base-url in YAML keep emitting _ans-badge records pointing at the agent's own URL — the exact bug this PR is fixing.

Two clean options:

  • Required + validated (recommended): in Validate(), return an error when PublicBaseURL is empty, and reject anything that isn't https:// with no userinfo/query/fragment.
  • Default to BaseURL: assign c.TLClient.PublicBaseURL = c.TLClient.BaseURL when empty, so behavior matches the doc.

Either way, the field doc and the validator need to agree. The PR description's "Falls back to the agent endpoint URL when public-base-url is unset, preserving backwards compatibility" line should also be revised — the prior behavior was a bug, not a stable contract.

// APIKey is the bearer token the TL's static auth accepts.
APIKey string `koanf:"api-key"`
// Timeout is the per-request HTTP timeout.
Expand Down Expand Up @@ -342,6 +347,9 @@ func (c *RAConfig) Validate() error {
if c.TLClient.BaseURL == "" {
return errors.New("tl-client.base-url is required")
}
if err := validatePublicBaseURL(c.TLClient.PublicBaseURL); err != nil {
return err
}
if c.TLClient.Timeout <= 0 {
c.TLClient.Timeout = 10 * time.Second
}
Expand Down Expand Up @@ -412,3 +420,29 @@ func validateStore(s *Store) error {
}
return nil
}

// validatePublicBaseURL checks that raw is usable as the externally-reachable
// Transparency Log base URL configured under tl-client.public-base-url.
//
// It returns nil only when raw is non-empty, parses as a URL, uses the https
// scheme, names a host, and carries no userinfo, query string, or fragment.
// In every other case it returns an error whose message begins with the
// config key so the operator can see which setting to fix.
func validatePublicBaseURL(raw string) error {
	if raw == "" {
		return errors.New("tl-client.public-base-url is required")
	}
	u, err := url.Parse(raw)
	if err != nil {
		return fmt.Errorf("tl-client.public-base-url: %w", err)
	}
	if u.Scheme != "https" {
		return fmt.Errorf("tl-client.public-base-url must use https scheme, got %q", u.Scheme)
	}
	// Hostname() strips any port, so a port-only authority such as
	// "https://:8443" (where Host is ":8443") is rejected as well.
	if u.Hostname() == "" {
		return errors.New("tl-client.public-base-url: missing host")
	}
	if u.User != nil {
		return errors.New("tl-client.public-base-url: userinfo not allowed")
	}
	// ForceQuery is set for a bare trailing "?" (RawQuery stays empty);
	// without this check the "?" would be re-emitted whenever the URL is
	// rendered back to a string.
	if u.RawQuery != "" || u.ForceQuery {
		return errors.New("tl-client.public-base-url: query string not allowed")
	}
	if u.Fragment != "" {
		return errors.New("tl-client.public-base-url: fragment not allowed")
	}
	return nil
}
15 changes: 8 additions & 7 deletions internal/config/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@ func defaultRAConfig() *RAConfig {
SQLite: &StoreSQLite{Path: "./data/ra/ans.db"},
},
TLClient: TLClient{
BaseURL: "http://localhost:18081",
APIKey: "",
Timeout: 10 * time.Second,
BatchSize: 10,
PollInterval: 2 * time.Second,
MaxBackoff: 5 * time.Minute,
Disabled: false,
BaseURL: "http://localhost:18081",
PublicBaseURL: "https://localhost:18081",
APIKey: "",
Timeout: 10 * time.Second,
BatchSize: 10,
PollInterval: 2 * time.Second,
MaxBackoff: 5 * time.Minute,
Disabled: false,
},
Signer: SignerCfg{
KeyID: "ans-ra-signer",
Expand Down
20 changes: 17 additions & 3 deletions internal/domain/dnsrecords.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package domain

import "fmt"
import (
"fmt"
"net/url"
)

// DNSRecordType represents a DNS record type.
type DNSRecordType string
Expand Down Expand Up @@ -34,7 +37,12 @@ type ExpectedDNSRecord struct {
// ComputeRequiredDNSRecords generates the DNS records an operator must create
// for a given agent registration. The RA does not create these records — the
// operator manages their own DNS. The RA only verifies they exist.
func ComputeRequiredDNSRecords(reg *AgentRegistration) []ExpectedDNSRecord {
//
// tlPublicBaseURL is the externally-reachable Transparency Log URL used in
// the _ans-badge record (e.g. "https://tl.example.org"). When non-empty the
// badge url= field points to the TL badge endpoint for this agent; when
// empty it falls back to the agent's own endpoint URL.
func ComputeRequiredDNSRecords(reg *AgentRegistration, tlPublicBaseURL string) []ExpectedDNSRecord {
fqdn := reg.FQDN()
// Version is emitted as a bare semver string ("1.2.0"). The
// `v`-prefixed form only appears inside the ANS name's hostname
Expand Down Expand Up @@ -63,8 +71,14 @@ func ComputeRequiredDNSRecords(reg *AgentRegistration) []ExpectedDNSRecord {
// publishing _ans without _ans-badge would advertise an agent
// that fails the public discovery handshake.
if len(reg.Endpoints) > 0 {
badgeURL := reg.Endpoints[0].AgentURL
if tlPublicBaseURL != "" && reg.AgentID != "" {
// tlPublicBaseURL is validated at config load (https, no
// query/fragment/userinfo), so JoinPath cannot fail here.
badgeURL, _ = url.JoinPath(tlPublicBaseURL, "v1", "agents", reg.AgentID)
}
Comment on lines +74 to +79
badgeValue := fmt.Sprintf("v=ans-badge1; version=%s; url=%s",
version, reg.Endpoints[0].AgentURL)
version, badgeURL)
records = append(records, ExpectedDNSRecord{
Name: fmt.Sprintf("_ans-badge.%s", fqdn),
Type: DNSRecordTXT,
Expand Down
46 changes: 43 additions & 3 deletions internal/domain/dnsrecords_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func TestComputeRequiredDNSRecords_WithoutCert(t *testing.T) {
},
}

records := ComputeRequiredDNSRecords(reg)
records := ComputeRequiredDNSRecords(reg, "")
require.NotEmpty(t, records)

// 2 endpoints → 2 _ans TXT records + 1 badge record.
Expand Down Expand Up @@ -63,7 +63,7 @@ func TestComputeRequiredDNSRecords_WithCert(t *testing.T) {
ServerCert: &ByocServerCertificate{Fingerprint: "abcdef"},
}

records := ComputeRequiredDNSRecords(reg)
records := ComputeRequiredDNSRecords(reg, "")

var tlsaFound bool
for _, r := range records {
Expand All @@ -87,10 +87,50 @@ func TestComputeRequiredDNSRecords_WithCert(t *testing.T) {
func TestComputeRequiredDNSRecords_NoEndpoints(t *testing.T) {
ansName, _ := NewAnsName(mustSemVer(1, 0, 0), "agent.example.com")
reg := &AgentRegistration{AnsName: ansName}
records := ComputeRequiredDNSRecords(reg)
records := ComputeRequiredDNSRecords(reg, "")
assert.Empty(t, records)
}

// TestComputeRequiredDNSRecords_BadgeURLPointsToTL checks that, when both a
// TL public base URL and an agent ID are supplied, the badge record's url=
// field targets the TL's /v1/agents/<id> path instead of the agent's own
// endpoint URL.
func TestComputeRequiredDNSRecords_BadgeURLPointsToTL(t *testing.T) {
	const tlBase = "https://tl.example.org"

	name, _ := NewAnsName(mustSemVer(1, 0, 0), "agent.example.com")
	endpoint := AgentEndpoint{Protocol: ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}
	reg := &AgentRegistration{
		AgentID:   "test-agent-id",
		AnsName:   name,
		Endpoints: []AgentEndpoint{endpoint},
	}

	recs := ComputeRequiredDNSRecords(reg, tlBase)

	// Locate the first badge record; only that one is inspected.
	badgeIdx := -1
	for i := range recs {
		if recs[i].Purpose == PurposeBadge {
			badgeIdx = i
			break
		}
	}
	if badgeIdx < 0 {
		t.Fatal("no badge record found")
	}

	badge := recs[badgeIdx].Value
	assert.Contains(t, badge, "url=https://tl.example.org/v1/agents/test-agent-id")
	assert.NotContains(t, badge, "agent.example.com/mcp")
}

// TestComputeRequiredDNSRecords_BadgeFallbackWithoutTLURL checks that an
// empty TL public base URL leaves the badge record's url= field pointing at
// the first endpoint's agent URL.
func TestComputeRequiredDNSRecords_BadgeFallbackWithoutTLURL(t *testing.T) {
	name, _ := NewAnsName(mustSemVer(1, 0, 0), "agent.example.com")
	endpoint := AgentEndpoint{Protocol: ProtocolMCP, AgentURL: "https://agent.example.com/mcp"}
	reg := &AgentRegistration{
		AnsName:   name,
		Endpoints: []AgentEndpoint{endpoint},
	}

	recs := ComputeRequiredDNSRecords(reg, "")

	// Locate the first badge record; only that one is inspected.
	badgeIdx := -1
	for i := range recs {
		if recs[i].Purpose == PurposeBadge {
			badgeIdx = i
			break
		}
	}
	if badgeIdx < 0 {
		t.Fatal("no badge record found")
	}

	assert.Contains(t, recs[badgeIdx].Value, "url=https://agent.example.com/mcp")
}

func TestProtocolToANSValue(t *testing.T) {
assert.Equal(t, "a2a", protocolToANSValue(ProtocolA2A))
assert.Equal(t, "mcp", protocolToANSValue(ProtocolMCP))
Expand Down
8 changes: 4 additions & 4 deletions internal/ra/handler/dto.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ type agentDetails struct {
Links []linkDTO `json:"links"`
}

func mapAgentDetails(res *service.DetailResult, r *http.Request) agentDetails {
func mapAgentDetails(res *service.DetailResult, r *http.Request, tlPublicBaseURL string) agentDetails {
reg := res.Registration
// Stamp endpoints onto the aggregate so the pending-block builder's
// call to domain.ComputeRequiredDNSRecords produces the full record
Expand All @@ -104,7 +104,7 @@ func mapAgentDetails(res *service.DetailResult, r *http.Request) agentDetails {
AgentStatus: string(reg.Status),
Endpoints: mapEndpointsToDTO(res.Endpoints),
RegistrationTimestamp: reg.Details.RegistrationTimestamp.Format("2006-01-02T15:04:05Z07:00"),
RegistrationPending: buildRegistrationPendingBlock(reg, r),
RegistrationPending: buildRegistrationPendingBlock(reg, r, tlPublicBaseURL),
Links: []linkDTO{
{Rel: "self", Href: agentURL(r, reg.AgentID)},
},
Expand All @@ -119,7 +119,7 @@ func mapAgentDetails(res *service.DetailResult, r *http.Request) agentDetails {
// buildV1RegistrationPending. Agents still driving validation/DNS
// expose the outstanding challenges + DNS records needed to
// progress; terminal states omit the block.
func buildRegistrationPendingBlock(reg *domain.AgentRegistration, r *http.Request) *registrationPendingResponse {
func buildRegistrationPendingBlock(reg *domain.AgentRegistration, r *http.Request, tlPublicBaseURL string) *registrationPendingResponse {
switch reg.Status {
case domain.StatusPendingValidation:
base := schemeOf(r) + "://" + r.Host + "/v2/ans/agents/" + reg.AgentID
Expand Down Expand Up @@ -150,7 +150,7 @@ func buildRegistrationPendingBlock(reg *domain.AgentRegistration, r *http.Reques
}
case domain.StatusPendingDNS:
base := schemeOf(r) + "://" + r.Host + "/v2/ans/agents/" + reg.AgentID
expected := domain.ComputeRequiredDNSRecords(reg)
expected := domain.ComputeRequiredDNSRecords(reg, tlPublicBaseURL)
dnsRecords := make([]dnsRecordDTO, 0, len(expected))
for _, rec := range expected {
dnsRecords = append(dnsRecords, dnsRecordDTO{
Expand Down
2 changes: 1 addition & 1 deletion internal/ra/handler/lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func (h *LifecycleHandler) Detail(w http.ResponseWriter, r *http.Request) {
WriteError(w, err)
return
}
WriteJSON(w, http.StatusOK, mapAgentDetails(res, r))
WriteJSON(w, http.StatusOK, mapAgentDetails(res, r, h.svc.TLPublicBaseURL()))
}

// ----- GET /v2/ans/agents/{agentId}/certificates/identity -----
Expand Down
10 changes: 5 additions & 5 deletions internal/ra/handler/v1registration.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ func (h *V1RegistrationHandler) Detail(w http.ResponseWriter, r *http.Request) {
WriteError(w, err)
return
}
WriteJSON(w, http.StatusOK, mapV1AgentDetail(res.Registration, res.Endpoints, r))
WriteJSON(w, http.StatusOK, mapV1AgentDetail(res.Registration, res.Endpoints, r, h.svc.TLPublicBaseURL()))
}

// ----- DTO mapping helpers -----
Expand Down Expand Up @@ -376,7 +376,7 @@ func rfc3339Zero(t time.Time) string {
// Endpoints arrive as a separate slice because the domain aggregate
// stores them in their own repository; the service layer gathers
// both and hands them in.
func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEndpoint, r *http.Request) *v1AgentDetailResponse {
func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEndpoint, r *http.Request, tlPublicBaseURL string) *v1AgentDetailResponse {
eps := make([]v1EndpointDTO, len(endpoints))
for i, e := range endpoints {
fns := make([]v1FunctionDTO, len(e.Functions))
Expand Down Expand Up @@ -421,7 +421,7 @@ func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEnd
Endpoints: eps,
RegistrationTimestamp: reg.Details.RegistrationTimestamp.UTC().Format("2006-01-02T15:04:05Z"),
LastRenewalTimestamp: lastRenewal,
RegistrationPending: buildV1RegistrationPending(reg, r),
RegistrationPending: buildV1RegistrationPending(reg, r, tlPublicBaseURL),
Links: []v1LinkDTO{
{Rel: "self", Href: base},
},
Expand All @@ -444,7 +444,7 @@ func mapV1AgentDetail(reg *domain.AgentRegistration, endpoints []domain.AgentEnd
// publish (DISCOVERY/TRUST/BADGE/
// CERTIFICATE_BINDING), VERIFY_DNS nextStep,
// expiresAt scaled from the challenge deadline.
func buildV1RegistrationPending(reg *domain.AgentRegistration, r *http.Request) *v1RegistrationPendingResponse {
func buildV1RegistrationPending(reg *domain.AgentRegistration, r *http.Request, tlPublicBaseURL string) *v1RegistrationPendingResponse {
switch reg.Status {
case domain.StatusPendingValidation:
base := schemeOf(r) + "://" + r.Host + "/v1/agents/" + reg.AgentID
Expand Down Expand Up @@ -474,7 +474,7 @@ func buildV1RegistrationPending(reg *domain.AgentRegistration, r *http.Request)
}
case domain.StatusPendingDNS:
base := schemeOf(r) + "://" + r.Host + "/v1/agents/" + reg.AgentID
expected := domain.ComputeRequiredDNSRecords(reg)
expected := domain.ComputeRequiredDNSRecords(reg, tlPublicBaseURL)
dnsRecords := make([]v1DNSRecordDTO, 0, len(expected))
for _, rec := range expected {
dnsRecords = append(dnsRecords, v1DNSRecordDTO{
Expand Down
6 changes: 3 additions & 3 deletions internal/ra/service/lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ func (s *RegistrationService) VerifyDNS(ctx context.Context, agentID string, in
reg.ServerCert = byoc
}

expected := domain.ComputeRequiredDNSRecords(reg)
expected := domain.ComputeRequiredDNSRecords(reg, s.tlPublicBaseURL)

mismatches, perRecord, err := s.verifyDNSRecords(ctx, reg.FQDN(), expected)
if err != nil {
Expand Down Expand Up @@ -798,7 +798,7 @@ func (s *RegistrationService) Revoke(ctx context.Context, agentID string, in Rev
return &RevokeResult{
Registration: reg,
RevokedAt: now,
DNSRecordsToRemove: domain.ComputeRequiredDNSRecords(reg),
DNSRecordsToRemove: domain.ComputeRequiredDNSRecords(reg, s.tlPublicBaseURL),
}, nil
}

Expand Down Expand Up @@ -912,6 +912,6 @@ func (s *RegistrationService) Revoke(ctx context.Context, agentID string, in Rev
return &RevokeResult{
Registration: reg,
RevokedAt: now,
DNSRecordsToRemove: domain.ComputeRequiredDNSRecords(reg),
DNSRecordsToRemove: domain.ComputeRequiredDNSRecords(reg, s.tlPublicBaseURL),
}, nil
}
16 changes: 16 additions & 0 deletions internal/ra/service/registration.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ type RegistrationService struct {
outbox OutboxEnqueuer
uow port.UnitOfWork
dnsVerifier port.DNSVerifier
// tlPublicBaseURL is the externally-reachable Transparency Log URL
// used in _ans-badge DNS records (e.g. "https://tl.example.org").
tlPublicBaseURL string
// signer is the KeyManager + keyID + raID tuple used to sign
// outbox events. When nil, events are still persisted but without
// a signature — this is only valid for tests; production configs
Expand Down Expand Up @@ -203,6 +206,19 @@ func (s *RegistrationService) WithDNSVerifier(v port.DNSVerifier) *RegistrationS
return s
}

// WithTLPublicBaseURL records publicBaseURL as the externally-reachable
// Transparency Log URL used in _ans-badge DNS TXT records and returns
// the same *RegistrationService so the call can be chained with the
// other With* builders. The value is stored as given; this method does
// no validation of its own. When the stored value is empty, badge
// records fall back to the agent's own endpoint URL.
func (s *RegistrationService) WithTLPublicBaseURL(publicBaseURL string) *RegistrationService {
s.tlPublicBaseURL = publicBaseURL
return s
}

// TLPublicBaseURL returns the public Transparency Log base URL currently
// held by the service, as stored by WithTLPublicBaseURL. HTTP handlers
// read it through this accessor and pass it to the detail DTO mappers.
func (s *RegistrationService) TLPublicBaseURL() string {
return s.tlPublicBaseURL
}

// RegisterAgent implements the V2 registration flow:
// 1. Validate the request shape via domain constructors.
// 2. Check ANS name uniqueness.
Expand Down
2 changes: 1 addition & 1 deletion internal/ra/service/v1event.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ func (s *RegistrationService) buildAgentRevokedV1Event(
// sees the full record set (including per-endpoint metadata
// records). If it didn't, we'd get back an empty list and the
// revoke envelope would ship with no DNS tear-down guidance.
expected := domain.ComputeRequiredDNSRecords(reg)
expected := domain.ComputeRequiredDNSRecords(reg, s.tlPublicBaseURL)
dnsMap := make(map[string]string, len(expected))
for _, r := range expected {
dnsMap[r.Name] = r.Value
Expand Down